Message ID | 1394811272-1547-5-git-send-email-Liviu.Dudau@arm.com
---|---
State | New, archived
On Fri, Mar 14, 2014 at 03:34:30PM +0000, Liviu Dudau wrote:
> Make it easier to discover the domain number of a bus by storing
> the number in pci_host_bridge for the root bus. Several architectures
> have their own way of storing this information, so it makes sense
> to try to unify the code.

I like the idea of unifying the way we handle the domain number. But
I'd like to see more of the strategy before committing to this approach.

This patch adds struct pci_host_bridge.domain_nr, but of course
pci_domain_nr() doesn't use it. It can't today, because
pci_create_root_bus() relies on pci_domain_nr() to fill in
pci_host_bridge.domain_nr.

But I suppose the intent is that someday we can make pci_domain_nr()
arch-independent somehow. I'd just like to see more of the path to
that happening.

> While at this, add a new function that
> creates a root bus in a given domain and make pci_create_root_bus()
> a wrapper around this function.

I'm a little concerned about adding a new "create root bus" interface,
partly because we have quite a few already, and I'd like to reduce the
number of them instead of adding more. And I think there might be other
similar opportunities for unification, so I could easily see us adding new
functions in the future to unify NUMA node info, ECAM info, etc.

I wonder if we need some sort of descriptor structure that the arch could
fill in and pass to the PCI core. Then we could add new members without
having to create new interfaces each time.

> Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> Tested-by: Tanmay Inamdar <tinamdar@apm.com>
> ---
>  drivers/pci/probe.c | 41 +++++++++++++++++++++++++++++++++--------
>  include/linux/pci.h |  4 ++++
>  2 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index fd11c12..172c615 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -1714,8 +1714,9 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
>  {
>  }
>
> -struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> -        struct pci_ops *ops, void *sysdata, struct list_head *resources)
> +struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
> +        int domain, int bus, struct pci_ops *ops, void *sysdata,
> +        struct list_head *resources)
>  {
>      int error;
>      struct pci_host_bridge *bridge;
> @@ -1728,30 +1729,34 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>
>      bridge = pci_alloc_host_bridge();
>      if (!bridge)
> -        return NULL;
> +        return ERR_PTR(-ENOMEM);
>
>      bridge->dev.parent = parent;
>      bridge->dev.release = pci_release_host_bridge_dev;
> +    bridge->domain_nr = domain;
>      error = pcibios_root_bridge_prepare(bridge);
>      if (error)
>          goto err_out;
>
>      b = pci_alloc_bus();
> -    if (!b)
> +    if (!b) {
> +        error = -ENOMEM;
>          goto err_out;
> +    }
>
>      b->sysdata = sysdata;
>      b->ops = ops;
>      b->number = b->busn_res.start = bus;
> -    b2 = pci_find_bus(pci_domain_nr(b), bus);
> +    b2 = pci_find_bus(bridge->domain_nr, bus);
>      if (b2) {
>          /* If we already got to this bus through a different bridge, ignore it */
>          dev_dbg(&b2->dev, "bus already known\n");
> +        error = -EEXIST;
>          goto err_bus_out;
>      }
>
>      bridge->bus = b;
> -    dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus);
> +    dev_set_name(&bridge->dev, "pci%04x:%02x", bridge->domain_nr, bus);
>      error = device_register(&bridge->dev);
>      if (error) {
>          put_device(&bridge->dev);
> @@ -1766,7 +1771,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>
>      b->dev.class = &pcibus_class;
>      b->dev.parent = b->bridge;
> -    dev_set_name(&b->dev, "%04x:%02x", pci_domain_nr(b), bus);
> +    dev_set_name(&b->dev, "%04x:%02x", bridge->domain_nr, bus);
>      error = device_register(&b->dev);
>      if (error)
>          goto class_dev_reg_err;
> @@ -1816,7 +1821,27 @@ err_bus_out:
>      kfree(b);
>  err_out:
>      kfree(bridge);
> -    return NULL;
> +    return ERR_PTR(error);
> +}
> +
> +struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> +        struct pci_ops *ops, void *sysdata, struct list_head *resources)
> +{
> +    int domain_nr;
> +    struct pci_bus *b = pci_alloc_bus();
> +    if (!b)
> +        return NULL;
> +
> +    b->sysdata = sysdata;
> +    domain_nr = pci_domain_nr(b);
> +    kfree(b);
> +
> +    b = pci_create_root_bus_in_domain(parent, domain_nr, bus,
> +            ops, sysdata, resources);
> +    if (IS_ERR(b))
> +        return NULL;
> +
> +    return b;
>  }
>
>  int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 33aa2ca..1eed009 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -394,6 +394,7 @@ struct pci_host_bridge_window {
>  struct pci_host_bridge {
>      struct device dev;
>      struct pci_bus *bus;        /* root bus */
> +    int domain_nr;
>      struct list_head windows;   /* pci_host_bridge_windows */
>      void (*release_fn)(struct pci_host_bridge *);
>      void *release_data;
> @@ -747,6 +748,9 @@ struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
>  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>                      struct pci_ops *ops, void *sysdata,
>                      struct list_head *resources);
> +struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
> +                    int domain, int bus, struct pci_ops *ops,
> +                    void *sysdata, struct list_head *resources);
>  int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
>  int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
>  void pci_bus_release_busn_res(struct pci_bus *b);
> --
> 1.9.0
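To make the proposed calling convention concrete, a host bridge driver using the new interface would look roughly like this. This is a minimal sketch, not code from the patch: my_pci_ops and my_mem_window are hypothetical placeholders, and the surrounding driver plumbing is elided.

static struct pci_ops my_pci_ops;       /* config accessors, not shown */
static struct resource my_mem_window;   /* bridge aperture, not shown */

static int my_host_probe(struct device *dev)
{
        LIST_HEAD(resources);
        struct pci_bus *bus;

        pci_add_resource(&resources, &my_mem_window);

        /* domain 1, root bus 0; failures now come back as ERR_PTR values */
        bus = pci_create_root_bus_in_domain(dev, 1, 0, &my_pci_ops,
                                            NULL, &resources);
        if (IS_ERR(bus)) {
                pci_free_resource_list(&resources);
                return PTR_ERR(bus);
        }

        pci_scan_child_bus(bus);
        pci_bus_add_devices(bus);
        return 0;
}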
On Sat, Apr 05, 2014 at 01:00:07AM +0100, Bjorn Helgaas wrote: > On Fri, Mar 14, 2014 at 03:34:30PM +0000, Liviu Dudau wrote: > > Make it easier to discover the domain number of a bus by storing > > the number in pci_host_bridge for the root bus. Several architectures > > have their own way of storing this information, so it makes sense > > to try to unify the code. > > I like the idea of unifying the way we handle the domain number. But > I'd like to see more of the strategy before committing to this approach. *My* strategy is to get rid of pci_domain_nr(). I don't see why we need to have an arch-specific way of providing the number, especially after looking at the existing implementations that return a value from a variable that is never touched or incremented. My guess is that pci_domain_nr() was created to work around the fact that there was no domain_nr maintenance in the generic code. > > This patch adds struct pci_host_bridge.domain_nr, but of course > pci_domain_nr() doesn't use it. It can't today, because > pci_create_root_bus() relies on pci_domain_nr() to fill in > pci_host_bridge.domain_nr. > > But I suppose the intent is that someday we can make pci_domain_nr() > arch-independent somehow. I'd just like to see more of the path to > that happening. The path would be to send a patch that removes all existing pci_domain_nr() macros/inline functions and relies on the generic function. > > > While at this, add a new function that > > creates a root bus in a given domain and make pci_create_root_bus() > > a wrapper around this function. > > I'm a little concerned about adding a new "create root bus" interface, > partly because we have quite a few already, and I'd like to reduce the > number of them instead of adding more. And I think there might be other > similar opportunities for unification, so I could easily see us adding new > functions in the future to unify NUMA node info, ECAM info, etc. The reason for creating the wrapper function was to allow for explicit passing of domain_nr. If we find architectures where generic allocation of domain_nr doesn't work for them, we can make them use this wrapper to pass the domain_nr into the host bridge when creating the root bus. > > I wonder if we need some sort of descriptor structure that the arch could > fill in and pass to the PCI core. Then we could add new members without > having to create new interfaces each time. I'm trying to reduce the number of variables being passed between architectures and generic code. host_bridge (with the associated root bus) and domain_nr are needed. Is there anything else that you have in mind that needs to be shared? My approach would be to share the data: PCI is a standard, and the core framework implements it. What is so broken in your architecture that you need to work around the core code? And I'm not talking about drivers and quirks, but architectural level support.
Best regards, Liviu
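A generic pci_domain_nr() along the lines Liviu describes could be very small. A sketch under two assumptions — that the root bus keeps its back pointer to the host bridge device in bus->bridge, and that the existing to_pci_host_bridge() container_of() helper is used:

/* Sketch of the end goal: one generic pci_domain_nr() that walks up
 * to the root bus and reads the number stored in the host bridge. */
static inline int pci_domain_nr(struct pci_bus *bus)
{
        while (bus->parent)             /* walk up to the root bus */
                bus = bus->parent;

        return to_pci_host_bridge(bus->bridge)->domain_nr;
}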
On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote: > > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need > to have arch specific way of providing the number, specially after looking > at the existing implementations that return a value from a variable that > is never touched or incremented. My guess is that pci_domain_nr() was > created to work around the fact that there was no domain_nr maintainance in > the generic code. Well, there was no generic host bridge structure. There is one now, it should go there. Cheers, Ben.
On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote: > On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote: > > > > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need > > to have arch specific way of providing the number, specially after looking > > at the existing implementations that return a value from a variable that > > is never touched or incremented. My guess is that pci_domain_nr() was > > created to work around the fact that there was no domain_nr maintainance in > > the generic code. > > Well, there was no generic host bridge structure. There is one now, it should > go there. Exactly! Hence my patch. After it gets accepted I will go through architectures and remove their version of pci_domain_nr(). Best regards, Liviu > > Cheers, > Ben. > > >
On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote: >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote: >> > >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need >> > to have arch specific way of providing the number, specially after looking >> > at the existing implementations that return a value from a variable that >> > is never touched or incremented. My guess is that pci_domain_nr() was >> > created to work around the fact that there was no domain_nr maintainance in >> > the generic code. >> >> Well, there was no generic host bridge structure. There is one now, it should >> go there. > > Exactly! Hence my patch. After it gets accepted I will go through architectures > and remove their version of pci_domain_nr(). Currently the arch has to supply pci_domain_nr() because that's the only way for the generic code to learn the domain. After you add pci_create_root_bus_in_domain(), the arch can supply the domain that way, and we won't need the arch-specific pci_domain_nr(). Right? That makes more sense to me; thanks for the explanation. Let me try to explain my concern about the pci_create_root_bus_in_domain() interface. We currently have these interfaces: pci_scan_root_bus() pci_scan_bus() pci_scan_bus_parented() pci_create_root_bus() pci_scan_root_bus() is a higher-level interface than pci_create_root_bus(), so I'm trying to migrate toward it because it lets us remove a little code from the arch, e.g., pci_scan_child_bus() and pci_bus_add_devices(). I think we can only remove the arch-specific pci_domain_nr() if that arch uses pci_create_root_bus_in_domain(). When we convert an arch from using scan_bus interfaces to using pci_create_root_bus_in_domain(), we will have to move the rest of the scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into the arch code. One alternative is to add an _in_domain() variant of each of these interfaces, but that doesn't seem very convenient either. My idea of passing in a structure would also require adding variants, so there's not really an advantage there, but I am thinking of the next unification effort, e.g., for NUMA node info. I don't really want to have to change all the _in_domain() interfaces to also take yet another parameter for the node number. Bjorn
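For context, the layering Bjorn describes looks roughly like this in kernels of this era — a simplified sketch; the real pci_scan_root_bus() also manages the bus number resource, elided here:

/* The higher-level interface wraps pci_create_root_bus() and pulls
 * child enumeration and device registration out of the arch code. */
struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
                struct pci_ops *ops, void *sysdata,
                struct list_head *resources)
{
        struct pci_bus *b;

        b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
        if (!b)
                return NULL;

        pci_scan_child_bus(b);
        pci_bus_add_devices(b);
        return b;
}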
On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote: > On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: > > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote: > >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote: > >> > > >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need > >> > to have arch specific way of providing the number, specially after looking > >> > at the existing implementations that return a value from a variable that > >> > is never touched or incremented. My guess is that pci_domain_nr() was > >> > created to work around the fact that there was no domain_nr maintainance in > >> > the generic code. > >> > >> Well, there was no generic host bridge structure. There is one now, it should > >> go there. > > > > Exactly! Hence my patch. After it gets accepted I will go through architectures > > and remove their version of pci_domain_nr(). > > Currently the arch has to supply pci_domain_nr() because that's the > only way for the generic code to learn the domain. After you add > pci_create_root_bus_in_domain(), the arch can supply the domain that > way, and we won't need the arch-specific pci_domain_nr(). Right? > That makes more sense to me; thanks for the explanation. Right. > > Let me try to explain my concern about the > pci_create_root_bus_in_domain() interface. We currently have these > interfaces: > > pci_scan_root_bus() > pci_scan_bus() > pci_scan_bus_parented() > pci_create_root_bus() > > pci_scan_root_bus() is a higher-level interface than > pci_create_root_bus(), so I'm trying to migrate toward it because it > lets us remove a little code from the arch, e.g., pci_scan_child_bus() > and pci_bus_add_devices(). > > I think we can only remove the arch-specific pci_domain_nr() if that > arch uses pci_create_root_bus_in_domain(). When we convert an arch > from using scan_bus interfaces to using > pci_create_root_bus_in_domain(), we will have to move the rest of the > scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into > the arch code. > > One alternative is to add an _in_domain() variant of each of these > interfaces, but that doesn't seem very convenient either. My idea of > passing in a structure would also require adding variants, so there's > not really an advantage there, but I am thinking of the next > unification effort, e.g., for NUMA node info. I don't really want to > have to change all the _in_domain() interfaces to also take yet > another parameter for the node number. OK, what about this: all the functions that you have mentioned take a void *sysdata parameter. Should we convert this opaque pointer into a specific structure that holds the domain_nr and (in future) the NUMA node info? Converting all the architectures is going to be a long-winded job, as everyone has loved to add their own stuff to the structure pointed to by sysdata, and breaking that will lead to frustration, but maybe the framework should take back ownership of it. Best regards, Liviu
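As a sketch of Liviu's idea, the opaque pointer would become something like the following; the structure name and fields are hypothetical:

/* Hypothetical typed replacement for the opaque sysdata pointer:
 * generic fields owned by the core, plus an escape hatch for whatever
 * the architectures still need to carry around. */
struct pci_sysdata {
        int domain_nr;          /* managed by the core */
        int node;               /* NUMA node, for a later unification */
        void *arch_data;        /* remaining arch-specific state */
};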
On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote: >> Let me try to explain my concern about the >> pci_create_root_bus_in_domain() interface. We currently have these >> interfaces: >> >> pci_scan_root_bus() >> pci_scan_bus() >> pci_scan_bus_parented() >> pci_create_root_bus() >> ... >> One alternative is to add an _in_domain() variant of each of these >> interfaces, but that doesn't seem very convenient either. My idea of >> passing in a structure would also require adding variants, so there's >> not really an advantage there, but I am thinking of the next >> unification effort, e.g., for NUMA node info. I don't really want to >> have to change all the _in_domain() interfaces to also take yet >> another parameter for the node number. > > OK, what about this: all the functions that you have mentioned take a > void *sysdata parameter. Should we convert this opaque pointer into a > specific structure that holds the domain_nr and (in future) the NUMA > node info? I doubt if we can make sysdata itself generic because I suspect we need a way to have *some* arch-specific data. But maybe the arch could supply a structure containing a struct device *, domain, struct pci_ops *, list of resources, aperture info, etc. I wonder if struct pci_host_bridge would be a reasonable place to put this stuff, e.g., something like this:

struct pci_host_bridge {
        int domain;
        int node;
        struct device *dev;
        struct pci_ops *ops;
        struct list_head resources;
        void *sysdata;
        struct pci_bus *bus;    /* filled in by core, not by arch */
        ...                     /* other existing contents managed by core */
};

struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
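Under that proposal, an arch or host driver would fill the structure and hand it over in one call. A hedged sketch only — pci_scan_host_bridge() is merely proposed above, and my_pci_ops/my_window are placeholders:

/* Hypothetical caller of the proposed pci_scan_host_bridge(). */
static int my_host_probe(struct device *dev)
{
        struct pci_host_bridge *bridge;
        struct pci_bus *bus;

        bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
        if (!bridge)
                return -ENOMEM;

        bridge->domain = 0;
        bridge->node = dev_to_node(dev);
        bridge->dev = dev;
        bridge->ops = &my_pci_ops;
        INIT_LIST_HEAD(&bridge->resources);
        pci_add_resource(&bridge->resources, &my_window);

        bus = pci_scan_host_bridge(bridge);
        return IS_ERR(bus) ? PTR_ERR(bus) : 0;
}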
On Tue, Apr 08, 2014 at 05:28:39PM +0100, Bjorn Helgaas wrote: > On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: > > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote: > > >> Let me try to explain my concern about the > >> pci_create_root_bus_in_domain() interface. We currently have these > >> interfaces: > >> > >> pci_scan_root_bus() > >> pci_scan_bus() > >> pci_scan_bus_parented() > >> pci_create_root_bus() > >> ... > > >> One alternative is to add an _in_domain() variant of each of these > >> interfaces, but that doesn't seem very convenient either. My idea of > >> passing in a structure would also require adding variants, so there's > >> not really an advantage there, but I am thinking of the next > >> unification effort, e.g., for NUMA node info. I don't really want to > >> have to change all the _in_domain() interfaces to also take yet > >> another parameter for the node number. > > > > OK, what about this: all the functions that you have mentioned take a > > void *sysdata parameter. Should we convert this opaque pointer into a > > specific structure that holds the domain_nr and (in future) the NUMA > > node info? > > I doubt if we can make sysdata itself generic because I suspect we > need a way to have *some* arch-specific data. But maybe the arch > could supply a structure containing a struct device *, domain, struct > pci_ops *, list of resources, aperture info, etc. I wonder if struct > pci_host_bridge would be a reasonable place to put this stuff, e.g., > something like this: > > struct pci_host_bridge { > int domain; > int node; > struct device *dev; > struct pci_ops *ops; > struct list_head resources; > void *sysdata; > struct pci_bus *bus; /* filled in by core, not by arch */ > ... /* other existing contents managed by core */ > }; > > struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge); I'm really reluctant to give the arches more rope to hang themselves. I really dislike the use of xxxx_initcall() to do PCI initialisation ordering that is currently in widespread use through the arch code. As I hope to have proven with my arm64 code, you can have PCI support for an architecture without having to provide any arch specific code. We have enough correct code in the PCI framework; what would the architectures provide to the generic code that we cannot get by following the standard? Of course, there are always arch specific corners and they need their data structures to make sense of those, but rather than having architectures fill in a structure *before* we can setup host bridges I think we need to reverse the order. Using your example structure, I don't think it is the arch's job to provide the list of resources or the domain number before we can scan the host bridge. We should be able to get those from somewhere else (like adding by default the ioport_resource and iomem_resource and managing domain numbers inside the core framework). Best regards, Liviu
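A core-side default for the domain number could be as simple as the following sketch; this helper does not exist in the tree under discussion, the name and mechanism are assumptions:

/* Hypothetical core-owned domain allocator: architectures that do not
 * care simply get the next free number. */
static atomic_t __domain_nr = ATOMIC_INIT(-1);

int pci_get_new_domain_nr(void)
{
        return atomic_inc_return(&__domain_nr);
}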
On Wed, Apr 9, 2014 at 6:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: > On Tue, Apr 08, 2014 at 05:28:39PM +0100, Bjorn Helgaas wrote: >> On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: >> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote: >> >> >> Let me try to explain my concern about the >> >> pci_create_root_bus_in_domain() interface. We currently have these >> >> interfaces: >> >> >> >> pci_scan_root_bus() >> >> pci_scan_bus() >> >> pci_scan_bus_parented() >> >> pci_create_root_bus() >> >> ... >> >> >> One alternative is to add an _in_domain() variant of each of these >> >> interfaces, but that doesn't seem very convenient either. My idea of >> >> passing in a structure would also require adding variants, so there's >> >> not really an advantage there, but I am thinking of the next >> >> unification effort, e.g., for NUMA node info. I don't really want to >> >> have to change all the _in_domain() interfaces to also take yet >> >> another parameter for the node number. >> > >> > OK, what about this: all the functions that you have mentioned take a >> > void *sysdata parameter. Should we convert this opaque pointer into a >> > specific structure that holds the domain_nr and (in future) the NUMA >> > node info? >> >> I doubt if we can make sysdata itself generic because I suspect we >> need a way to have *some* arch-specific data. But maybe the arch >> could supply a structure containing a struct device *, domain, struct >> pci_ops *, list of resources, aperture info, etc. I wonder if struct >> pci_host_bridge would be a reasonable place to put this stuff, e.g., >> something like this: >> >> struct pci_host_bridge { >> int domain; >> int node; >> struct device *dev; >> struct pci_ops *ops; >> struct list_head resources; >> void *sysdata; >> struct pci_bus *bus; /* filled in by core, not by arch */ >> ... /* other existing contents managed by core */ >> }; >> >> struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge); > > I'm really reluctant to give the arches more rope to hang themselves. If you mean the sysdata pointer is rope to hang themselves, I think it would be great if we didn't need sysdata at all. But I think it would be a huge amount of work to get rid of it completely, and keeping it would let us work at that incrementally. > I > really dislike the use of xxxx_initcall() to do PCI initialisation ordering > that is currently in widespread use through the arch code. I certainly agree that initcall ordering is fragile and to be avoided when possible. > As I hope to > have proven with my arm64 code, you can have PCI support for an architecture > without having to provide any arch specific code. We have enough correct > code in the PCI framework, what would the architectures provide to the generic > code that we cannot get by following the standard? PCI host bridges are not architected, i.e., the PCI/PCIe specs do not say anything about how to discover them or how to program them. So the arch or a driver for another bus (ACPI, OF, etc.) must enumerate them and discover the bridge apertures (those are in the resource list). And obviously the arch has to provide the root bus number and PCI config accessors. > Of course, there are always arch specific corners and they need their data > structures to make sense of those, but rather than having architectures > fill in a structure *before* we can setup host bridges I think we need > to reverse the order. Using your example structure, I don't think is > the arch's job to provide the list of resources or the domain number > before we can scan the host bridge. We should be able to get those from > somewhere else (like adding by default the ioport_resource and > iomem_resource and managing domain numbers inside the core framework). It's possible we could manage domain numbers in the core. On ACPI systems, we currently use the ACPI _SEG value as the domain. In some cases, e.g., on ia64, config space access is done via firmware interfaces, and those interfaces expect the _SEG values. We could conceivably maintain a mapping between _SEG and domain, but I'm not sure there's an advantage there. I probably don't understand what you intend by reversing the order. Are you suggesting something like new pcibios_*() interfaces the arch can use to get the host bridge apertures and domain number? Bjorn
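For reference, the ACPI side Bjorn mentions boils down to evaluating _SEG on the root bridge object. A simplified sketch of the acpi_pci_root_add() pattern; the helper name is illustrative, and errors are folded to segment 0 for brevity:

/* The _SEG value on the root bridge object provides the
 * segment/domain, defaulting to 0 when the method is absent. */
static u16 acpi_root_segment(acpi_handle handle)
{
        unsigned long long segment = 0;

        acpi_evaluate_integer(handle, METHOD_NAME__SEG, NULL, &segment);
        return (u16)segment;
}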
On Wednesday 09 April 2014 08:02:41 Bjorn Helgaas wrote: > > It's possible we could manage domain numbers in the core. On ACPI > systems, we currently we use the ACPI _SEG value as the domain. In > some cases, e.g., on ia64, config space access is done via firmware > interfaces, and those interfaces expect the _SEG values. We could > conceivably maintain a mapping between _SEG and domain, but I'm not > sure there's an advantage there. I think it's a safe assumption that we will never have more than one firmware trying to enumerate the domains, so it would be safe to let ACPI keep doing its own thing for domain numbers, have the DT code pick a domain number using some method we come up with, and for everything else let the architecture code deal with it. There are probably very few systems that have multiple domains but use neither ACPI nor DT. Arnd
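One DT method that fits what Arnd describes is an optional property read by the core, with a fall-back to generic allocation when it is absent. A sketch; the property name is an assumption here:

/* Let firmware (DT) assign a stable domain if it wants one. The
 * "linux,pci-domain" property name is assumed for this sketch. */
static int of_pci_get_domain_nr(struct device_node *node)
{
        u32 domain;

        if (of_property_read_u32(node, "linux,pci-domain", &domain))
                return -EINVAL;         /* not specified in the DT */

        return domain;
}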
On Wed, 2014-04-09 at 08:02 -0600, Bjorn Helgaas wrote: > It's possible we could manage domain numbers in the core. On ACPI > systems, we currently we use the ACPI _SEG value as the domain. In > some cases, e.g., on ia64, config space access is done via firmware > interfaces, and those interfaces expect the _SEG values. We could > conceivably maintain a mapping between _SEG and domain, but I'm not > sure there's an advantage there. I'd rather keep the ability for the architecture to assign domain numbers. I'm working on making them relate to the physical slot numbers on our new systems so we get predictable PCI IDs which helps with some stuff like the new network device naming scheme etc... Predictability is a good thing :-) > I probably don't understand what you intend by reversing the order. > Are you suggesting something like new pcibios_*() interfaces the arch > can use to get the host bridge apertures and domain number? Cheers, Ben.
On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote: > On Wed, Apr 9, 2014 at 6:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: > > On Tue, Apr 08, 2014 at 05:28:39PM +0100, Bjorn Helgaas wrote: > >> On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: > >> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote: > >> > >> >> Let me try to explain my concern about the > >> >> pci_create_root_bus_in_domain() interface. We currently have these > >> >> interfaces: > >> >> > >> >> pci_scan_root_bus() > >> >> pci_scan_bus() > >> >> pci_scan_bus_parented() > >> >> pci_create_root_bus() > >> >> ... > >> > >> >> One alternative is to add an _in_domain() variant of each of these > >> >> interfaces, but that doesn't seem very convenient either. My idea of > >> >> passing in a structure would also require adding variants, so there's > >> >> not really an advantage there, but I am thinking of the next > >> >> unification effort, e.g., for NUMA node info. I don't really want to > >> >> have to change all the _in_domain() interfaces to also take yet > >> >> another parameter for the node number. > >> > > >> > OK, what about this: all the functions that you have mentioned take a > >> > void *sysdata parameter. Should we convert this opaque pointer into a > >> > specific structure that holds the domain_nr and (in future) the NUMA > >> > node info? > >> > >> I doubt if we can make sysdata itself generic because I suspect we > >> need a way to have *some* arch-specific data. But maybe the arch > >> could supply a structure containing a struct device *, domain, struct > >> pci_ops *, list of resources, aperture info, etc. I wonder if struct > >> pci_host_bridge would be a reasonable place to put this stuff, e.g., > >> something like this: > >> > >> struct pci_host_bridge { > >> int domain; > >> int node; > >> struct device *dev; > >> struct pci_ops *ops; > >> struct list_head resources; > >> void *sysdata; > >> struct pci_bus *bus; /* filled in by core, not by arch */ > >> ... /* other existing contents managed by core */ > >> }; > >> > >> struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge); > > > > I'm really reluctant to give the arches more rope to hang themselves. > > If you mean the sysdata pointer is rope to hang themselves, I think it > would be great it we didn't need sysdata at all. But I think it would > be a huge amount of work to get rid of it completely, and keeping it > would let us work at that incrementally. Agree. But then your suggestion was to wrap sysdata inside another structure, which to me constitutes additional rope. > > > I > > really dislike the use of xxxx_initcall() to do PCI initialisation ordering > > that is currently in widespread use through the arch code. > > I certainly agree that initcall ordering is fragile and to be avoided > when possible. > > > As I hope to > > have proven with my arm64 code, you can have PCI support for an architecture > > without having to provide any arch specific code. We have enough correct > > code in the PCI framework, what would the architectures provide to the generic > > code that we cannot get by following the standard? > > PCI host bridges are not architected, i.e., the PCI/PCIe specs do not > say anything about how to discover them or how to program them. So > the arch or a driver for another bus (ACPI, OF, etc.) must enumerate > them and discover the bridge apertures (those are in the resource > list). 
> And obviously the arch has to provide the root bus number and > PCI config accessors. The "what" for PCI host bridges is defined in the spec. The "how" is implementation defined. What I'm trying to fix with the cleanup is the ordering of pci_host_bridge creation: core creates the structure first ("what"), arch then has the chance of adding specific data to it (ops, resources, etc) ("how"). At the moment arm and powerpc do some horrible dances in trying to create their local idea of a host bridge before passing it to the core code. As for the root bus number, maybe we can offer some sensible default strategy for numbering them and the arches that don't care too much can use that. > > > Of course, there are always arch specific corners and they need their data > > structures to make sense of those, but rather than having architectures > > fill in a structure *before* we can setup host bridges I think we need > > to reverse the order. Using your example structure, I don't think is > > the arch's job to provide the list of resources or the domain number > > before we can scan the host bridge. We should be able to get those from > > somewhere else (like adding by default the ioport_resource and > > iomem_resource and managing domain numbers inside the core framework). > > It's possible we could manage domain numbers in the core. On ACPI > systems, we currently we use the ACPI _SEG value as the domain. In > some cases, e.g., on ia64, config space access is done via firmware > interfaces, and those interfaces expect the _SEG values. We could > conceivably maintain a mapping between _SEG and domain, but I'm not > sure there's an advantage there. > > I probably don't understand what you intend by reversing the order. > Are you suggesting something like new pcibios_*() interfaces the arch > can use to get the host bridge apertures and domain number? Yes. Let's stop having the architectures do early inits so that they can prepare their host bridge structures to be ready for when the PCI framework calls them. Let's create the host bridge in the PCI core and use pcibios_*() to add to it where necessary without having to race for position. Best regards, Liviu
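The inverted ordering Liviu argues for would look roughly like this inside the core. A sketch: pcibios_root_bridge_prepare() already exists as a __weak hook, while pci_get_new_domain_nr() and the function name itself are hypothetical; the rest mirrors the shape of the code in drivers/pci/probe.c.

/* The core allocates the bridge and fills in defaults first, then the
 * existing weak hook lets the arch adjust it. */
static struct pci_host_bridge *pci_setup_host_bridge(struct device *parent)
{
        struct pci_host_bridge *bridge;
        int error;

        bridge = pci_alloc_host_bridge();
        if (!bridge)
                return ERR_PTR(-ENOMEM);

        bridge->dev.parent = parent;
        bridge->domain_nr = pci_get_new_domain_nr();    /* core default */

        error = pcibios_root_bridge_prepare(bridge);    /* arch tweaks */
        if (error) {
                kfree(bridge);
                return ERR_PTR(error);
        }

        return bridge;
}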
On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote: > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote: >> On Wed, Apr 9, 2014 at 6:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: >> > On Tue, Apr 08, 2014 at 05:28:39PM +0100, Bjorn Helgaas wrote: >> >> On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote: >> >> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote: >> >> >> >> >> Let me try to explain my concern about the >> >> >> pci_create_root_bus_in_domain() interface. We currently have these >> >> >> interfaces: >> >> >> >> >> >> pci_scan_root_bus() >> >> >> pci_scan_bus() >> >> >> pci_scan_bus_parented() >> >> >> pci_create_root_bus() >> >> >> ... >> >> >> >> >> One alternative is to add an _in_domain() variant of each of these >> >> >> interfaces, but that doesn't seem very convenient either. My idea of >> >> >> passing in a structure would also require adding variants, so there's >> >> >> not really an advantage there, but I am thinking of the next >> >> >> unification effort, e.g., for NUMA node info. I don't really want to >> >> >> have to change all the _in_domain() interfaces to also take yet >> >> >> another parameter for the node number. >> >> > >> >> > OK, what about this: all the functions that you have mentioned take a >> >> > void *sysdata parameter. Should we convert this opaque pointer into a >> >> > specific structure that holds the domain_nr and (in future) the NUMA >> >> > node info? >> >> >> >> I doubt if we can make sysdata itself generic because I suspect we >> >> need a way to have *some* arch-specific data. But maybe the arch >> >> could supply a structure containing a struct device *, domain, struct >> >> pci_ops *, list of resources, aperture info, etc. I wonder if struct >> >> pci_host_bridge would be a reasonable place to put this stuff, e.g., >> >> something like this: >> >> >> >> struct pci_host_bridge { >> >> int domain; >> >> int node; >> >> struct device *dev; >> >> struct pci_ops *ops; >> >> struct list_head resources; >> >> void *sysdata; >> >> struct pci_bus *bus; /* filled in by core, not by arch */ >> >> ... /* other existing contents managed by core */ >> >> }; >> >> >> >> struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge); >> > >> > I'm really reluctant to give the arches more rope to hang themselves. >> >> If you mean the sysdata pointer is rope to hang themselves, I think it >> would be great it we didn't need sysdata at all. But I think it would >> be a huge amount of work to get rid of it completely, and keeping it >> would let us work at that incrementally. > > Agree. But then your suggestion was to wrap sysdata inside another structure, > which to me constitutes additional rope. I'll ponder this more, but I don't see your point here yet. The arch already supplies a sysdata pointer to pci_scan_root_bus(), and we stash it in every struct pci_bus already. My idea was just to pass it in differently, as a structure member rather than a separate argument. (And I'm not completely attached to my proposal; it was only to illustrate my concern about the explosion of interfaces if we have to add *_domain(), *_node(), etc.) > The "what" for PCI host bridges is defined in the spec. The "how" is implementation > defined. What I'm trying to get with the cleanup is the ordering of pci_host_bridge > creation: core creates the structure first ("what"), arch then has the chance > of adding specific data to it (ops, resources, etc) ("how"). 
> > At the moment arm and powerpc do some horrible dances in trying to create their > local idea of a host bridge before passing it to the core code. > > As for the root bus number, maybe we can offer some sensible default strategy > for numbering them and the arches that don't care too much can use that. I think we're at the limit of what can be accomplished with the abstractness of English. My opinion is that it's reasonable for the arch to discover the host bridge properties first and pass them to the core, and it doesn't require unreasonable things of the arch. I know the arm PCI setup is complicated, partly because it deals with a huge number of machines that don't have a consistent firmware interface. The x86/ACPI setup is relatively simple because it deals with a simple firmware interface. But that's just my opinion, and maybe your code will show otherwise. Bjorn
On Wednesday 09 April 2014 21:48:14 Bjorn Helgaas wrote: > On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote: > > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote: > >> >> struct pci_host_bridge { > >> >> int domain; > >> >> int node; > >> >> struct device *dev; > >> >> struct pci_ops *ops; > >> >> struct list_head resources; > >> >> void *sysdata; > >> >> struct pci_bus *bus; /* filled in by core, not by arch */ > >> >> ... /* other existing contents managed by core */ > >> >> }; > >> >> > >> >> struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge); > >> > > >> > I'm really reluctant to give the arches more rope to hang themselves. > >> > >> If you mean the sysdata pointer is rope to hang themselves, I think it > >> would be great it we didn't need sysdata at all. But I think it would > >> be a huge amount of work to get rid of it completely, and keeping it > >> would let us work at that incrementally. > > > > Agree. But then your suggestion was to wrap sysdata inside another structure, > > which to me constitutes additional rope. > > I'll ponder this more, but I don't see your point here yet. The arch > already supplies a sysdata pointer to pci_scan_root_bus(), and we > stash it in every struct pci_bus already. My idea was just to pass it > in differently, as a structure member rather than a separate argument. > (And I'm not completely attached to my proposal; it was only to > illustrate my concern about the explosion of interfaces if we have to > add *_domain(), *_node(), etc.) As a minor variation of your suggestion, how about passing in a pointer to struct pci_host_bridge, and embedding that within its own private structure? I think this is closer to how a lot of other subsystems do the abstraction. > > The "what" for PCI host bridges is defined in the spec. The "how" is implementation > > defined. What I'm trying to get with the cleanup is the ordering of pci_host_bridge > > creation: core creates the structure first ("what"), arch then has the chance > > of adding specific data to it (ops, resources, etc) ("how"). > > > > At the moment arm and powerpc do some horrible dances in trying to create their > > local idea of a host bridge before passing it to the core code. > > > > As for the root bus number, maybe we can offer some sensible default strategy > > for numbering them and the arches that don't care too much can use that. > > I think we're at the limit of what can be accomplished with the > abstractness of English. > > My opinion is that it's reasonable for the arch to discover the host > bridge properties first and pass them to the core, and it doesn't > require unreasonable things of the arch. I know the arm PCI setup is > complicated, partly because it deals with a huge number of machines > that don't have a consistent firmware interface. The x86/ACPI setup > is relatively simple because it deals with a simple firmware > interface. But that's just my opinion, and maybe your code will show > otherwise. Makes sense. One of the areas where the PCI code shows its age is the way the various parts link together: there are function calls going back and forth between architecture specific files and generic files, rather than a hierarchy of files with generic code being called by more specific code.
To do the probing properly, I think it's totally fine to have the core code expect stuff like the resources and domain number to be filled out already by whoever calls it, but then have wrappers around it that get this information from a firmware interface, or from hardwired architecture specific code where necessary. Arnd
On Thu, Apr 10, 2014 at 2:00 AM, Arnd Bergmann <arnd@arndb.de> wrote: > On Wednesday 09 April 2014 21:48:14 Bjorn Helgaas wrote: >> On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote: >> > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote: >> >> >> struct pci_host_bridge { >> >> >> int domain; >> >> >> int node; >> >> >> struct device *dev; >> >> >> struct pci_ops *ops; >> >> >> struct list_head resources; >> >> >> void *sysdata; >> >> >> struct pci_bus *bus; /* filled in by core, not by arch */ >> >> >> ... /* other existing contents managed by core */ >> >> >> }; >> >> >> >> >> >> struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge); >> >> > >> >> > I'm really reluctant to give the arches more rope to hang themselves. >> >> >> >> If you mean the sysdata pointer is rope to hang themselves, I think it >> >> would be great it we didn't need sysdata at all. But I think it would >> >> be a huge amount of work to get rid of it completely, and keeping it >> >> would let us work at that incrementally. >> > >> > Agree. But then your suggestion was to wrap sysdata inside another structure, >> > which to me constitutes additional rope. >> >> I'll ponder this more, but I don't see your point here yet. The arch >> already supplies a sysdata pointer to pci_scan_root_bus(), and we >> stash it in every struct pci_bus already. My idea was just to pass it >> in differently, as a structure member rather than a separate argument. >> (And I'm not completely attached to my proposal; it was only to >> illustrate my concern about the explosion of interfaces if we have to >> add *_domain(), *_node(), etc.) > > As a minor variation of your suggestion, how about passing in a pointer > to struct pci_host_bridge, and embed that within its own private > structure? I think this is closer to how a lot of other subsystems > do the abstraction. I'm not sure I'm following you; you mean the arch-specific sysdata structure would contain a pointer to struct pci_host_bridge? I have to admit that I'm not up on how other subsystems handle this sort of abstraction. Do you have any pointers to good examples that I can study? Bjorn
On Thursday 10 April 2014 07:50:52 Bjorn Helgaas wrote: > On Thu, Apr 10, 2014 at 2:00 AM, Arnd Bergmann <arnd@arndb.de> wrote: > > On Wednesday 09 April 2014 21:48:14 Bjorn Helgaas wrote: > >> On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote: > >> > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote: > >> >> >> struct pci_host_bridge { > >> >> >> int domain; > >> >> >> int node; > >> >> >> struct device *dev; > >> >> >> struct pci_ops *ops; > >> >> >> struct list_head resources; > >> >> >> void *sysdata; > >> >> >> struct pci_bus *bus; /* filled in by core, not by arch */ > >> >> >> ... /* other existing contents managed by core */ > >> >> >> }; > >> >> >> > >> >> >> struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge); > >> >> > > > I'm not sure I'm following you; you mean the arch-specific sysdata > structure would contain a pointer to struct pci_host_bridge? > > I have to admit that I'm not up on how other subsystems handle this > sort of abstraction. Do you have any pointers to good examples that I > can study? What I mean is like this:

/* generic structure */
struct pci_host_bridge {
        int domain;
        int node;
        struct device *dev;
        struct pci_ops *ops;
        struct list_head resources;
        struct pci_bus *bus;    /* filled in by core, not by arch */
        ...                     /* other existing contents managed by core */
};

/* arm specific structure */
struct pci_sys_data {
        char io_res_name[12];
        /* Bridge swizzling */
        u8 (*swizzle)(struct pci_dev *, u8 *);
        /* IRQ mapping */
        int (*map_irq)(const struct pci_dev *, u8, u8);
        /* Resource alignment requirements */
        void (*add_bus)(struct pci_bus *bus);
        void (*remove_bus)(struct pci_bus *bus);
        void *private_data;     /* platform controller private data */

        /* not a pointer: */
        struct pci_host_bridge bridge;
};
static inline struct pci_sys_data *to_pci_sys_data(struct pci_host_bridge *bridge)
{
        return container_of(bridge, struct pci_sys_data, bridge);
}

/* arm specific, driver specific structure */
struct tegra_pcie {
        void __iomem *pads;
        void __iomem *afi;

        struct clk *pex_clk;
        struct clk *afi_clk;
        struct clk *pll_e;
        struct clk *cml_clk;

        struct tegra_msi msi;

        struct list_head ports;
        unsigned int num_ports;

        struct pci_sys_data sysdata;
};
static inline struct tegra_pcie *to_tegra_pcie(struct pci_sys_data *sysdata)
{
        return container_of(sysdata, struct tegra_pcie, sysdata);
}

This mirrors how we treat devices: a pci_device has an embedded device, and so on, in other subsystems we can have multiple layers. In this example, the tegra pcie driver then allocates its own tegra_pcie structure, fills out the fields it needs, and registers it with the ARM architecture code, passing just the pci_sys_data pointer. That function in turn passes a pointer to the embedded pci_host_bridge down to the generic code. Ideally we should try to eliminate the architecture specific portion here, but that is a later step. Arnd
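To complete the picture, a callback that needs the driver-private state would walk back up through the embedded structures. A hypothetical helper, using only the two container_of() accessors defined above:

/* Given the generic bridge the core hands to a callback, recover the
 * driver's private structure through the embedding layers. */
static struct tegra_pcie *bridge_to_tegra_pcie(struct pci_host_bridge *bridge)
{
        struct pci_sys_data *sys = to_pci_sys_data(bridge);

        return to_tegra_pcie(sys);
}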
On Thu, Apr 10, 2014 at 03:07:44PM +0100, Arnd Bergmann wrote: > On Thursday 10 April 2014 07:50:52 Bjorn Helgaas wrote: > > On Thu, Apr 10, 2014 at 2:00 AM, Arnd Bergmann <arnd@arndb.de> wrote: > > > On Wednesday 09 April 2014 21:48:14 Bjorn Helgaas wrote: > > >> On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote: > > >> > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote: > > >> >> >> struct pci_host_bridge { > > >> >> >> int domain; > > >> >> >> int node; > > >> >> >> struct device *dev; > > >> >> >> struct pci_ops *ops; > > >> >> >> struct list_head resources; > > >> >> >> void *sysdata; > > >> >> >> struct pci_bus *bus; /* filled in by core, not by arch */ > > >> >> >> ... /* other existing contents managed by core */ > > >> >> >> }; > > >> >> >> > > >> >> >> struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge); > > >> >> > > > > > I'm not sure I'm following you; you mean the arch-specific sysdata > > structure would contain a pointer to struct pci_host_bridge? > > > > I have to admit that I'm not up on how other subsystems handle this > > sort of abstraction. Do you have any pointers to good examples that I > > can study? > > What I mean is like this: > > /* generic structure */ > struct pci_host_bridge { > int domain; > int node; > struct device *dev; > struct pci_ops *ops; > struct list_head resources; > struct pci_bus *bus; /* filled in by core, not by arch */ > ... /* other existing contents managed by core */ > }; > > /* arm specific structure */ > struct pci_sys_data { > char io_res_name[12]; > /* Bridge swizzling */ > u8 (*swizzle)(struct pci_dev *, u8 *); > /* IRQ mapping */ > int (*map_irq)(const struct pci_dev *, u8, u8); > /* Resource alignement requirements */ > void (*add_bus)(struct pci_bus *bus); > void (*remove_bus)(struct pci_bus *bus); > void *private_data; /* platform controller private data */ > > /* not a pointer: */ > struct pci_host_bridge bridge; > }; > static inline struct pci_sys_data *to_pci_sys_data(struct pci_host_bridge *bridge) > { > return container_of(bridge, struct pci_sys_data, bridge); > } > > /* arm specific, driver specific structure */ > struct tegra_pcie { > void __iomem *pads; > void __iomem *afi; > > struct clk *pex_clk; > struct clk *afi_clk; > struct clk *pll_e; > struct clk *cml_clk; > > struct tegra_msi msi; > > struct list_head ports; > unsigned int num_ports; > > struct pci_sys_data sysdata; > }; > static inline struct tegra_pcie *to_tegra_pcie(struct pci_sys_data *sysdata) > { > return container_of(sysdata, struct tegra_pcie, sysdata); > } > > This mirrors how we treat devices: a pci_device has an embedded device, > and so on, in other subsystems we can have multiple layers. > > In this example, the tegra pcie driver then allocates its own tegra_pcie > structure, fills out the fields it needs, and registers it with the > ARM architecture code, passing just the pci_sys_data pointer. That function > in turn passes a pointer to the embedded pci_host_bridge down to the > generic code. Ideally we should try to eliminate the architecture specific > portion here, but that is a later step. So Arnd seems to agree with me: we should try to get out of architecture specific pci_sys_data and link the host bridge driver straight into the PCI core. The core then can call into arch code via pcibios_*() functions. Arnd, am I reading correctly into what you are saying? 
Liviu
On Thursday 10 April 2014 15:53:04 Liviu Dudau wrote: > On Thu, Apr 10, 2014 at 03:07:44PM +0100, Arnd Bergmann wrote: > > This mirrors how we treat devices: a pci_device has an embedded device, > > and so on, in other subsystems we can have multiple layers. > > > > In this example, the tegra pcie driver then allocates its own tegra_pcie > > structure, fills out the fields it needs, and registers it with the > > ARM architecture code, passing just the pci_sys_data pointer. That function > > in turn passes a pointer to the embedded pci_host_bridge down to the > > generic code. Ideally we should try to eliminate the architecture specific > > portion here, but that is a later step. > > So Arnd seems to agree with me: we should try to get out of architecture specific > pci_sys_data and link the host bridge driver straight into the PCI core. The > core then can call into arch code via pcibios_*() functions. > > Arnd, am I reading correctly into what you are saying? Half of it ;-) I think it would be better to not have an architecture specific data structure, just like it would be better not to have architecture specific pcibios_* functions that get called by the PCI core. Note that the architecture specific functions are the ones that rely on the architecture specific data structures as well. If they only use the common fields, it should also be possible to share the code. I also don't realistically think we can get there on a lot of architectures any time soon. Note that most architectures only have one PCI host implementation, so the architecture structure is the same as the host driver structure anyway. For architectures like powerpc and arm that have people actively working on them, we have a chance to clean up that code in the way we want it (if we can agree on the direction), but it's still not trivial to do. Speaking of arm32 in particular, I think we will end up with a split approach: modern platforms (multiplatform, possibly all DT based) using PCI core infrastructure directly and no architecture specific PCI code on the one side, and a variation of today's code for the legacy platforms on the other. Arnd
On Thu, 2014-04-10 at 22:46 +0200, Arnd Bergmann wrote: > Half of it ;-) > > I think it would be better to not have an architecture specific data > structure, just like it would be better not to have architecture specific > pcibios_* functions that get called by the PCI core. Note that the > architecture specific functions are the ones that rely on the architecture > specific data structures as well. If they only use the common fields, > it should also be possible to share the code. I don't understand... we'll never get rid of architecture specific hooks in one form or another. We'll always need to do some things in an architecture or host-bridge specific way. Now if you don't want to call them arch hooks, then call them host bridge ops, but they are needed and thus they will need some kind of architecture specific extension to the base host bridge structure. EEH is one big nasty example on powerpc. Another random one that happens to be hot in my brain right now because we just finished debugging it: On powernv, we are just fixing a series of bugs caused by the generic code trying to do hot resets on PCIe "by hand" by directly toggling the secondary reset register in bridges. Well, on our root complexes, this triggers a link loss which triggers a fatal EEH "ER_all" interrupt which we escalate into a fence and all hell breaks loose. We need to mask some error traps in the hardware before doing something that can cause an intentional link loss... and unmask them when done. (Among other things, there are other issues on P7 with hot reset). So hot reset must be an architecture hook. PERST (fundamental reset) can *only* be a hook. The way to generate a PERST is not specified. In fact, on our machines, we have special GPIOs we can use to generate PERST on individual slots below a PLX bridge and a different method for slots directly on a PHB. Eventually most of those hooks land into firmware, and as such it's akin to ACPI which also keeps a separate state structure and a pile of hooks. > I also don't realistically think we can get there on a lot of architectures > any time soon. Note that most architectures only have one PCI host > implementation, so the architecture structure is the same as the host > driver structure anyway. > > For architectures like powerpc and arm that have people actively working > on them, we have a chance to clean up that code in the way we want it > (if we can agree on the direction), but it's still not trivial to do. > > Speaking of arm32 in particular, I think we will end up with a split > approach: modern platforms (multiplatform, possibly all DT based) using > PCI core infrastructure directly and no architecture specific PCI > code on the one side, and a variation of today's code for the legacy > platforms on the other. > > Arnd
On Friday 11 April 2014 15:01:09 Benjamin Herrenschmidt wrote:
> On Thu, 2014-04-10 at 22:46 +0200, Arnd Bergmann wrote:
> > Half of it ;-)
> >
> > I think it would be better not to have an architecture-specific data
> > structure, just like it would be better not to have architecture-specific
> > pcibios_* functions that get called by the PCI core. Note that the
> > architecture-specific functions are the ones that rely on the
> > architecture-specific data structures as well. If they only use the
> > common fields, it should also be possible to share the code.
>
> I don't understand... we'll never get rid of architecture-specific hooks
> in one form or another.
>
> We'll always need to do some things in an architecture- or host-bridge-specific
> way. Now if you don't want to call them arch hooks, then call them host
> bridge ops, but they are needed, and thus they will need some kind of
> architecture-specific extension to the base host bridge structure.

Absolutely right. The thing I'd like to get rid of in the long run is
global functions defined in the architecture code that are called by core
code for host-bridge-specific functionality. In a lot of cases, they
should not be needed if we can express the same things in a generic way.
In other cases, we can use function pointers that are set at the time
the host bridge is registered.

> EEH is one big nasty example on powerpc.
>
> Another random one that happens to be hot in my brain right now because
> we just finished debugging it: on powernv, we are just fixing a series
> of bugs caused by the generic code trying to do hot resets on PCIe "by
> hand" by directly toggling the secondary reset register in bridges.
>
> Well, on our root complexes, this triggers a link loss, which triggers
> a fatal EEH "ER_all" interrupt that we escalate into a fence, and all
> hell breaks loose.
>
> We need to mask some error traps in the hardware before doing something
> that can cause an intentional link loss... and unmask them when done.
> (Among other things; there are other issues on P7 with hot reset.)
>
> So hot reset must be an architecture hook.

This sounds to me very much host bridge specific, not architecture
specific. If you have the same host bridge in an ARM system, you'd want
the same things to happen, and if you have another host bridge on
PowerPC, you probably don't want that code to be called.

> PERST (fundamental reset) can *only* be a hook. The way to generate a
> PERST is not specified. In fact, on our machines, we have special GPIOs
> we can use to generate PERST on individual slots below a PLX bridge,
> and a different method for slots directly on a PHB.
>
> Eventually most of those hooks land in firmware, and as such it's akin
> to ACPI, which also keeps a separate state structure and a pile of hooks.

On PowerPC, there are currently a bunch of platform-specific callbacks
in ppc_md: pcibios_after_init, pci_exclude_device,
pcibios_fixup_resources, pcibios_fixup_bus, pcibios_enable_device_hook,
pcibios_fixup_phb, pcibios_window_alignment, and possibly some more.
There is some architecture-specific code that gets called by the PCI
core, with the main purpose of calling into these.

On ARM32, we have a similar set of callbacks in the architecture-private
pci_sys_data: swizzle, map_irq, align_resource, add_bus, remove_bus,
plus some more callbacks for setup in the hw_pci structure that is used
at initialization time: setup, scan, preinit, postinit. Again, these are
called from architecture-specific code that gets called by the PCI core.

I'm sure some of the other architectures have similar things, most of
them probably less verbose because there is less variability among
subarchitectures.

I think a nice way to deal with these in the long run would be to have
a generic 'struct pci_host_bridge_ops' that can be defined by the
architecture or the platform, or a particular host bridge driver. We'd
have to define exactly which function pointers would go in there, but a
good start would be the set of functions that are today provided by each
architecture. The reset method you describe above would also fit well
into this.

A host bridge driver can fill out the pointers with its own functions,
or put platform- or architecture-specific function pointers in there
that get called by the PCI core. There are multiple ways to deal with
default implementations here; one way would be that the core just falls
back to a generic implementation (which is currently the __weak
function) if it sees a NULL pointer. Another way would be to require
each driver to either fill out all pointers or none of them, in which
case we would use a default pci_host_bridge_ops struct that contains
pointers to the global pcibios_*() functions.

	Arnd
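To make the proposal concrete, a first cut of such an ops table, with
the NULL-pointer fallback Arnd mentions, might look like the sketch
below. The member list is assembled from the per-arch callbacks named
above; none of it is an agreed interface, and the 'ops' pointer in
struct pci_host_bridge is assumed.

struct pci_host_bridge_ops {
	u8		(*swizzle)(struct pci_dev *dev, u8 *pin);
	int		(*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
	resource_size_t	(*window_alignment)(struct pci_bus *bus,
					    unsigned long type);
	void		(*add_bus)(struct pci_bus *bus);
	void		(*remove_bus)(struct pci_bus *bus);
	int		(*reset)(struct pci_host_bridge *bridge,
				 struct pci_dev *dev);
};

/*
 * Core-side dispatch using the first scheme: fall back to today's
 * __weak global when the bridge leaves a pointer NULL.
 */
static void pci_host_add_bus(struct pci_host_bridge *bridge,
			     struct pci_bus *bus)
{
	if (bridge->ops && bridge->ops->add_bus)
		bridge->ops->add_bus(bus);
	else
		pcibios_add_bus(bus);	/* current __weak default */
}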
On Fri, 2014-04-11 at 10:36 +0200, Arnd Bergmann wrote:
> > EEH is one big nasty example on powerpc.
> >
> > Another random one that happens to be hot in my brain right now because
> > we just finished debugging it: on powernv, we are just fixing a series
> > of bugs caused by the generic code trying to do hot resets on PCIe "by
> > hand" by directly toggling the secondary reset register in bridges.
> >
> > Well, on our root complexes, this triggers a link loss, which triggers
> > a fatal EEH "ER_all" interrupt that we escalate into a fence, and all
> > hell breaks loose.
> >
> > We need to mask some error traps in the hardware before doing something
> > that can cause an intentional link loss... and unmask them when done.
> > (Among other things; there are other issues on P7 with hot reset.)
> >
> > So hot reset must be an architecture hook.
>
> This sounds to me very much host bridge specific, not architecture
> specific. If you have the same host bridge in an ARM system, you'd want
> the same things to happen, and if you have another host bridge on
> PowerPC, you probably don't want that code to be called.

Yes, it is partially host bridge specific, partially firmware related
(OPAL vs. ACPI vs. ...), so it's a mixture here. I do agree: host bridge
ops to replace most of these pcibios_* hooks does make sense.

> > PERST (fundamental reset) can *only* be a hook. The way to generate a
> > PERST is not specified. In fact, on our machines, we have special GPIOs
> > we can use to generate PERST on individual slots below a PLX bridge,
> > and a different method for slots directly on a PHB.
> >
> > Eventually most of those hooks land in firmware, and as such it's akin
> > to ACPI, which also keeps a separate state structure and a pile of hooks.
>
> On PowerPC, there are currently a bunch of platform-specific callbacks
> in ppc_md: pcibios_after_init, pci_exclude_device,
> pcibios_fixup_resources, pcibios_fixup_bus, pcibios_enable_device_hook,
> pcibios_fixup_phb, pcibios_window_alignment, and possibly some more.
> There is some architecture-specific code that gets called by the
> PCI core, with the main purpose of calling into these.

Yes. Most of them could be made into host bridge hooks fairly easily.
The remaining ones are going to need somebody (probably me) to
untangle :-)

> On ARM32, we have a similar set of callbacks in the architecture-private
> pci_sys_data: swizzle, map_irq, align_resource, add_bus, remove_bus,
> plus some more callbacks for setup in the hw_pci structure that is used
> at initialization time: setup, scan, preinit, postinit. Again, these are
> called from architecture-specific code that gets called by the PCI core.
>
> I'm sure some of the other architectures have similar things, most of
> them probably less verbose because there is less variability among
> subarchitectures.
>
> I think a nice way to deal with these in the long run would be to have
> a generic 'struct pci_host_bridge_ops' that can be defined by the
> architecture or the platform, or a particular host bridge driver.

Well, the host bridge needs either a "driver" or to be subclassed, or
both...

> We'd have to define exactly which function pointers would go in there,
> but a good start would be the set of functions that are today provided
> by each architecture. The reset method you describe above would also
> fit well into this.
>
> A host bridge driver can fill out the pointers with its own functions,
> or put platform- or architecture-specific function pointers in there
> that get called by the PCI core. There are multiple ways to deal with
> default implementations here; one way would be that the core just falls
> back to a generic implementation (which is currently the __weak
> function) if it sees a NULL pointer. Another way would be to require
> each driver to either fill out all pointers or none of them, in which
> case we would use a default pci_host_bridge_ops struct that contains
> pointers to the global pcibios_*() functions.

Either works. We can start with the easy ones, like window alignment,
and move from there.

Ben.
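Window alignment, the "easy one" Ben picks out, could be routed through
the same table first, falling back to today's __weak
pcibios_window_alignment(). In this sketch, pci_find_host_bridge() is an
assumed lookup helper; the core offered no such function at the time of
this thread, and the 'ops' member follows the earlier hypothetical
pci_host_bridge_ops layout.

static resource_size_t pci_host_window_alignment(struct pci_bus *bus,
						 unsigned long type)
{
	struct pci_host_bridge *bridge = pci_find_host_bridge(bus);

	if (bridge && bridge->ops && bridge->ops->window_alignment)
		return bridge->ops->window_alignment(bus, type);
	return pcibios_window_alignment(bus, type);
}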
On Thu, Apr 10, 2014 at 09:46:36PM +0100, Arnd Bergmann wrote:
> On Thursday 10 April 2014 15:53:04 Liviu Dudau wrote:
> > On Thu, Apr 10, 2014 at 03:07:44PM +0100, Arnd Bergmann wrote:
> > > This mirrors how we treat devices: a pci_device has an embedded device,
> > > and so on; in other subsystems we can have multiple layers.
> > >
> > > In this example, the tegra pcie driver then allocates its own tegra_pcie
> > > structure, fills out the fields it needs, and registers it with the
> > > ARM architecture code, passing just the pci_sys_data pointer. That function
> > > in turn passes a pointer to the embedded pci_host_bridge down to the
> > > generic code. Ideally we should try to eliminate the architecture-specific
> > > portion here, but that is a later step.
> >
> > So Arnd seems to agree with me: we should try to get out of architecture-specific
> > pci_sys_data and link the host bridge driver straight into the PCI core. The
> > core can then call into arch code via pcibios_*() functions.
> >
> > Arnd, am I reading correctly into what you are saying?
>
> Half of it ;-)
>
> I think it would be better not to have an architecture-specific data
> structure, just like it would be better not to have architecture-specific
> pcibios_* functions that get called by the PCI core. Note that the
> architecture-specific functions are the ones that rely on the
> architecture-specific data structures as well. If they only use the
> common fields, it should also be possible to share the code.

While I've come to like the pcibios_*() interface (and yes, it could be
formalised and abstracted into a pci_xxxx_ops structure), I don't like
the fact that those functions use architectural data in order to
function. I know it might sound strange, as they *are* supposed to be
implemented by the arches, but in my mind the link between generic code
and arch code for PCI should be the host bridge driver. That's how the
PCI spec describes it, and I see no reason why we should not be able to
adopt the same view.

To be more precise, what I would like to happen in the case of some
functions is for the PCI core code to call a pci_host_bridge_ops method,
which in turn calls the arch-specific code if it needs to. Why do I
think that would be better? Because otherwise you put code into the
architectural side to cope with a certain host bridge; then another host
bridge comes in, and you add more architectural code; but then, when you
port host bridge X to arch B, you discover that you need to add code
there as well for X. And it all ends up in the mess we currently have,
where the drivers in drivers/pci/host cannot be ported to a different
architecture because they rely on infrastructure only present in arm32
that is not properly documented.

> I also don't realistically think we can get there on a lot of architectures
> any time soon. Note that most architectures only have one PCI host
> implementation, so the architecture structure is the same as the host
> driver structure anyway.
>
> For architectures like powerpc and arm that have people actively working
> on them, we have a chance to clean up that code in the way we want it
> (if we can agree on the direction), but it's still not trivial to do.
>
> Speaking of arm32 in particular, I think we will end up with a split
> approach: modern platforms (multiplatform, possibly all DT-based) using
> PCI core infrastructure directly and no architecture-specific PCI code
> on the one side, and a variation of today's code for the legacy
> platforms on the other.

Actually, if we could come up with a compromise for the pci_fixup_*()
functions (are they still used by functional hardware?), then I think we
could convert most of the arm32 arch code to redirect the calls to the
infrastructure code. But yes, there might be a lot of resistance to
change, due to lack of resources, when changing old platforms.

Best regards,
Liviu
On Friday 11 April 2014 10:22:25 Liviu Dudau wrote:
> On Thu, Apr 10, 2014 at 09:46:36PM +0100, Arnd Bergmann wrote:
> > On Thursday 10 April 2014 15:53:04 Liviu Dudau wrote:
> > > So Arnd seems to agree with me: we should try to get out of architecture-specific
> > > pci_sys_data and link the host bridge driver straight into the PCI core. The
> > > core can then call into arch code via pcibios_*() functions.
> > >
> > > Arnd, am I reading correctly into what you are saying?
> >
> > Half of it ;-)
> >
> > I think it would be better not to have an architecture-specific data
> > structure, just like it would be better not to have architecture-specific
> > pcibios_* functions that get called by the PCI core. Note that the
> > architecture-specific functions are the ones that rely on the
> > architecture-specific data structures as well. If they only use the
> > common fields, it should also be possible to share the code.
>
> While I've come to like the pcibios_*() interface (and yes, it could be
> formalised and abstracted into a pci_xxxx_ops structure), I don't like
> the fact that those functions use architectural data in order to
> function. I know it might sound strange, as they *are* supposed to be
> implemented by the arches, but in my mind the link between generic code
> and arch code for PCI should be the host bridge driver. That's how the
> PCI spec describes it, and I see no reason why we should not be able to
> adopt the same view.

Yes, that's a good goal for the architectures that need the complexity.
I would also like to have a way to change as little as possible for the
architectures that don't care about this because they only have one
possible host controller implementation, which isn't necessarily a
conflict.

> To be more precise, what I would like to happen in the case of some
> functions is for the PCI core code to call a pci_host_bridge_ops method,
> which in turn calls the arch-specific code if it needs to. Why do I
> think that would be better? Because otherwise you put code into the
> architectural side to cope with a certain host bridge; then another host
> bridge comes in, and you add more architectural code; but then, when you
> port host bridge X to arch B, you discover that you need to add code
> there as well for X. And it all ends up in the mess we currently have,
> where the drivers in drivers/pci/host cannot be ported to a different
> architecture because they rely on infrastructure only present in arm32
> that is not properly documented.

Right. Now, it was intentional that we started putting the host drivers
into drivers/pci/host before cleaning it all up. We just had to start
somewhere.

> > I also don't realistically think we can get there on a lot of architectures
> > any time soon. Note that most architectures only have one PCI host
> > implementation, so the architecture structure is the same as the host
> > driver structure anyway.
> >
> > For architectures like powerpc and arm that have people actively working
> > on them, we have a chance to clean up that code in the way we want it
> > (if we can agree on the direction), but it's still not trivial to do.
> >
> > Speaking of arm32 in particular, I think we will end up with a split
> > approach: modern platforms (multiplatform, possibly all DT-based) using
> > PCI core infrastructure directly and no architecture-specific PCI code
> > on the one side, and a variation of today's code for the legacy
> > platforms on the other.
>
> Actually, if we could come up with a compromise for the pci_fixup_*()
> functions (are they still used by functional hardware?), then I think we
> could convert most of the arm32 arch code to redirect the calls to the
> infrastructure code.

The fixups are used by hardware that we want to keep supporting, but I
don't see a problem there. None of them rely on the architecture-specific
PCI implementation, and we could easily move the fixup code into a
separate file. Also, I suspect they are all used only on platforms that
won't be using CONFIG_ARCH_MULTIPLATFORM.

> But yes, there might be a lot of resistance to change, due to lack of
> resources, when changing old platforms.

Well, it should be trivial to just create a pci_host_bridge_ops structure
containing the currently global functions, and use that for everything
registered through pci_common_init_dev(). We definitely have to support
this method for things like iop/ixp/pxa/sa1100/footbridge, especially
those that have their own concept of PCI domains.

For the more modern multiplatform stuff that uses DT for probing and has
a driver in drivers/pci/host, we should be able to use a completely
distinct pci_host_bridge_ops structure that can be shared with arm64.

	Arnd
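The "trivial" transition Arnd sketches could be as small as a shared
table whose members simply forward to the existing global hooks, reusing
the hypothetical pci_host_bridge_ops layout from earlier in the thread:

static const struct pci_host_bridge_ops pci_legacy_bridge_ops = {
	.add_bus	  = pcibios_add_bus,
	.remove_bus	  = pcibios_remove_bus,
	.window_alignment = pcibios_window_alignment,
};

Everything registered through pci_common_init_dev() would then get this
table by default, while DT-based drivers in drivers/pci/host could
supply their own.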
On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> >> >
> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> >> > to have an arch-specific way of providing the number, especially after
> >> > looking at the existing implementations that return a value from a
> >> > variable that is never touched or incremented. My guess is that
> >> > pci_domain_nr() was created to work around the fact that there was no
> >> > domain_nr maintenance in the generic code.
> >>
> >> Well, there was no generic host bridge structure. There is one now; it
> >> should go there.
> >
> > Exactly! Hence my patch. After it gets accepted I will go through
> > architectures and remove their version of pci_domain_nr().
>
> Currently the arch has to supply pci_domain_nr() because that's the
> only way for the generic code to learn the domain. After you add
> pci_create_root_bus_in_domain(), the arch can supply the domain that
> way, and we won't need the arch-specific pci_domain_nr(). Right?
> That makes more sense to me; thanks for the explanation.
>
> Let me try to explain my concern about the
> pci_create_root_bus_in_domain() interface. We currently have these
> interfaces:
>
>   pci_scan_root_bus()
>   pci_scan_bus()
>   pci_scan_bus_parented()
>   pci_create_root_bus()
>
> pci_scan_root_bus() is a higher-level interface than
> pci_create_root_bus(), so I'm trying to migrate toward it because it
> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
> and pci_bus_add_devices().
>
> I think we can only remove the arch-specific pci_domain_nr() if that
> arch uses pci_create_root_bus_in_domain(). When we convert an arch
> from using scan_bus interfaces to using
> pci_create_root_bus_in_domain(), we will have to move the rest of the
> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
> the arch code.
>
> One alternative is to add an _in_domain() variant of each of these
> interfaces, but that doesn't seem very convenient either. My idea of
> passing in a structure would also require adding variants, so there's
> not really an advantage there, but I am thinking of the next
> unification effort, e.g., for NUMA node info. I don't really want to
> have to change all the _in_domain() interfaces to also take yet
> another parameter for the node number.

Resurrecting this thread, as I'm about to send an updated patch.

TL;DR: Bjorn is concerned that my introduction of an _in_domain()
version of pci_create_root_bus(), as a way to pass a domain number from
the arch code down (or up?) into the generic PCI code, is incomplete:
other APIs that he listed use the non-domain-aware
pci_create_root_bus(), and since he plans to replace its uses with
higher-level APIs like pci_scan_root_bus(), we would have to introduce
an _in_domain() version of those higher-level functions as well.

After a bit of thinking, I believe the change I'm proposing is fine
precisely because it is a low-level API. My intention is to automate the
management of PCI domain numbers, and any architecture that wants to go
against that should probably use the lower-abstraction API to better
control the flow.

So, in my updated v8 version of the patch I'm going to keep the
suggestion *as is* and hope we can have a(nother) discussion and come up
with a conclusion.

Best regards,
Liviu
On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> >> >
> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> >> > to have an arch-specific way of providing the number, especially after
> >> > looking at the existing implementations that return a value from a
> >> > variable that is never touched or incremented. My guess is that
> >> > pci_domain_nr() was created to work around the fact that there was no
> >> > domain_nr maintenance in the generic code.
> >>
> >> Well, there was no generic host bridge structure. There is one now; it
> >> should go there.
> >
> > Exactly! Hence my patch. After it gets accepted I will go through
> > architectures and remove their version of pci_domain_nr().
>
> Currently the arch has to supply pci_domain_nr() because that's the
> only way for the generic code to learn the domain. After you add
> pci_create_root_bus_in_domain(), the arch can supply the domain that
> way, and we won't need the arch-specific pci_domain_nr(). Right?
> That makes more sense to me; thanks for the explanation.
>
> Let me try to explain my concern about the
> pci_create_root_bus_in_domain() interface. We currently have these
> interfaces:
>
>   pci_scan_root_bus()
>   pci_scan_bus()
>   pci_scan_bus_parented()
>   pci_create_root_bus()
>
> pci_scan_root_bus() is a higher-level interface than
> pci_create_root_bus(), so I'm trying to migrate toward it because it
> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
> and pci_bus_add_devices().
>
> I think we can only remove the arch-specific pci_domain_nr() if that
> arch uses pci_create_root_bus_in_domain(). When we convert an arch
> from using scan_bus interfaces to using
> pci_create_root_bus_in_domain(), we will have to move the rest of the
> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
> the arch code.
>
> One alternative is to add an _in_domain() variant of each of these
> interfaces, but that doesn't seem very convenient either. My idea of
> passing in a structure would also require adding variants, so there's
> not really an advantage there, but I am thinking of the next
> unification effort, e.g., for NUMA node info. I don't really want to
> have to change all the _in_domain() interfaces to also take yet
> another parameter for the node number.

Bjorn,

I'm coming around to your way of thinking, and I want to suggest a
strategy for adding the domain number into the PCI framework.

My understanding is that when the pci_host_bridge structure was
introduced you were trying to keep the APIs unchanged, and hence the
creation of a bridge was hidden inside the pci_create_root_bus()
function. If we want to store the domain_nr information in the host
bridge structure, together with a pointer to sysdata, then we need to
separate the creation of the pci_host_bridge from the creation of the
root bus. At that point, pci_scan_root_bus() will need to be changed to
accept a pci_host_bridge pointer, while pci_scan_bus() and
pci_scan_bus_parented() will create the host bridge in the body of their
function.

Did I understand your intentions correctly this time? Do you agree with
this plan?

Best regards,
Liviu
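Roughly, the split Liviu proposes would reshape the entry points as in
the sketch below. The signature is one guess at what "pci_scan_root_bus()
accepts a pci_host_bridge pointer" could mean, not a settled interface,
and pci_alloc_host_bridge() is static to probe.c at this point, so it
would need exporting.

struct pci_bus *pci_scan_root_bus(struct pci_host_bridge *bridge, int bus,
				  struct pci_ops *ops, void *sysdata,
				  struct list_head *resources);

/* A host driver would then do something like: */
static struct pci_ops hypothetical_ops;		/* config accessors, elided */

static int hypothetical_host_probe(struct device *parent)
{
	struct pci_host_bridge *bridge;
	struct pci_bus *root;
	LIST_HEAD(resources);

	bridge = pci_alloc_host_bridge();	/* would need exporting */
	if (!bridge)
		return -ENOMEM;

	bridge->dev.parent = parent;
	bridge->domain_nr = 0;			/* driver-chosen domain */

	root = pci_scan_root_bus(bridge, 0, &hypothetical_ops, NULL,
				 &resources);
	return root ? 0 : -ENODEV;
}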
On Fri, Jul 4, 2014 at 8:57 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
>> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
>> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
>> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
>> >> >
>> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
>> >> > to have an arch-specific way of providing the number, especially after
>> >> > looking at the existing implementations that return a value from a
>> >> > variable that is never touched or incremented. My guess is that
>> >> > pci_domain_nr() was created to work around the fact that there was no
>> >> > domain_nr maintenance in the generic code.
>> >>
>> >> Well, there was no generic host bridge structure. There is one now; it
>> >> should go there.
>> >
>> > Exactly! Hence my patch. After it gets accepted I will go through
>> > architectures and remove their version of pci_domain_nr().
>>
>> Currently the arch has to supply pci_domain_nr() because that's the
>> only way for the generic code to learn the domain. After you add
>> pci_create_root_bus_in_domain(), the arch can supply the domain that
>> way, and we won't need the arch-specific pci_domain_nr(). Right?
>> That makes more sense to me; thanks for the explanation.
>>
>> Let me try to explain my concern about the
>> pci_create_root_bus_in_domain() interface. We currently have these
>> interfaces:
>>
>>   pci_scan_root_bus()
>>   pci_scan_bus()
>>   pci_scan_bus_parented()
>>   pci_create_root_bus()
>>
>> pci_scan_root_bus() is a higher-level interface than
>> pci_create_root_bus(), so I'm trying to migrate toward it because it
>> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
>> and pci_bus_add_devices().
>>
>> I think we can only remove the arch-specific pci_domain_nr() if that
>> arch uses pci_create_root_bus_in_domain(). When we convert an arch
>> from using scan_bus interfaces to using
>> pci_create_root_bus_in_domain(), we will have to move the rest of the
>> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
>> the arch code.
>>
>> One alternative is to add an _in_domain() variant of each of these
>> interfaces, but that doesn't seem very convenient either. My idea of
>> passing in a structure would also require adding variants, so there's
>> not really an advantage there, but I am thinking of the next
>> unification effort, e.g., for NUMA node info. I don't really want to
>> have to change all the _in_domain() interfaces to also take yet
>> another parameter for the node number.
>
> ...
> My understanding is that when the pci_host_bridge structure was
> introduced you were trying to keep the APIs unchanged, and hence the
> creation of a bridge was hidden inside the pci_create_root_bus()
> function.

You mean pci_alloc_host_bridge()? Right; ideally I would have used
pci_scan_root_bus() everywhere and gotten rid of pci_create_root_bus().
The outline of pci_scan_root_bus() is:

  pci_create_root_bus()
  pci_scan_child_bus()
  pci_bus_add_devices()

The problem was that several arches do interesting things scattered
among that core. The ACPI host bridge driver used on x86 and ia64 does
resource allocation before pci_bus_add_devices(), as does parisc.
Probably all arches should do this, but they don't.

And powerpc and sparc use of_scan_bus() or something similar instead of
pci_scan_child_bus(). They probably *could* provide config space
accessors that talk to OF and would allow pci_scan_child_bus() to work,
but that seemed like too much work at the time.

> If we want to store the domain_nr information in the host bridge
> structure, together with a pointer to sysdata, then we need to separate
> the creation of the pci_host_bridge from the creation of the root bus.
> At that point, pci_scan_root_bus() will need to be changed to accept a
> pci_host_bridge pointer, while pci_scan_bus() and pci_scan_bus_parented()
> will create the host bridge in the body of their function.

It's hard to change an existing interface like pci_scan_root_bus(),
because it's called from so many places and you have to change them all
at once. Then, if something goes wrong, the revert makes a mess for
everybody. But I think it makes sense to add a new interface that does
what you want.

Bjorn
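One shape such a new interface could take is a domain-aware sibling of
pci_scan_root_bus() built from the same three steps Bjorn outlines. The
name is invented, and the bus-number resource bookkeeping that the real
pci_scan_root_bus() performs is omitted, both simplifications for
illustration.

struct pci_bus *pci_scan_root_bus_in_domain(struct device *parent,
		int domain, int bus, struct pci_ops *ops, void *sysdata,
		struct list_head *resources)
{
	struct pci_bus *b;

	b = pci_create_root_bus_in_domain(parent, domain, bus, ops,
					  sysdata, resources);
	if (IS_ERR(b))
		return NULL;

	pci_scan_child_bus(b);
	pci_bus_add_devices(b);
	return b;
}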
On Tue, Jul 08, 2014 at 02:11:36AM +0100, Bjorn Helgaas wrote:
> On Fri, Jul 4, 2014 at 8:57 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> >> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> >> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
> >> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> >> >> >
> >> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> >> >> > to have an arch-specific way of providing the number, especially after
> >> >> > looking at the existing implementations that return a value from a
> >> >> > variable that is never touched or incremented. My guess is that
> >> >> > pci_domain_nr() was created to work around the fact that there was no
> >> >> > domain_nr maintenance in the generic code.
> >> >>
> >> >> Well, there was no generic host bridge structure. There is one now; it
> >> >> should go there.
> >> >
> >> > Exactly! Hence my patch. After it gets accepted I will go through
> >> > architectures and remove their version of pci_domain_nr().
> >>
> >> Currently the arch has to supply pci_domain_nr() because that's the
> >> only way for the generic code to learn the domain. After you add
> >> pci_create_root_bus_in_domain(), the arch can supply the domain that
> >> way, and we won't need the arch-specific pci_domain_nr(). Right?
> >> That makes more sense to me; thanks for the explanation.
> >>
> >> Let me try to explain my concern about the
> >> pci_create_root_bus_in_domain() interface. We currently have these
> >> interfaces:
> >>
> >>   pci_scan_root_bus()
> >>   pci_scan_bus()
> >>   pci_scan_bus_parented()
> >>   pci_create_root_bus()
> >>
> >> pci_scan_root_bus() is a higher-level interface than
> >> pci_create_root_bus(), so I'm trying to migrate toward it because it
> >> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
> >> and pci_bus_add_devices().
> >>
> >> I think we can only remove the arch-specific pci_domain_nr() if that
> >> arch uses pci_create_root_bus_in_domain(). When we convert an arch
> >> from using scan_bus interfaces to using
> >> pci_create_root_bus_in_domain(), we will have to move the rest of the
> >> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
> >> the arch code.
> >>
> >> One alternative is to add an _in_domain() variant of each of these
> >> interfaces, but that doesn't seem very convenient either. My idea of
> >> passing in a structure would also require adding variants, so there's
> >> not really an advantage there, but I am thinking of the next
> >> unification effort, e.g., for NUMA node info. I don't really want to
> >> have to change all the _in_domain() interfaces to also take yet
> >> another parameter for the node number.
> >
> > ...
> > My understanding is that when the pci_host_bridge structure was
> > introduced you were trying to keep the APIs unchanged, and hence the
> > creation of a bridge was hidden inside the pci_create_root_bus()
> > function.
>
> You mean pci_alloc_host_bridge()? Right; ideally I would have used
> pci_scan_root_bus() everywhere and gotten rid of pci_create_root_bus().
> The outline of pci_scan_root_bus() is:
>
>   pci_create_root_bus()
>   pci_scan_child_bus()
>   pci_bus_add_devices()
>
> The problem was that several arches do interesting things scattered
> among that core. The ACPI host bridge driver used on x86 and ia64 does
> resource allocation before pci_bus_add_devices(), as does parisc.
> Probably all arches should do this, but they don't.
>
> And powerpc and sparc use of_scan_bus() or something similar instead of
> pci_scan_child_bus(). They probably *could* provide config space
> accessors that talk to OF and would allow pci_scan_child_bus() to work,
> but that seemed like too much work at the time.
>
> > If we want to store the domain_nr information in the host bridge
> > structure, together with a pointer to sysdata, then we need to separate
> > the creation of the pci_host_bridge from the creation of the root bus.
> > At that point, pci_scan_root_bus() will need to be changed to accept a
> > pci_host_bridge pointer, while pci_scan_bus() and pci_scan_bus_parented()
> > will create the host bridge in the body of their function.
>
> It's hard to change an existing interface like pci_scan_root_bus(),
> because it's called from so many places and you have to change them all
> at once. Then, if something goes wrong, the revert makes a mess for
> everybody. But I think it makes sense to add a new interface that does
> what you want.

OK, I understand your concern. It does sort of take us back to the
initial discussion, where you were arguing against adding a new set of
functions for every existing function, but it makes sense from a
transition point of view.

Best regards,
Liviu
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index fd11c12..172c615 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1714,8 +1714,9 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
 {
 }
 
-struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
-		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
+		int domain, int bus, struct pci_ops *ops, void *sysdata,
+		struct list_head *resources)
 {
 	int error;
 	struct pci_host_bridge *bridge;
@@ -1728,30 +1729,34 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 
 	bridge = pci_alloc_host_bridge();
 	if (!bridge)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	bridge->dev.parent = parent;
 	bridge->dev.release = pci_release_host_bridge_dev;
+	bridge->domain_nr = domain;
 	error = pcibios_root_bridge_prepare(bridge);
 	if (error)
 		goto err_out;
 
 	b = pci_alloc_bus();
-	if (!b)
+	if (!b) {
+		error = -ENOMEM;
 		goto err_out;
+	}
 
 	b->sysdata = sysdata;
 	b->ops = ops;
 	b->number = b->busn_res.start = bus;
-	b2 = pci_find_bus(pci_domain_nr(b), bus);
+	b2 = pci_find_bus(bridge->domain_nr, bus);
 	if (b2) {
 		/* If we already got to this bus through a different bridge, ignore it */
 		dev_dbg(&b2->dev, "bus already known\n");
+		error = -EEXIST;
 		goto err_bus_out;
 	}
 
 	bridge->bus = b;
-	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus);
+	dev_set_name(&bridge->dev, "pci%04x:%02x", bridge->domain_nr, bus);
 	error = device_register(&bridge->dev);
 	if (error) {
 		put_device(&bridge->dev);
@@ -1766,7 +1771,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 
 	b->dev.class = &pcibus_class;
 	b->dev.parent = b->bridge;
-	dev_set_name(&b->dev, "%04x:%02x", pci_domain_nr(b), bus);
+	dev_set_name(&b->dev, "%04x:%02x", bridge->domain_nr, bus);
 	error = device_register(&b->dev);
 	if (error)
 		goto class_dev_reg_err;
@@ -1816,7 +1821,27 @@ err_bus_out:
 	kfree(b);
 err_out:
 	kfree(bridge);
-	return NULL;
+	return ERR_PTR(error);
+}
+
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+{
+	int domain_nr;
+	struct pci_bus *b = pci_alloc_bus();
+	if (!b)
+		return NULL;
+
+	b->sysdata = sysdata;
+	domain_nr = pci_domain_nr(b);
+	kfree(b);
+
+	b = pci_create_root_bus_in_domain(parent, domain_nr, bus,
+			ops, sysdata, resources);
+	if (IS_ERR(b))
+		return NULL;
+
+	return b;
+}
 
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33aa2ca..1eed009 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -394,6 +394,7 @@ struct pci_host_bridge_window {
 struct pci_host_bridge {
 	struct device dev;
 	struct pci_bus *bus;		/* root bus */
+	int domain_nr;
 	struct list_head windows;	/* pci_host_bridge_windows */
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
@@ -747,6 +748,9 @@ struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 				    struct pci_ops *ops, void *sysdata,
 				    struct list_head *resources);
+struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
+				    int domain, int bus, struct pci_ops *ops,
+				    void *sysdata, struct list_head *resources);
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
 int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
 void pci_bus_release_busn_res(struct pci_bus *b);
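Worth noting when reading the patch: pci_create_root_bus_in_domain()
reports failure with ERR_PTR() rather than NULL, so callers must test it
with IS_ERR(). A minimal caller might look like the sketch below; the
sequential domain allocator is an invented placeholder for however an
arch or host bridge driver decides on domain numbers.

#include <linux/err.h>
#include <linux/pci.h>

static int next_domain_nr;	/* assumption: domains handed out in order */

static struct pci_bus *hypothetical_scan_host(struct device *parent,
		struct pci_ops *ops, void *sysdata, struct list_head *res)
{
	struct pci_bus *bus;

	bus = pci_create_root_bus_in_domain(parent, next_domain_nr++, 0,
					    ops, sysdata, res);
	if (IS_ERR(bus))	/* ERR_PTR(), not NULL, on failure */
		return NULL;

	pci_scan_child_bus(bus);
	pci_bus_add_devices(bus);
	return bus;
}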