diff mbox

[v3,10/10] ARM: tegra: pcie: Add device tree support

Message ID 1343332512-28762-11-git-send-email-thierry.reding@avionic-design.de (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Thierry Reding July 26, 2012, 7:55 p.m. UTC
This commit adds support for instantiating the Tegra PCIe controller
from a device tree.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
---
Changes in v3:
- rewrite the DT binding and adapt driver correspondingly

Changes in v2:
- increase compile coverage by using the IS_ENABLED() macro
- disable node by default

 .../bindings/pci/nvidia,tegra20-pcie.txt           |  94 ++++++++++
 arch/arm/boot/dts/tegra20.dtsi                     |  62 +++++++
 arch/arm/mach-tegra/board-dt-tegra20.c             |   7 +-
 arch/arm/mach-tegra/pcie.c                         | 195 +++++++++++++++++++++
 4 files changed, 353 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt

Comments

Thierry Reding Aug. 14, 2012, 8:12 p.m. UTC | #1
On Thu, Jul 26, 2012 at 09:55:12PM +0200, Thierry Reding wrote:
> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
> index a094c97..c886dff 100644
> --- a/arch/arm/boot/dts/tegra20.dtsi
> +++ b/arch/arm/boot/dts/tegra20.dtsi
> @@ -199,6 +199,68 @@
>  		#size-cells = <0>;
>  	};
>  
> +	pcie-controller {
> +		compatible = "nvidia,tegra20-pcie";
> +		reg = <0x80003000 0x00000800   /* PADS registers */
> +		       0x80003800 0x00000200   /* AFI registers */
> +		       0x81000000 0x01000000   /* configuration space */
> +		       0x90000000 0x10000000>; /* extended configuration space */
> +		interrupts = <0 98 0x04   /* controller interrupt */
> +		              0 99 0x04>; /* MSI interrupt */
> +		status = "disabled";
> +
> +		ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
> +			  0 1 0  0x81000000 0x00800000   /* port 0 config space */
> +			  0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
> +			  0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
> +			  0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
> +			  0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
> +
> +			  1 0 0  0x80001000 0x00001000   /* root port 1 */
> +			  1 1 0  0x81800000 0x00800000   /* port 1 config space */
> +			  1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
> +			  1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
> +			  1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
> +			  1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */

I've been thinking about this some more. The translations for both the
regular and extended configuration spaces are configured in the top-
level PCIe controller. It is therefore wrong how they are passed to the
PCI host bridges via the ranges property.

I remember Mitch saying that it should be passed down to the children
because it is partitioned among them, but since the layout is compatible
with ECAM, the partitioning isn't as simple as what's in the tree. In
fact the partitions will be dependent on the number of devices attached
to the host bridges.

> +
> +		#address-cells = <3>;
> +		#size-cells = <1>;
> +
> +		pci@0 {
> +			device_type = "pciex";
> +			reg = <0 0 0 0x1000>;
> +			status = "disabled";
> +
> +			#address-cells = <3>;
> +			#size-cells = <2>;
> +
> +			ranges = <0x80000000 0 0  0 1 0  0 0x00800000   /* config space */
> +				  0x90000000 0 0  0 2 0  0 0x08000000   /* ext config space */
> +				  0x81000000 0 0  0 3 0  0 0x00010000   /* I/O */
> +				  0x82000000 0 0  0 4 0  0 0x08000000   /* non-prefetchable memory */
> +				  0xc2000000 0 0  0 5 0  0 0x08000000>; /* prefetchable memory */
> +
> +			nvidia,num-lanes = <2>;
> +		};
> +
> +		pci@1 {
> +			device_type = "pciex";
> +			reg = <1 0 0 0x1000>;
> +			status = "disabled";
> +
> +			#address-cells = <3>;
> +			#size-cells = <2>;
> +
> +			ranges = <0x80000000 0 0  1 1 0  0 0x00800000   /* config space */
> +				  0x90000000 0 0  1 2 0  0 0x08000000   /* ext config space */
> +				  0x81000000 0 0  1 3 0  0 0x00010000   /* I/O */
> +				  0x82000000 0 0  1 4 0  0 0x08000000   /* non-prefetchable memory */
> +				  0xc2000000 0 0  1 5 0  0 0x08000000>; /* prefetchable memory */
> +
> +			nvidia,num-lanes = <2>;
> +		};
> +	};

The same is true for the ranges properties of the PCI host bridge nodes.
Which part of the configuration spaces maps to the children of the
respective host bridge depends on the actual device hierarchy.

Would it be possible to alternatively pass the complete range to the
children without further partitioning?

The driver doesn't actually care about the ranges property and only uses
the values specified in the reg property of the pcie-controller node.

Thierry
Bjorn Helgaas Aug. 14, 2012, 11:50 p.m. UTC | #2
On Tue, Aug 14, 2012 at 1:12 PM, Thierry Reding
<thierry.reding@avionic-design.de> wrote:
> On Thu, Jul 26, 2012 at 09:55:12PM +0200, Thierry Reding wrote:
>> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
>> index a094c97..c886dff 100644
>> --- a/arch/arm/boot/dts/tegra20.dtsi
>> +++ b/arch/arm/boot/dts/tegra20.dtsi
>> @@ -199,6 +199,68 @@
>>               #size-cells = <0>;
>>       };
>>
>> +     pcie-controller {
>> +             compatible = "nvidia,tegra20-pcie";
>> +             reg = <0x80003000 0x00000800   /* PADS registers */
>> +                    0x80003800 0x00000200   /* AFI registers */
>> +                    0x81000000 0x01000000   /* configuration space */
>> +                    0x90000000 0x10000000>; /* extended configuration space */
>> +             interrupts = <0 98 0x04   /* controller interrupt */
>> +                           0 99 0x04>; /* MSI interrupt */
>> +             status = "disabled";
>> +
>> +             ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
>> +                       0 1 0  0x81000000 0x00800000   /* port 0 config space */
>> +                       0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
>> +                       0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
>> +                       0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
>> +                       0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
>> +
>> +                       1 0 0  0x80001000 0x00001000   /* root port 1 */
>> +                       1 1 0  0x81800000 0x00800000   /* port 1 config space */
>> +                       1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
>> +                       1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
>> +                       1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
>> +                       1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
>
> I've been thinking about this some more. The translations for both the
> regular and extended configuration spaces are configured in the top-
> level PCIe controller. It is therefore wrong how they are passed to the
> PCI host bridges via the ranges property.
>
> I remember Mitch saying that it should be passed down to the children
> because it is partitioned among them, but since the layout is compatible
> with ECAM, the partitioning isn't as simple as what's in the tree. In
> fact the partitions will be dependent on the number of devices attached
> to the host bridges.

I don't understand this last bit about the number of devices attached
to the host bridges.  Logically, the host bridge has a bus number
aperture that you can know up front, even before you know anything
about what devices are below it.  On x86, for example, the ACPI _CRS
method has something like "[bus 00-7f]" in it, which means that any
buses in that range are below this bridge.  That doesn't tell us
anything about which buses actually have devices on them, of course;
it's just analogous to the secondary and subordinate bus number
registers in a P2P bridge.

>> +
>> +             #address-cells = <3>;
>> +             #size-cells = <1>;
>> +
>> +             pci@0 {
>> +                     device_type = "pciex";
>> +                     reg = <0 0 0 0x1000>;
>> +                     status = "disabled";
>> +
>> +                     #address-cells = <3>;
>> +                     #size-cells = <2>;
>> +
>> +                     ranges = <0x80000000 0 0  0 1 0  0 0x00800000   /* config space */
>> +                               0x90000000 0 0  0 2 0  0 0x08000000   /* ext config space */
>> +                               0x81000000 0 0  0 3 0  0 0x00010000   /* I/O */
>> +                               0x82000000 0 0  0 4 0  0 0x08000000   /* non-prefetchable memory */
>> +                               0xc2000000 0 0  0 5 0  0 0x08000000>; /* prefetchable memory */
>> +
>> +                     nvidia,num-lanes = <2>;
>> +             };
>> +
>> +             pci@1 {
>> +                     device_type = "pciex";
>> +                     reg = <1 0 0 0x1000>;
>> +                     status = "disabled";
>> +
>> +                     #address-cells = <3>;
>> +                     #size-cells = <2>;
>> +
>> +                     ranges = <0x80000000 0 0  1 1 0  0 0x00800000   /* config space */
>> +                               0x90000000 0 0  1 2 0  0 0x08000000   /* ext config space */
>> +                               0x81000000 0 0  1 3 0  0 0x00010000   /* I/O */
>> +                               0x82000000 0 0  1 4 0  0 0x08000000   /* non-prefetchable memory */
>> +                               0xc2000000 0 0  1 5 0  0 0x08000000>; /* prefetchable memory */
>> +
>> +                     nvidia,num-lanes = <2>;
>> +             };
>> +     };
>
> The same is true for the ranges properties of the PCI host bridge nodes.
> Which part of the configuration spaces maps to the children of the
> respective host bridge depends on the actual device hierarchy.
>
> Would it be possible to alternatively pass the complete range to the
> children without further partitioning?
>
> The driver doesn't actually care about the ranges property and only uses
> the values specified in the reg property of the pcie-controller node.
>
> Thierry
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Aug. 15, 2012, 6:37 a.m. UTC | #3
On Tue, Aug 14, 2012 at 04:50:26PM -0700, Bjorn Helgaas wrote:
> On Tue, Aug 14, 2012 at 1:12 PM, Thierry Reding
> <thierry.reding@avionic-design.de> wrote:
> > On Thu, Jul 26, 2012 at 09:55:12PM +0200, Thierry Reding wrote:
> >> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
> >> index a094c97..c886dff 100644
> >> --- a/arch/arm/boot/dts/tegra20.dtsi
> >> +++ b/arch/arm/boot/dts/tegra20.dtsi
> >> @@ -199,6 +199,68 @@
> >>               #size-cells = <0>;
> >>       };
> >>
> >> +     pcie-controller {
> >> +             compatible = "nvidia,tegra20-pcie";
> >> +             reg = <0x80003000 0x00000800   /* PADS registers */
> >> +                    0x80003800 0x00000200   /* AFI registers */
> >> +                    0x81000000 0x01000000   /* configuration space */
> >> +                    0x90000000 0x10000000>; /* extended configuration space */
> >> +             interrupts = <0 98 0x04   /* controller interrupt */
> >> +                           0 99 0x04>; /* MSI interrupt */
> >> +             status = "disabled";
> >> +
> >> +             ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
> >> +                       0 1 0  0x81000000 0x00800000   /* port 0 config space */
> >> +                       0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
> >> +                       0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
> >> +                       0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
> >> +                       0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
> >> +
> >> +                       1 0 0  0x80001000 0x00001000   /* root port 1 */
> >> +                       1 1 0  0x81800000 0x00800000   /* port 1 config space */
> >> +                       1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
> >> +                       1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
> >> +                       1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
> >> +                       1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
> >
> > I've been thinking about this some more. The translations for both the
> > regular and extended configuration spaces are configured in the top-
> > level PCIe controller. It is therefore wrong how they are passed to the
> > PCI host bridges via the ranges property.
> >
> > I remember Mitch saying that it should be passed down to the children
> > because it is partitioned among them, but since the layout is compatible
> > with ECAM, the partitioning isn't as simple as what's in the tree. In
> > fact the partitions will be dependent on the number of devices attached
> > to the host bridges.
> 
> I don't understand this last bit about the number of devices attached
> to the host bridges.  Logically, the host bridge has a bus number
> aperture that you can know up front, even before you know anything
> about what devices are below it.  On x86, for example, the ACPI _CRS
> method has something like "[bus 00-7f]" in it, which means that any
> buses in that range are below this bridge.  That doesn't tell us
> anything about which buses actually have devices on them, of course;
> it's just analogous to the secondary and subordinate bus number
> registers in a P2P bridge.

That's one of the issues I still need to take care of. Currently no bus
resource is attached to the individual bridges (nor the PCI controller
for that matter), so the PCI core will assign them dynamically. If this
range is known at boot time we could assign ECAM ranges based on the bus
numbers. Standard ECAM ranges, that is. On Tegra this won't work because
as Stephen mentioned in a previous mail, the bus field is not the top
field in the ECAM addresses. Basically what you have is this:

	[27:24] upper 4 bits of the register address for extended
	        configuration space
	[23:16] bus number
	[15:11] device number
	[10: 8] device function
	[ 8: 0] register

So the ECAM space cannot be partitioned by bus number.

Thierry
Bjorn Helgaas Aug. 15, 2012, 12:18 p.m. UTC | #4
On Tue, Aug 14, 2012 at 11:37 PM, Thierry Reding
<thierry.reding@avionic-design.de> wrote:
> On Tue, Aug 14, 2012 at 04:50:26PM -0700, Bjorn Helgaas wrote:
>> On Tue, Aug 14, 2012 at 1:12 PM, Thierry Reding
>> <thierry.reding@avionic-design.de> wrote:
>> > On Thu, Jul 26, 2012 at 09:55:12PM +0200, Thierry Reding wrote:
>> >> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
>> >> index a094c97..c886dff 100644
>> >> --- a/arch/arm/boot/dts/tegra20.dtsi
>> >> +++ b/arch/arm/boot/dts/tegra20.dtsi
>> >> @@ -199,6 +199,68 @@
>> >>               #size-cells = <0>;
>> >>       };
>> >>
>> >> +     pcie-controller {
>> >> +             compatible = "nvidia,tegra20-pcie";
>> >> +             reg = <0x80003000 0x00000800   /* PADS registers */
>> >> +                    0x80003800 0x00000200   /* AFI registers */
>> >> +                    0x81000000 0x01000000   /* configuration space */
>> >> +                    0x90000000 0x10000000>; /* extended configuration space */
>> >> +             interrupts = <0 98 0x04   /* controller interrupt */
>> >> +                           0 99 0x04>; /* MSI interrupt */
>> >> +             status = "disabled";
>> >> +
>> >> +             ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
>> >> +                       0 1 0  0x81000000 0x00800000   /* port 0 config space */
>> >> +                       0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
>> >> +                       0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
>> >> +                       0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
>> >> +                       0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
>> >> +
>> >> +                       1 0 0  0x80001000 0x00001000   /* root port 1 */
>> >> +                       1 1 0  0x81800000 0x00800000   /* port 1 config space */
>> >> +                       1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
>> >> +                       1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
>> >> +                       1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
>> >> +                       1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
>> >
>> > I've been thinking about this some more. The translations for both the
>> > regular and extended configuration spaces are configured in the top-
>> > level PCIe controller. It is therefore wrong how they are passed to the
>> > PCI host bridges via the ranges property.
>> >
>> > I remember Mitch saying that it should be passed down to the children
>> > because it is partitioned among them, but since the layout is compatible
>> > with ECAM, the partitioning isn't as simple as what's in the tree. In
>> > fact the partitions will be dependent on the number of devices attached
>> > to the host bridges.
>>
>> I don't understand this last bit about the number of devices attached
>> to the host bridges.  Logically, the host bridge has a bus number
>> aperture that you can know up front, even before you know anything
>> about what devices are below it.  On x86, for example, the ACPI _CRS
>> method has something like "[bus 00-7f]" in it, which means that any
>> buses in that range are below this bridge.  That doesn't tell us
>> anything about which buses actually have devices on them, of course;
>> it's just analogous to the secondary and subordinate bus number
>> registers in a P2P bridge.
>
> That's one of the issues I still need to take care of. Currently no bus
> resource is attached to the individual bridges (nor the PCI controller
> for that matter), so the PCI core will assign them dynamically.

So your PCI controller driver knows how to program the controller bus
number aperture?  Sometimes people start by assuming that two host
bridges both have [bus 00-ff] apertures, then they enumerate below the
first and adjust the bus number apertures based on what they found.
For example, if they found buses 00-12 behind the first bridge, they
make the apertures [bus 00-12] for the first bridge and [bus 13-ff]
for the second.  That might be the case, depending on what firmware
set up, but it seems like a dubious way to do it, and of course it
precludes a lot of hot-plug scenarios.

> If this
> range is known at boot time we could assign ECAM ranges based on the bus
> numbers. Standard ECAM ranges, that is. On Tegra this won't work because
> as Stephen mentioned in a previous mail, the bus field is not the top
> field in the ECAM addresses. Basically what you have is this:
>
>         [27:24] upper 4 bits of the register address for extended
>                 configuration space
>         [23:16] bus number
>         [15:11] device number
>         [10: 8] device function
>         [ 8: 0] register
>
> So the ECAM space cannot be partitioned by bus number.

Ah, OK, so definitely not standard PCIe ECAM.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Aug. 15, 2012, 12:30 p.m. UTC | #5
On Wed, Aug 15, 2012 at 05:18:04AM -0700, Bjorn Helgaas wrote:
> On Tue, Aug 14, 2012 at 11:37 PM, Thierry Reding
> <thierry.reding@avionic-design.de> wrote:
> > On Tue, Aug 14, 2012 at 04:50:26PM -0700, Bjorn Helgaas wrote:
> >> On Tue, Aug 14, 2012 at 1:12 PM, Thierry Reding
> >> <thierry.reding@avionic-design.de> wrote:
> >> > On Thu, Jul 26, 2012 at 09:55:12PM +0200, Thierry Reding wrote:
> >> >> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
> >> >> index a094c97..c886dff 100644
> >> >> --- a/arch/arm/boot/dts/tegra20.dtsi
> >> >> +++ b/arch/arm/boot/dts/tegra20.dtsi
> >> >> @@ -199,6 +199,68 @@
> >> >>               #size-cells = <0>;
> >> >>       };
> >> >>
> >> >> +     pcie-controller {
> >> >> +             compatible = "nvidia,tegra20-pcie";
> >> >> +             reg = <0x80003000 0x00000800   /* PADS registers */
> >> >> +                    0x80003800 0x00000200   /* AFI registers */
> >> >> +                    0x81000000 0x01000000   /* configuration space */
> >> >> +                    0x90000000 0x10000000>; /* extended configuration space */
> >> >> +             interrupts = <0 98 0x04   /* controller interrupt */
> >> >> +                           0 99 0x04>; /* MSI interrupt */
> >> >> +             status = "disabled";
> >> >> +
> >> >> +             ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
> >> >> +                       0 1 0  0x81000000 0x00800000   /* port 0 config space */
> >> >> +                       0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
> >> >> +                       0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
> >> >> +                       0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
> >> >> +                       0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
> >> >> +
> >> >> +                       1 0 0  0x80001000 0x00001000   /* root port 1 */
> >> >> +                       1 1 0  0x81800000 0x00800000   /* port 1 config space */
> >> >> +                       1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
> >> >> +                       1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
> >> >> +                       1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
> >> >> +                       1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
> >> >
> >> > I've been thinking about this some more. The translations for both the
> >> > regular and extended configuration spaces are configured in the top-
> >> > level PCIe controller. It is therefore wrong how they are passed to the
> >> > PCI host bridges via the ranges property.
> >> >
> >> > I remember Mitch saying that it should be passed down to the children
> >> > because it is partitioned among them, but since the layout is compatible
> >> > with ECAM, the partitioning isn't as simple as what's in the tree. In
> >> > fact the partitions will be dependent on the number of devices attached
> >> > to the host bridges.
> >>
> >> I don't understand this last bit about the number of devices attached
> >> to the host bridges.  Logically, the host bridge has a bus number
> >> aperture that you can know up front, even before you know anything
> >> about what devices are below it.  On x86, for example, the ACPI _CRS
> >> method has something like "[bus 00-7f]" in it, which means that any
> >> buses in that range are below this bridge.  That doesn't tell us
> >> anything about which buses actually have devices on them, of course;
> >> it's just analogous to the secondary and subordinate bus number
> >> registers in a P2P bridge.
> >
> > That's one of the issues I still need to take care of. Currently no bus
> > resource is attached to the individual bridges (nor the PCI controller
> > for that matter), so the PCI core will assign them dynamically.
> 
> So your PCI controller driver knows how to program the controller bus
> number aperture?  Sometimes people start by assuming that two host
> bridges both have [bus 00-ff] apertures, then they enumerate below the
> first and adjust the bus number apertures based on what they found.
> For example, if they found buses 00-12 behind the first bridge, they
> make the apertures [bus 00-12] for the first bridge and [bus 13-ff]
> for the second.  That might be the case, depending on what firmware
> set up, but it seems like a dubious way to do it, and of course it
> precludes a lot of hot-plug scenarios.

No, that's not what I meant. What happens is that no pre-assigned bus
range is specified for either of the host bridges, so that the range
0x00-0xff will be assigned by default in pci_scan_root_bus(). If I
understand correctly, what needs to be done is partition the bus range
between the two bridges (equally?). That would allow hot-plug scenarios
and be more in line with how other architectures do things.

I don't know if the Tegra PCIe controller supports hot-plug, though, so
maybe that wouldn't even be an issue and dynamic assignment would be
okay.

Thierry
Bjorn Helgaas Aug. 15, 2012, 2:36 p.m. UTC | #6
On Wed, Aug 15, 2012 at 5:30 AM, Thierry Reding
<thierry.reding@avionic-design.de> wrote:
> On Wed, Aug 15, 2012 at 05:18:04AM -0700, Bjorn Helgaas wrote:
>> On Tue, Aug 14, 2012 at 11:37 PM, Thierry Reding
>> <thierry.reding@avionic-design.de> wrote:
>> > On Tue, Aug 14, 2012 at 04:50:26PM -0700, Bjorn Helgaas wrote:
>> >> On Tue, Aug 14, 2012 at 1:12 PM, Thierry Reding
>> >> <thierry.reding@avionic-design.de> wrote:
>> >> > On Thu, Jul 26, 2012 at 09:55:12PM +0200, Thierry Reding wrote:
>> >> >> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
>> >> >> index a094c97..c886dff 100644
>> >> >> --- a/arch/arm/boot/dts/tegra20.dtsi
>> >> >> +++ b/arch/arm/boot/dts/tegra20.dtsi
>> >> >> @@ -199,6 +199,68 @@
>> >> >>               #size-cells = <0>;
>> >> >>       };
>> >> >>
>> >> >> +     pcie-controller {
>> >> >> +             compatible = "nvidia,tegra20-pcie";
>> >> >> +             reg = <0x80003000 0x00000800   /* PADS registers */
>> >> >> +                    0x80003800 0x00000200   /* AFI registers */
>> >> >> +                    0x81000000 0x01000000   /* configuration space */
>> >> >> +                    0x90000000 0x10000000>; /* extended configuration space */
>> >> >> +             interrupts = <0 98 0x04   /* controller interrupt */
>> >> >> +                           0 99 0x04>; /* MSI interrupt */
>> >> >> +             status = "disabled";
>> >> >> +
>> >> >> +             ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
>> >> >> +                       0 1 0  0x81000000 0x00800000   /* port 0 config space */
>> >> >> +                       0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
>> >> >> +                       0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
>> >> >> +                       0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
>> >> >> +                       0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
>> >> >> +
>> >> >> +                       1 0 0  0x80001000 0x00001000   /* root port 1 */
>> >> >> +                       1 1 0  0x81800000 0x00800000   /* port 1 config space */
>> >> >> +                       1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
>> >> >> +                       1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
>> >> >> +                       1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
>> >> >> +                       1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
>> >> >
>> >> > I've been thinking about this some more. The translations for both the
>> >> > regular and extended configuration spaces are configured in the top-
>> >> > level PCIe controller. It is therefore wrong how they are passed to the
>> >> > PCI host bridges via the ranges property.
>> >> >
>> >> > I remember Mitch saying that it should be passed down to the children
>> >> > because it is partitioned among them, but since the layout is compatible
>> >> > with ECAM, the partitioning isn't as simple as what's in the tree. In
>> >> > fact the partitions will be dependent on the number of devices attached
>> >> > to the host bridges.
>> >>
>> >> I don't understand this last bit about the number of devices attached
>> >> to the host bridges.  Logically, the host bridge has a bus number
>> >> aperture that you can know up front, even before you know anything
>> >> about what devices are below it.  On x86, for example, the ACPI _CRS
>> >> method has something like "[bus 00-7f]" in it, which means that any
>> >> buses in that range are below this bridge.  That doesn't tell us
>> >> anything about which buses actually have devices on them, of course;
>> >> it's just analogous to the secondary and subordinate bus number
>> >> registers in a P2P bridge.
>> >
>> > That's one of the issues I still need to take care of. Currently no bus
>> > resource is attached to the individual bridges (nor the PCI controller
>> > for that matter), so the PCI core will assign them dynamically.
>>
>> So your PCI controller driver knows how to program the controller bus
>> number aperture?  Sometimes people start by assuming that two host
>> bridges both have [bus 00-ff] apertures, then they enumerate below the
>> first and adjust the bus number apertures based on what they found.
>> For example, if they found buses 00-12 behind the first bridge, they
>> make the apertures [bus 00-12] for the first bridge and [bus 13-ff]
>> for the second.  That might be the case, depending on what firmware
>> set up, but it seems like a dubious way to do it, and of course it
>> precludes a lot of hot-plug scenarios.
>
> No, that's not what I meant. What happens is that no pre-assigned bus
> range is specified for either of the host bridges, so that the range
> 0x00-0xff will be assigned by default in pci_scan_root_bus().

My concern is about making the kernel's idea of the host bridge bus
number aperture match what the hardware is doing.  I'm pretty sure
that the default [bus 00-ff] range assigned by pci_scan_root_bus()
doesn't actually match the hardware in most cases, at least when we
have multiple host bridges in the same PCI domain.

For example, if you don't supply a bus number range,
pci_scan_root_bus() will assume [bus 00-ff] for both host bridges.
But if you could put an analyzer on each of the root buses and then
read bus 0 config space, will you see that config transaction on
*both* buses?  I doubt it.

You have to know at least the bus number of the root bus up front
before you can even start enumerating it.  The only way to learn that
is by reading registers in the host bridge or by some external
mechanism like ACPI or device tree.  That's the beginning of the bus
number aperture.  The end of the aperture is similar: we can't
reliably determine it by enumerating devices below the host bridge, so
we have to know it up front.  You can enumerate starting with the root
bus number and assigning new subordinate bus numbers as necessary, but
unless you know the host bridge aperture to begin with, you could
inadvertently assign a new bus number that actually belongs to a
different host bridge.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Aug. 15, 2012, 2:57 p.m. UTC | #7
On Wed, Aug 15, 2012 at 07:36:24AM -0700, Bjorn Helgaas wrote:
> On Wed, Aug 15, 2012 at 5:30 AM, Thierry Reding
> <thierry.reding@avionic-design.de> wrote:
> > On Wed, Aug 15, 2012 at 05:18:04AM -0700, Bjorn Helgaas wrote:
> >> On Tue, Aug 14, 2012 at 11:37 PM, Thierry Reding
> >> <thierry.reding@avionic-design.de> wrote:
> >> > On Tue, Aug 14, 2012 at 04:50:26PM -0700, Bjorn Helgaas wrote:
> >> >> On Tue, Aug 14, 2012 at 1:12 PM, Thierry Reding
> >> >> <thierry.reding@avionic-design.de> wrote:
> >> >> > On Thu, Jul 26, 2012 at 09:55:12PM +0200, Thierry Reding wrote:
> >> >> >> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
> >> >> >> index a094c97..c886dff 100644
> >> >> >> --- a/arch/arm/boot/dts/tegra20.dtsi
> >> >> >> +++ b/arch/arm/boot/dts/tegra20.dtsi
> >> >> >> @@ -199,6 +199,68 @@
> >> >> >>               #size-cells = <0>;
> >> >> >>       };
> >> >> >>
> >> >> >> +     pcie-controller {
> >> >> >> +             compatible = "nvidia,tegra20-pcie";
> >> >> >> +             reg = <0x80003000 0x00000800   /* PADS registers */
> >> >> >> +                    0x80003800 0x00000200   /* AFI registers */
> >> >> >> +                    0x81000000 0x01000000   /* configuration space */
> >> >> >> +                    0x90000000 0x10000000>; /* extended configuration space */
> >> >> >> +             interrupts = <0 98 0x04   /* controller interrupt */
> >> >> >> +                           0 99 0x04>; /* MSI interrupt */
> >> >> >> +             status = "disabled";
> >> >> >> +
> >> >> >> +             ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
> >> >> >> +                       0 1 0  0x81000000 0x00800000   /* port 0 config space */
> >> >> >> +                       0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
> >> >> >> +                       0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
> >> >> >> +                       0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
> >> >> >> +                       0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
> >> >> >> +
> >> >> >> +                       1 0 0  0x80001000 0x00001000   /* root port 1 */
> >> >> >> +                       1 1 0  0x81800000 0x00800000   /* port 1 config space */
> >> >> >> +                       1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
> >> >> >> +                       1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
> >> >> >> +                       1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
> >> >> >> +                       1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
> >> >> >
> >> >> > I've been thinking about this some more. The translations for both the
> >> >> > regular and extended configuration spaces are configured in the top-
> >> >> > level PCIe controller. It is therefore wrong how they are passed to the
> >> >> > PCI host bridges via the ranges property.
> >> >> >
> >> >> > I remember Mitch saying that it should be passed down to the children
> >> >> > because it is partitioned among them, but since the layout is compatible
> >> >> > with ECAM, the partitioning isn't as simple as what's in the tree. In
> >> >> > fact the partitions will be dependent on the number of devices attached
> >> >> > to the host bridges.
> >> >>
> >> >> I don't understand this last bit about the number of devices attached
> >> >> to the host bridges.  Logically, the host bridge has a bus number
> >> >> aperture that you can know up front, even before you know anything
> >> >> about what devices are below it.  On x86, for example, the ACPI _CRS
> >> >> method has something like "[bus 00-7f]" in it, which means that any
> >> >> buses in that range are below this bridge.  That doesn't tell us
> >> >> anything about which buses actually have devices on them, of course;
> >> >> it's just analogous to the secondary and subordinate bus number
> >> >> registers in a P2P bridge.
> >> >
> >> > That's one of the issues I still need to take care of. Currently no bus
> >> > resource is attached to the individual bridges (nor the PCI controller
> >> > for that matter), so the PCI core will assign them dynamically.
> >>
> >> So your PCI controller driver knows how to program the controller bus
> >> number aperture?  Sometimes people start by assuming that two host
> >> bridges both have [bus 00-ff] apertures, then they enumerate below the
> >> first and adjust the bus number apertures based on what they found.
> >> For example, if they found buses 00-12 behind the first bridge, they
> >> make the apertures [bus 00-12] for the first bridge and [bus 13-ff]
> >> for the second.  That might be the case, depending on what firmware
> >> set up, but it seems like a dubious way to do it, and of course it
> >> precludes a lot of hot-plug scenarios.
> >
> > No, that's not what I meant. What happens is that no pre-assigned bus
> > range is specified for either of the host bridges, so that the range
> > 0x00-0xff will be assigned by default in pci_scan_root_bus().
> 
> My concern is about making the kernel's idea of the host bridge bus
> number aperture match what the hardware is doing.  I'm pretty sure
> that the default [bus 00-ff] range assigned by pci_scan_root_bus()
> doesn't actually match the hardware in most cases, at least when we
> have multiple host bridges in the same PCI domain.
> 
> For example, if you don't supply a bus number range,
> pci_scan_root_bus() will assume [bus 00-ff] for both host bridges.
> But if you could put an analyzer on each of the root buses and then
> read bus 0 config space, will you see that config transaction on
> *both* buses?  I doubt it.
> 
> You have to know at least the bus number of the root bus up front
> before you can even start enumerating it.  The only way to learn that
> is by reading registers in the host bridge or by some external
> mechanism like ACPI or device tree.  That's the beginning of the bus
> number aperture.  The end of the aperture is similar: we can't
> reliably determine it by enumerating devices below the host bridge, so
> we have to know it up front.  You can enumerate starting with the root
> bus number and assigning new subordinate bus numbers as necessary, but
> unless you know the host bridge aperture to begin with, you could
> inadvertently assign a new bus number that actually belongs to a
> different host bridge.

Yes, that was my understanding as well. So currently I haven't seen any
problems with this because I only use one of the two host bridges. But I
suppose I should add code to initialize the bus number aperture properly
either via platform device resources (for the non-DT case) and the
device tree otherwise.

Thierry
Arnd Bergmann Aug. 15, 2012, 8:25 p.m. UTC | #8
On Wednesday 15 August 2012, Thierry Reding wrote:
> Yes, that was my understanding as well. So currently I haven't seen any
> problems with this because I only use one of the two host bridges. But I
> suppose I should add code to initialize the bus number aperture properly
> either via platform device resources (for the non-DT case) and the
> device tree otherwise.

I think when we last discussed this, the assumption was that each
root port has its own config space range and its own pci domain,
so you don't have to worry about bus apertures because each root port
can then have all 255 bus numbers. Has that turned out to be incorrect
now?

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Aug. 15, 2012, 8:48 p.m. UTC | #9
On Wed, Aug 15, 2012 at 2:25 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Wednesday 15 August 2012, Thierry Reding wrote:
>> Yes, that was my understanding as well. So currently I haven't seen any
>> problems with this because I only use one of the two host bridges. But I
>> suppose I should add code to initialize the bus number aperture properly
>> either via platform device resources (for the non-DT case) and the
>> device tree otherwise.
>
> I think when we last discussed this, the assumption was that each
> root port has its own config space range and its own pci domain,
> so you don't have to worry about bus apertures because each root port
> can then have all 255 bus numbers. Has that turned out to be incorrect
> now?

If that's the case, there's no problem.  I just want to be explicit
about the host bridge bus number aperture because I'd like to make
pci_scan_root_bus() fail if no aperture is supplied or if the aperture
overlaps with something we've already seen.  I don't know if that
means you want to add a domain and [bus 00-ff] range in device tree,
or if you want to make some device tree rule about every host bridge
being in its own domain, or what.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Aug. 16, 2012, 4:55 a.m. UTC | #10
On Wed, Aug 15, 2012 at 08:25:25PM +0000, Arnd Bergmann wrote:
> On Wednesday 15 August 2012, Thierry Reding wrote:
> > Yes, that was my understanding as well. So currently I haven't seen any
> > problems with this because I only use one of the two host bridges. But I
> > suppose I should add code to initialize the bus number aperture properly
> > either via platform device resources (for the non-DT case) and the
> > device tree otherwise.
> 
> I think when we last discussed this, the assumption was that each
> root port has its own config space range and its own pci domain,
> so you don't have to worry about bus apertures because each root port
> can then have all 255 bus numbers. Has that turned out to be incorrect
> now?

At least for the config space this is incorrect. There's a single region
to access the configuration space for all devices below the PCIe
controller. So it is shared by both (Tegra20) or all three (Tegra30)
root ports.

I'm not sure about PCI domains. Do you have any good pointers as to
where I could read up on them? If they need special hardware support,
then I think Tegra doesn't support them either. At least I haven't come
across any mention of domains while going through the, admittedly some-
what sparse on PCIe, Tegra documentation.

Thierry
Arnd Bergmann Aug. 16, 2012, 7:03 a.m. UTC | #11
On Thursday 16 August 2012, Thierry Reding wrote:
> At least for the config space this is incorrect. There's a single region
> to access the configuration space for all devices below the PCIe
> controller. So it is shared by both (Tegra20) or all three (Tegra30)
> root ports.
> 
> I'm not sure about PCI domains. Do you have any good pointers as to
> where I could read up on them? If they need special hardware support,
> then I think Tegra doesn't support them either. At least I haven't come
> across any mention of domains while going through the, admittedly some-
> what sparse on PCIe, Tegra documentation.

I was referring to the same thing here: if you had a separate config
space for each root port, they would by definition be separate PCI
domain. So you don't have them.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Aug. 16, 2012, 7:47 a.m. UTC | #12
On Thu, Aug 16, 2012 at 07:03:50AM +0000, Arnd Bergmann wrote:
> On Thursday 16 August 2012, Thierry Reding wrote:
> > At least for the config space this is incorrect. There's a single region
> > to access the configuration space for all devices below the PCIe
> > controller. So it is shared by both (Tegra20) or all three (Tegra30)
> > root ports.
> > 
> > I'm not sure about PCI domains. Do you have any good pointers as to
> > where I could read up on them? If they need special hardware support,
> > then I think Tegra doesn't support them either. At least I haven't come
> > across any mention of domains while going through the, admittedly some-
> > what sparse on PCIe, Tegra documentation.
> 
> I was referring to the same thing here: if you had a separate config
> space for each root port, they would by definition be separate PCI
> domain. So you don't have them.

I see, that makes sense. Thanks for explaining.

Thierry
Thierry Reding Aug. 16, 2012, 12:15 p.m. UTC | #13
On Wed, Aug 15, 2012 at 05:18:04AM -0700, Bjorn Helgaas wrote:
> On Tue, Aug 14, 2012 at 11:37 PM, Thierry Reding
> <thierry.reding@avionic-design.de> wrote:
> > On Tue, Aug 14, 2012 at 04:50:26PM -0700, Bjorn Helgaas wrote:
> >> On Tue, Aug 14, 2012 at 1:12 PM, Thierry Reding
> >> <thierry.reding@avionic-design.de> wrote:
> >> > On Thu, Jul 26, 2012 at 09:55:12PM +0200, Thierry Reding wrote:
> >> >> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
> >> >> index a094c97..c886dff 100644
> >> >> --- a/arch/arm/boot/dts/tegra20.dtsi
> >> >> +++ b/arch/arm/boot/dts/tegra20.dtsi
> >> >> @@ -199,6 +199,68 @@
> >> >>               #size-cells = <0>;
> >> >>       };
> >> >>
> >> >> +     pcie-controller {
> >> >> +             compatible = "nvidia,tegra20-pcie";
> >> >> +             reg = <0x80003000 0x00000800   /* PADS registers */
> >> >> +                    0x80003800 0x00000200   /* AFI registers */
> >> >> +                    0x81000000 0x01000000   /* configuration space */
> >> >> +                    0x90000000 0x10000000>; /* extended configuration space */
> >> >> +             interrupts = <0 98 0x04   /* controller interrupt */
> >> >> +                           0 99 0x04>; /* MSI interrupt */
> >> >> +             status = "disabled";
> >> >> +
> >> >> +             ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
> >> >> +                       0 1 0  0x81000000 0x00800000   /* port 0 config space */
> >> >> +                       0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
> >> >> +                       0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
> >> >> +                       0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
> >> >> +                       0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
> >> >> +
> >> >> +                       1 0 0  0x80001000 0x00001000   /* root port 1 */
> >> >> +                       1 1 0  0x81800000 0x00800000   /* port 1 config space */
> >> >> +                       1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
> >> >> +                       1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
> >> >> +                       1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
> >> >> +                       1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
> >> >
> >> > I've been thinking about this some more. The translations for both the
> >> > regular and extended configuration spaces are configured in the top-
> >> > level PCIe controller. It is therefore wrong how they are passed to the
> >> > PCI host bridges via the ranges property.
> >> >
> >> > I remember Mitch saying that it should be passed down to the children
> >> > because it is partitioned among them, but since the layout is compatible
> >> > with ECAM, the partitioning isn't as simple as what's in the tree. In
> >> > fact the partitions will be dependent on the number of devices attached
> >> > to the host bridges.
> >>
> >> I don't understand this last bit about the number of devices attached
> >> to the host bridges.  Logically, the host bridge has a bus number
> >> aperture that you can know up front, even before you know anything
> >> about what devices are below it.  On x86, for example, the ACPI _CRS
> >> method has something like "[bus 00-7f]" in it, which means that any
> >> buses in that range are below this bridge.  That doesn't tell us
> >> anything about which buses actually have devices on them, of course;
> >> it's just analogous to the secondary and subordinate bus number
> >> registers in a P2P bridge.
> >
> > That's one of the issues I still need to take care of. Currently no bus
> > resource is attached to the individual bridges (nor the PCI controller
> > for that matter), so the PCI core will assign them dynamically.
> 
> So your PCI controller driver knows how to program the controller bus
> number aperture?  Sometimes people start by assuming that two host
> bridges both have [bus 00-ff] apertures, then they enumerate below the
> first and adjust the bus number apertures based on what they found.
> For example, if they found buses 00-12 behind the first bridge, they
> make the apertures [bus 00-12] for the first bridge and [bus 13-ff]
> for the second.  That might be the case, depending on what firmware
> set up, but it seems like a dubious way to do it, and of course it
> precludes a lot of hot-plug scenarios.
> 
> > If this
> > range is known at boot time we could assign ECAM ranges based on the bus
> > numbers. Standard ECAM ranges, that is. On Tegra this won't work because
> > as Stephen mentioned in a previous mail, the bus field is not the top
> > field in the ECAM addresses. Basically what you have is this:
> >
> >         [27:24] upper 4 bits of the register address for extended
> >                 configuration space
> >         [23:16] bus number
> >         [15:11] device number
> >         [10: 8] device function
> >         [ 8: 0] register
> >
> > So the ECAM space cannot be partitioned by bus number.
> 
> Ah, OK, so definitely not standard PCIe ECAM.

I had an idea about how we could maybe implement a generic ECAM
mechanism that provides for this kind of mapping as well. Basically,
every mechanism that can be covered by such an implementation would in
one way or another specify the above five fields, so how about designing
it in a way that the driver could define the fields that encode the
information, so for Tegra, something along these lines:

	struct pci_ecam_map pci_ecam_tegra = {
		.register = {  0,  7 },
		.extended = { 24, 27 },
		.function = {  8, 10 },
		.device   = { 11, 15 },
		.bus      = { 16, 23 },
	}

For standard ECAM, something like this:

	struct pci_ecam_map pci_ecam_standard = {
		.register = {  0,  7 },
		.extended = {  8, 11 },
		.function = { 12, 14 },
		.device   = { 15, 19 },
		.bus      = { 20, 27 },
	};

Thierry
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
new file mode 100644
index 0000000..b181d4c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
@@ -0,0 +1,94 @@ 
+NVIDIA Tegra PCIe controller
+
+Required properties:
+- compatible: "nvidia,tegra20-pcie"
+- reg: physical base address and length of the controller's registers
+- interrupts: the interrupt outputs of the controller
+- pex-clk-supply: supply voltage for internal reference clock
+- vdd-supply: power supply for controller (1.05V)
+- ranges: describes the translation of addresses for root ports
+- #address-cells: address representation for root ports (must be 3)
+  - cell 0 specifies the port index
+  - cell 1 denotes the address type
+      0: root port register space
+      1: PCI configuration space
+      2: PCI extended configuration space
+      3: downstream I/O
+      4: non-prefetchable memory
+      5: prefetchable memory
+  - cell 2 provides a number space that can include the size (should be 0)
+- #size-cells: size representation for root ports (must be 1)
+
+Root ports are defined as subnodes of the PCIe controller node.
+
+Required properties:
+- device_type: must be "pciex"
+- reg: address and size of the port configuration registers
+- #address-cells: must be 3
+- #size-cells: must be 2
+- ranges: sub-ranges distributed from the PCIe controller node
+- nvidia,num-lanes: number of lanes to use for this port
+
+Example:
+
+	pcie-controller {
+		compatible = "nvidia,tegra20-pcie";
+		reg = <0x80003000 0x00000800   /* PADS registers */
+		       0x80003800 0x00000200   /* AFI registers */
+		       0x81000000 0x01000000   /* configuration space */
+		       0x90000000 0x10000000>; /* extended configuration space */
+		interrupts = <0 98 0x04   /* controller interrupt */
+		              0 99 0x04>; /* MSI interrupt */
+		status = "disabled";
+
+		ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
+			  0 1 0  0x81000000 0x00800000   /* port 0 config space */
+			  0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
+			  0 3 0  0x82000000 0x00008000   /* port 0 downstream I/O */
+			  0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
+			  0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
+
+			  1 0 0  0x80001000 0x00001000   /* root port 1 */
+			  1 1 0  0x81800000 0x00800000   /* port 1 config space */
+			  1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
+			  1 3 0  0x82008000 0x00008000   /* port 1 downstream I/O */
+			  1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
+			  1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
+
+		#address-cells = <3>;
+		#size-cells = <1>;
+
+		pci@0 {
+			device_type = "pciex";
+			reg = <0 0 0 0x1000>;
+			status = "disabled";
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+
+			ranges = <0x80000000 0 0  0 1 0  0 0x00800000   /* config space */
+				  0x90000000 0 0  0 2 0  0 0x08000000   /* ext config space */
+				  0x81000000 0 0  0 3 0  0 0x00008000   /* I/O */
+				  0x82000000 0 0  0 4 0  0 0x08000000   /* non-prefetchable memory */
+				  0xc2000000 0 0  0 5 0  0 0x08000000>; /* prefetchable memory */
+
+			nvidia,num-lanes = <2>;
+		};
+
+		pci@1 {
+			device_type = "pciex";
+			reg = <1 0 0 0x1000>;
+			status = "disabled";
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+
+			ranges = <0x80000000 0 0  1 1 0  0 0x00800000   /* config space */
+				  0x90000000 0 0  1 2 0  0 0x08000000   /* ext config space */
+				  0x81000000 0 0  1 3 0  0 0x00008000   /* I/O */
+				  0x82000000 0 0  1 4 0  0 0x08000000   /* non-prefetchable memory */
+				  0xc2000000 0 0  1 5 0  0 0x08000000>; /* prefetchable memory */
+
+			nvidia,num-lanes = <2>;
+		};
+	};
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index a094c97..c886dff 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -199,6 +199,68 @@ 
 		#size-cells = <0>;
 	};
 
+	pcie-controller {
+		compatible = "nvidia,tegra20-pcie";
+		reg = <0x80003000 0x00000800   /* PADS registers */
+		       0x80003800 0x00000200   /* AFI registers */
+		       0x81000000 0x01000000   /* configuration space */
+		       0x90000000 0x10000000>; /* extended configuration space */
+		interrupts = <0 98 0x04   /* controller interrupt */
+		              0 99 0x04>; /* MSI interrupt */
+		status = "disabled";
+
+		ranges = <0 0 0  0x80000000 0x00001000   /* root port 0 */
+			  0 1 0  0x81000000 0x00800000   /* port 0 config space */
+			  0 2 0  0x90000000 0x08000000   /* port 0 ext config space */
+			  0 3 0  0x82000000 0x00010000   /* port 0 downstream I/O */
+			  0 4 0  0xa0000000 0x08000000   /* port 0 non-prefetchable memory */
+			  0 5 0  0xb0000000 0x08000000   /* port 0 prefetchable memory */
+
+			  1 0 0  0x80001000 0x00001000   /* root port 1 */
+			  1 1 0  0x81800000 0x00800000   /* port 1 config space */
+			  1 2 0  0x98000000 0x08000000   /* port 1 ext config space */
+			  1 3 0  0x82010000 0x00010000   /* port 1 downstream I/O */
+			  1 4 0  0xa8000000 0x08000000   /* port 1 non-prefetchable memory */
+			  1 5 0  0xb8000000 0x08000000>; /* port 1 prefetchable memory */
+
+		#address-cells = <3>;
+		#size-cells = <1>;
+
+		pci@0 {
+			device_type = "pciex";
+			reg = <0 0 0 0x1000>;
+			status = "disabled";
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+
+			ranges = <0x80000000 0 0  0 1 0  0 0x00800000   /* config space */
+				  0x90000000 0 0  0 2 0  0 0x08000000   /* ext config space */
+				  0x81000000 0 0  0 3 0  0 0x00010000   /* I/O */
+				  0x82000000 0 0  0 4 0  0 0x08000000   /* non-prefetchable memory */
+				  0xc2000000 0 0  0 5 0  0 0x08000000>; /* prefetchable memory */
+
+			nvidia,num-lanes = <2>;
+		};
+
+		pci@1 {
+			device_type = "pciex";
+			reg = <1 0 0 0x1000>;
+			status = "disabled";
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+
+			ranges = <0x80000000 0 0  1 1 0  0 0x00800000   /* config space */
+				  0x90000000 0 0  1 2 0  0 0x08000000   /* ext config space */
+				  0x81000000 0 0  1 3 0  0 0x00010000   /* I/O */
+				  0x82000000 0 0  1 4 0  0 0x08000000   /* non-prefetchable memory */
+				  0xc2000000 0 0  1 5 0  0 0x08000000>; /* prefetchable memory */
+
+			nvidia,num-lanes = <2>;
+		};
+	};
+
 	usb@c5000000 {
 		compatible = "nvidia,tegra20-ehci", "usb-ehci";
 		reg = <0xc5000000 0x4000>;
diff --git a/arch/arm/mach-tegra/board-dt-tegra20.c b/arch/arm/mach-tegra/board-dt-tegra20.c
index a8a05c1..caa377a 100644
--- a/arch/arm/mach-tegra/board-dt-tegra20.c
+++ b/arch/arm/mach-tegra/board-dt-tegra20.c
@@ -40,6 +40,7 @@ 
 
 #include <mach/iomap.h>
 #include <mach/irqs.h>
+#include <mach/pci-tegra.h>
 
 #include "board.h"
 #include "board-harmony.h"
@@ -114,11 +115,7 @@  static void __init tegra_dt_init(void)
 #ifdef CONFIG_MACH_TRIMSLICE
 static void __init trimslice_init(void)
 {
-	int ret;
-
-	ret = tegra_pcie_init(true, true);
-	if (ret)
-		pr_err("tegra_pci_init() failed: %d\n", ret);
+	platform_device_register(&tegra_pcie_device);
 }
 #endif
 
diff --git a/arch/arm/mach-tegra/pcie.c b/arch/arm/mach-tegra/pcie.c
index dab3479..2d00b1c 100644
--- a/arch/arm/mach-tegra/pcie.c
+++ b/arch/arm/mach-tegra/pcie.c
@@ -37,6 +37,10 @@ 
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/regulator/consumer.h>
 
 #include <asm/sizes.h>
 #include <asm/mach/irq.h>
@@ -220,6 +224,9 @@  struct tegra_pcie {
 	unsigned int num_ports;
 
 	struct tegra_pcie_msi *msi;
+
+	struct regulator *pex_clk_supply;
+	struct regulator *vdd_supply;
 };
 
 struct tegra_pcie_port {
@@ -1016,6 +1023,178 @@  static int tegra_pcie_disable_msi(struct tegra_pcie *pcie)
 	return 0;
 }
 
+static int tegra_pcie_dt_init(struct platform_device *pdev)
+{
+	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
+	int err;
+
+	if (!IS_ERR_OR_NULL(pcie->vdd_supply)) {
+		err = regulator_enable(pcie->vdd_supply);
+		if (err < 0) {
+			dev_err(&pdev->dev,
+				"failed to enable VDD regulator: %d\n", err);
+			return err;
+		}
+	}
+
+	if (!IS_ERR_OR_NULL(pcie->pex_clk_supply)) {
+		err = regulator_enable(pcie->pex_clk_supply);
+		if (err < 0) {
+			dev_err(&pdev->dev,
+				"failed to enable pex-clk regulator: %d\n",
+				err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_dt_exit(struct platform_device *pdev)
+{
+	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
+	int err;
+
+	if (!IS_ERR_OR_NULL(pcie->pex_clk_supply)) {
+		err = regulator_disable(pcie->pex_clk_supply);
+		if (err < 0) {
+			dev_err(&pdev->dev,
+				"failed to disable pex-clk regulator: %d\n",
+				err);
+			return err;
+		}
+	}
+
+	if (!IS_ERR_OR_NULL(pcie->vdd_supply)) {
+		err = regulator_disable(pcie->vdd_supply);
+		if (err < 0) {
+			dev_err(&pdev->dev,
+				"failed to disable VDD regulator: %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+struct resource *of_parse_reg(struct device_node *np, unsigned int *countp)
+{
+	unsigned int count = 0, i;
+	struct resource *reg, res;
+	int err;
+
+	while (of_address_to_resource(np, count, &res) == 0)
+		count++;
+
+	reg = kzalloc(sizeof(*reg) * count, GFP_KERNEL);
+	if (!reg)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		err = of_address_to_resource(np, i, &reg[i]);
+		if (err < 0) {
+			kfree(reg);
+			return ERR_PTR(err);
+		}
+	}
+
+	if (countp)
+		*countp = count;
+
+	return reg;
+}
+
+static int tegra_pcie_port_parse_dt(struct tegra_pcie *pcie,
+				    struct device_node *node,
+				    struct tegra_pcie_rp *port)
+{
+	const __be32 *values;
+	u32 value;
+	int err;
+
+	values = of_get_property(node, "reg", NULL);
+	if (!values)
+		return -ENODEV;
+
+	port->index = be32_to_cpup(values);
+
+	port->resources = of_parse_reg(node, &port->num_resources);
+	if (!port->resources)
+		return -ENOMEM;
+
+	port->ranges = of_pci_parse_ranges(node, &port->num_ranges);
+	if (!port->ranges) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	err = of_property_read_u32(node, "nvidia,num-lanes", &value);
+	if (err < 0)
+		goto free;
+
+	port->num_lanes = value;
+
+	return 0;
+
+free:
+	kfree(port->ranges);
+	kfree(port->resources);
+	return err;
+}
+
+static struct tegra_pcie_pdata *tegra_pcie_parse_dt(struct tegra_pcie *pcie)
+{
+	struct tegra_pcie_pdata *pdata;
+	struct device_node *child;
+	unsigned int i = 0;
+	size_t size;
+	int err;
+
+	pdata = devm_kzalloc(pcie->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return NULL;
+
+	pdata->init = tegra_pcie_dt_init;
+	pdata->exit = tegra_pcie_dt_exit;
+
+	pcie->vdd_supply = devm_regulator_get(pcie->dev, "vdd");
+	if (IS_ERR_OR_NULL(pcie->vdd_supply))
+		return ERR_CAST(pcie->vdd_supply);
+
+	pcie->pex_clk_supply = devm_regulator_get(pcie->dev, "pex-clk");
+	if (IS_ERR_OR_NULL(pcie->pex_clk_supply))
+		return ERR_CAST(pcie->pex_clk_supply);
+
+	/* parse root port nodes */
+	for_each_child_of_node(pcie->dev->of_node, child) {
+		if (of_device_is_available(child))
+			pdata->num_ports++;
+	}
+
+	size = pdata->num_ports * sizeof(*pdata->ports);
+
+	pdata->ports = devm_kzalloc(pcie->dev, size, GFP_KERNEL);
+	if (!pdata->ports)
+		return ERR_PTR(-ENOMEM);
+
+	for_each_child_of_node(pcie->dev->of_node, child) {
+		struct tegra_pcie_rp *port = &pdata->ports[i];
+
+		if (!of_device_is_available(child))
+			continue;
+
+		err = tegra_pcie_port_parse_dt(pcie, child, port);
+		if (err < 0)
+			return ERR_PTR(err);
+
+		i++;
+	}
+
+	pdata->num_ports = i;
+
+	return pdata;
+}
+
 static unsigned long tegra_pcie_port_get_pex_ctrl(struct tegra_pcie_port *port)
 {
 	unsigned long ret = 0;
@@ -1193,6 +1372,14 @@  static int __devinit tegra_pcie_probe(struct platform_device *pdev)
 
 	pcie->dev = &pdev->dev;
 
+	if (IS_ENABLED(CONFIG_OF)) {
+		if (!pdata && pdev->dev.of_node) {
+			pdata = tegra_pcie_parse_dt(pcie);
+			if (IS_ERR(pdata))
+				return PTR_ERR(pdata);
+		}
+	}
+
 	if (!pdata)
 		return -ENODEV;
 
@@ -1280,10 +1467,18 @@  static int __devexit tegra_pcie_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id tegra_pcie_of_match[] = {
+	{ .compatible = "nvidia,tegra20-pcie", },
+	{ },
+};
+#endif
+
 static struct platform_driver tegra_pcie_driver = {
 	.driver = {
 		.name = "tegra-pcie",
 		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(tegra_pcie_of_match),
 	},
 	.probe = tegra_pcie_probe,
 	.remove = __devexit_p(tegra_pcie_remove),