diff mbox

[v8,1/3] PCI: host: rcar: Add Renesas R-Car PCIe driver

Message ID 1399892270-25021-2-git-send-email-phil.edworthy@renesas.com (mailing list archive)
State New, archived
Headers show

Commit Message

Phil Edworthy May 12, 2014, 10:57 a.m. UTC
This PCIe Host driver currently does not support MSI, so cards
fall back to INTx interrupts.

Signed-off-by: Phil Edworthy <phil.edworthy@renesas.com>

v8:
 - Moved header file contents into c file
 - Formatting cleaned up
 - Remove bus/dev/func range checks for config access
 - Add comment about config access serialization
 - Made rcar_pcie_setup_window() return void as no errors possible
 - Remove unused register definitions
 - Removed __init markers to fix section mismatches
 - Add explicit bus number range
 - Get the root bus nr from config writes instead of sys->busnr
 - Use PCI domains

v7, v6:
 - No changes

v5:
 - Use module_platform_driver instead of subsys_initcall
 - Use the of_device_id data field for HW init function
 - Init hw_pci struct in declaration
 - Renesas SoC compatible string has peripheral before device name
 - Add PCIe bus clock reference
 - Use dma-ranges property to specify inbound memory regions
 - Support multiple IO windows and correct resources

v4:
 - Use runtime PM properly

v3:
 - Add DT support
 - Use 'of_irq_parse_and_map_pci' for '.map_irq'
 - Use pm ops to enable clocks
 - Fix checkpatch errors
 - Use subsys_initcall to overcome issues with port bus driver
 - Adjust Kconfig to match other R-Car drivers

v2:
 - Use msleep instead of udelay when waiting for the link
 - Use pm_runtime
 - Removed unused definition
 - Also replaced call to devm_request_and_ioremap with devm_ioremap_resource
   and fixed a bug with this when reporting errors.

Signed-off-by: Phil Edworthy <phil.edworthy@renesas.com>
---
 drivers/pci/host/Kconfig     |   6 +
 drivers/pci/host/Makefile    |   1 +
 drivers/pci/host/pcie-rcar.c | 768 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 775 insertions(+)
 create mode 100644 drivers/pci/host/pcie-rcar.c

Comments

Sergei Shtylyov June 18, 2014, 9:51 p.m. UTC | #1
Hello.

On 05/12/2014 02:57 PM, Phil Edworthy wrote:

    I'm investigating an imprecise external abort occurring once userland is 
started when I have NetMos PCIe serial card inserted and the '8250_pci' driver 
enabled and I have found some issues in this driver, while at it...

> This PCIe Host driver currently does not support MSI, so cards
> fall back to INTx interrupts.

> Signed-off-by: Phil Edworthy <phil.edworthy@renesas.com>

[...]

> diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
> new file mode 100644
> index 0000000..3c524b9
> --- /dev/null
> +++ b/drivers/pci/host/pcie-rcar.c
> @@ -0,0 +1,768 @@
[...]
> +#define PCI_MAX_RESOURCES 4

    As a side note, this risks collision with <linux/pci*.h>...

> +static void pci_write_reg(struct rcar_pcie *pcie, unsigned long val,
> +			  unsigned long reg)
> +{
> +	writel(val, pcie->base + reg);
> +}
> +
> +static unsigned long pci_read_reg(struct rcar_pcie *pcie, unsigned long reg)
> +{
> +	return readl(pcie->base + reg);
> +}

    As a side note, these functions are hardly needed, and risk collision too...

> +
> +enum {
> +	PCI_ACCESS_READ,
> +	PCI_ACCESS_WRITE,

    These risk collision too...

> +static void rcar_pcie_setup_window(int win, struct resource *res,
> +				   struct rcar_pcie *pcie)

    As a side note, 'res' parameter is hardly needed here, as the function 
always gets
called with the resources contained within 'struct rcar_pcie'...

> +{
> +	/* Setup PCIe address space mappings for each resource */
> +	resource_size_t size;
> +	u32 mask;
> +
> +	pci_write_reg(pcie, 0x00000000, PCIEPTCTLR(win));
> +
> +	/*
> +	 * The PAMR mask is calculated in units of 128Bytes, which
> +	 * keeps things pretty simple.
> +	 */
> +	size = resource_size(res);
> +	mask = (roundup_pow_of_two(size) / SZ_128) - 1;
> +	pci_write_reg(pcie, mask << 7, PCIEPAMR(win));
> +
> +	pci_write_reg(pcie, upper_32_bits(res->start), PCIEPARH(win));
> +	pci_write_reg(pcie, lower_32_bits(res->start), PCIEPARL(win));

    My investigation showed, and a printk() here confirmed, that instead of a
PCI bus address we have a CPU address written to these registers:

rcar_pcie_setup_window: window 0, resource [io  0xfe100000-0xfe1fffff]
rcar_pcie_setup_window: window 1, resource [mem 0xfe200000-0xfe3fffff]
rcar_pcie_setup_window: window 2, resource [mem 0x30000000-0x37ffffff]
rcar_pcie_setup_window: window 3, resource [mem 0x38000000-0x3fffffff pref]
rcar-pcie fe000000.pcie: PCI host bridge to bus 0000:00

> +
> +	/* First resource is for IO */
> +	mask = PAR_ENABLE;
> +	if (res->flags & IORESOURCE_IO)
> +		mask |= IO_SPACE;

    For the memory space this works OK as you're identity-mapping the memory
ranges in your device trees. However, for the I/O space this means that it
won't work, as the BARs in the PCIe devices get programmed with the PCI bus
addresses but the PCIe window translation register is programmed with a CPU
address, which doesn't match at all (given your device trees), and hence one
can't access the card's I/O mapped registers at all...

> +
> +	pci_write_reg(pcie, mask, PCIEPTCTLR(win));
> +}
> +
> +static int rcar_pcie_setup(int nr, struct pci_sys_data *sys)
> +{
> +	struct rcar_pcie *pcie = sys_to_pcie(sys);
> +	struct resource *res;
> +	int i;
> +
> +	pcie->root_bus_nr = -1;
> +
> +	/* Setup PCI resources */
> +	for (i = 0; i < PCI_MAX_RESOURCES; i++) {
> +
> +		res = &pcie->res[i];
> +		if (!res->flags)
> +			continue;
> +
> +		rcar_pcie_setup_window(i, res, pcie);
> +
> +		if (res->flags & IORESOURCE_IO)
> +			pci_ioremap_io(nr * SZ_64K, res->start);

   I'm not sure why you are not calling pci_add_resource() for I/O space...
Also, this sets up only 64 KiB of I/O ports while your device tree describes
an I/O space 1 MiB in size.

> +		else
> +			pci_add_resource(&sys->resources, res);
> +	}
> +	pci_add_resource(&sys->resources, &pcie->busn);
> +
> +	return 1;
> +}
[...]
> +static int rcar_pcie_hw_init(struct rcar_pcie *pcie)
> +{
> +	int err;
> +
> +	/* Begin initialization */
> +	pci_write_reg(pcie, 0, PCIETCTLR);
> +
> +	/* Set mode */
> +	pci_write_reg(pcie, 1, PCIEMSR);
> +
> +	/*
> +	 * Initial header for port config space is type 1, set the device
> +	 * class to match. Hardware takes care of propagating the IDSETR
> +	 * settings, so there is no need to bother with a quirk.
> +	 */
> +	pci_write_reg(pcie, PCI_CLASS_BRIDGE_PCI << 16, IDSETR1);

    Hm, shouldn't this be a host bridge? I've noticed that the bridge's I/O
and memory base/limit registers are left uninitialized even though the BARs of
the PCIe devices behind this bridge are assigned.

> +
> +	/*
> +	 * Setup Secondary Bus Number & Subordinate Bus Number, even though
> +	 * they aren't used, to avoid bridge being detected as broken.
> +	 */
> +	rcar_rmw32(pcie, RCONF(PCI_SECONDARY_BUS), 0xff, 1);
> +	rcar_rmw32(pcie, RCONF(PCI_SUBORDINATE_BUS), 0xff, 1);
> +
> +	/* Initialize default capabilities. */
> +	rcar_rmw32(pcie, REXPCAP(0), 0, PCI_CAP_ID_EXP);
> +	rcar_rmw32(pcie, REXPCAP(PCI_EXP_FLAGS),
> +		PCI_EXP_FLAGS_TYPE, PCI_EXP_TYPE_ROOT_PORT << 4);
> +	rcar_rmw32(pcie, RCONF(PCI_HEADER_TYPE), 0x7f,
> +		PCI_HEADER_TYPE_BRIDGE);
> +
> +	/* Enable data link layer active state reporting */
> +	rcar_rmw32(pcie, REXPCAP(PCI_EXP_LNKCAP), 0, PCI_EXP_LNKCAP_DLLLARC);
> +
> +	/* Write out the physical slot number = 0 */
> +	rcar_rmw32(pcie, REXPCAP(PCI_EXP_SLTCAP), PCI_EXP_SLTCAP_PSN, 0);
> +
> +	/* Set the completion timer timeout to the maximum 50ms. */
> +	rcar_rmw32(pcie, TLCTLR+1, 0x3f, 50);

    Missing spaces around '+'...

> +
> +	/* Terminate list of capabilities (Next Capability Offset=0) */
> +	rcar_rmw32(pcie, RVCCAP(0), 0xfff0, 0);
> +
> +	/* Enable MAC data scrambling. */
> +	rcar_rmw32(pcie, MACCTLR, SCRAMBLE_DISABLE, 0);

    Doesn't the comment contradict the code here?

> +
> +	/* Finish initialization - establish a PCI Express link */
> +	pci_write_reg(pcie, CFINIT, PCIETCTLR);
> +
> +	/* This will timeout if we don't have a link. */
> +	err = rcar_pcie_wait_for_dl(pcie);
> +	if (err)
> +		return err;
> +
> +	/* Enable INTx interrupts */
> +	rcar_rmw32(pcie, PCIEINTXR, 0, 0xF << 8);
> +
> +	/* Enable slave Bus Mastering */
> +	rcar_rmw32(pcie, RCONF(PCI_STATUS), PCI_STATUS_DEVSEL_MASK,
> +		PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
> +		PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST);

    Hmm, you're mixing up PCI control/status registers' bits here; they're
two 16-bit registers! So you're writing to 3 reserved LSBs of the PCI status 
register...

> +static int rcar_pcie_get_resources(struct platform_device *pdev,
> +				   struct rcar_pcie *pcie)
> +{
> +	struct resource res;
> +	int err;
> +
> +	err = of_address_to_resource(pdev->dev.of_node, 0, &res);

    BTW, you could use platform_get_resource() and save on your local variable
and the error check -- devm_ioremap_resource() does it.

> +	if (err)
> +		return err;
> +
> +	pcie->clk = devm_clk_get(&pdev->dev, "pcie");
> +	if (IS_ERR(pcie->clk)) {
> +		dev_err(pcie->dev, "cannot get platform clock\n");
> +		return PTR_ERR(pcie->clk);
> +	}
> +	err = clk_prepare_enable(pcie->clk);
> +	if (err)
> +		goto fail_clk;
> +
> +	pcie->bus_clk = devm_clk_get(&pdev->dev, "pcie_bus");
> +	if (IS_ERR(pcie->bus_clk)) {
> +		dev_err(pcie->dev, "cannot get pcie bus clock\n");
> +		err = PTR_ERR(pcie->bus_clk);
> +		goto fail_clk;
> +	}
> +	err = clk_prepare_enable(pcie->bus_clk);
> +	if (err)
> +		goto err_map_reg;
> +
> +	pcie->base = devm_ioremap_resource(&pdev->dev, &res);
> +	if (IS_ERR(pcie->base)) {
> +		err = PTR_ERR(pcie->base);
> +		goto err_map_reg;
> +	}
> +
> +	return 0;
> +
> +err_map_reg:
> +	clk_disable_unprepare(pcie->bus_clk);
> +fail_clk:
> +	clk_disable_unprepare(pcie->clk);
> +
> +	return err;
> +}
> +
> +static int rcar_pcie_inbound_ranges(struct rcar_pcie *pcie,
> +				    struct of_pci_range *range,
> +				    int *index)
> +{
> +	u64 restype = range->flags;
> +	u64 cpu_addr = range->cpu_addr;
> +	u64 cpu_end = range->cpu_addr + range->size;
> +	u64 pci_addr = range->pci_addr;
> +	u32 flags = LAM_64BIT | LAR_ENABLE;
> +	u64 mask;
> +	u64 size;
> +	int idx = *index;
> +
> +	if (restype & IORESOURCE_PREFETCH)
> +		flags |= LAM_PREFETCH;
> +
> +	/*
> +	 * If the size of the range is larger than the alignment of the start
> +	 * address, we have to use multiple entries to perform the mapping.
> +	 */
> +	if (cpu_addr > 0) {
> +		unsigned long nr_zeros = __ffs64(cpu_addr);
> +		u64 alignment = 1ULL << nr_zeros;

    Missing newline...

> +		size = min(range->size, alignment);
> +	} else {
> +		size = range->size;
> +	}
> +	/* Hardware supports max 4GiB inbound region */
> +	size = min(size, 1ULL << 32);
> +
> +	mask = roundup_pow_of_two(size) - 1;
> +	mask &= ~0xf;
> +
> +	while (cpu_addr < cpu_end) {
> +		/*
> +		 * Set up 64-bit inbound regions as the range parser doesn't
> +		 * distinguish between 32 and 64-bit types.
> +		 */
> +		pci_write_reg(pcie, lower_32_bits(pci_addr), PCIEPRAR(idx));
> +		pci_write_reg(pcie, lower_32_bits(cpu_addr), PCIELAR(idx));
> +		pci_write_reg(pcie, lower_32_bits(mask) | flags, PCIELAMR(idx));
> +
> +		pci_write_reg(pcie, upper_32_bits(pci_addr), PCIEPRAR(idx+1));
> +		pci_write_reg(pcie, upper_32_bits(cpu_addr), PCIELAR(idx+1));
> +		pci_write_reg(pcie, 0, PCIELAMR(idx+1));

    Missing spaces around '+'...

> +
> +		pci_addr += size;
> +		cpu_addr += size;
> +		idx += 2;
> +
> +		if (idx > MAX_NR_INBOUND_MAPS) {
> +			dev_err(pcie->dev, "Failed to map inbound regions!\n");
> +			return -EINVAL;
> +		}
> +	}
> +	*index = idx;
> +
> +	return 0;
> +}
> +
> +static int pci_dma_range_parser_init(struct of_pci_range_parser *parser,
> +				     struct device_node *node)
> +{
> +	const int na = 3, ns = 2;
> +	int rlen;
> +
> +	parser->node = node;
> +	parser->pna = of_n_addr_cells(node);
> +	parser->np = parser->pna + na + ns;
> +
> +	parser->range = of_get_property(node, "dma-ranges", &rlen);
> +	if (!parser->range)
> +		return -ENOENT;
> +
> +	parser->end = parser->range + rlen / sizeof(__be32);
> +	return 0;
> +}

    Erm, AFAIK "dma-ranges" is a standard property, shouldn't its parsing be 
placed in some generic place like drivers/of/address.c?

[...]
> +static int rcar_pcie_probe(struct platform_device *pdev)
> +{
> +	struct rcar_pcie *pcie;
> +	unsigned int data;
> +	struct of_pci_range range;
> +	struct of_pci_range_parser parser;
> +	const struct of_device_id *of_id;
> +	int err, win = 0;
> +	int (*hw_init_fn)(struct rcar_pcie *);
> +
> +	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
> +	if (!pcie)
> +		return -ENOMEM;
> +
> +	pcie->dev = &pdev->dev;
> +	platform_set_drvdata(pdev, pcie);
> +
> +	/* Get the bus range */
> +	if (of_pci_parse_bus_range(pdev->dev.of_node, &pcie->busn)) {
> +		dev_err(&pdev->dev, "failed to parse bus-range property\n");
> +		return -EINVAL;
> +	}
> +
> +	if (of_pci_range_parser_init(&parser, pdev->dev.of_node)) {
> +		dev_err(&pdev->dev, "missing ranges property\n");
> +		return -EINVAL;
> +	}
> +
> +	err = rcar_pcie_get_resources(pdev, pcie);
> +	if (err < 0) {
> +		dev_err(&pdev->dev, "failed to request resources: %d\n", err);
> +		return err;
> +	}
> +
> +	for_each_of_pci_range(&parser, &range) {
> +		of_pci_range_to_resource(&range, pdev->dev.of_node,
> +						&pcie->res[win++]);

    This function call is probably no good here as it fetches into the 'start' 
field of a 'struct resource' a CPU address instead of a PCI address...

> +
> +		if (win > PCI_MAX_RESOURCES)
> +			break;
> +	}
> +
> +	 err = rcar_pcie_parse_map_dma_ranges(pcie, pdev->dev.of_node);
> +	 if (err)
> +		return err;
> +
> +	of_id = of_match_device(rcar_pcie_of_match, pcie->dev);
> +	if (!of_id || !of_id->data)
> +		return -EINVAL;
> +	hw_init_fn = of_id->data;
> +
> +	/* Failure to get a link might just be that no cards are inserted */
> +	err = hw_init_fn(pcie);
> +	if (err) {
> +		dev_info(&pdev->dev, "PCIe link down\n");
> +		return 0;

    Not quite sure why you exit normally here without enabling the hardware.
I think the internal bridge should be visible regardless of whether link is
detected or not...

> +	}
> +
> +	data = pci_read_reg(pcie, MACSR);
> +	dev_info(&pdev->dev, "PCIe x%d: link up\n", (data >> 20) & 0x3f);
> +
> +	rcar_pcie_enable(pcie);
> +
> +	return 0;
> +}
[...]

WBR, Sergei
Gabriel Fernandez June 20, 2014, 7:37 a.m. UTC | #2
Hi Phil,

Just a question...

On 12 May 2014 12:57, Phil Edworthy <phil.edworthy@renesas.com> wrote:
> This PCIe Host driver currently does not support MSI, so cards
> fall back to INTx interrupts.
>

> Signed-off-by: Phil Edworthy <phil.edworthy@renesas.com>
> ---
>  drivers/pci/host/Kconfig     |   6 +
>  drivers/pci/host/Makefile    |   1 +
>  drivers/pci/host/pcie-rcar.c | 768 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 775 insertions(+)
>  create mode 100644 drivers/pci/host/pcie-rcar.c
>
> diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
> new file mode 100644
> index 0000000..3c524b9
> --- /dev/null
> +++ b/drivers/pci/host/pcie-rcar.c

> +static void rcar_pcie_enable(struct rcar_pcie *pcie)
> +{
> +       struct platform_device *pdev = to_platform_device(pcie->dev);
> +
> +       rcar_pci.nr_controllers = 1;
> +       rcar_pci.private_data = (void **)&pcie;
> +
> +       pci_common_init_dev(&pdev->dev, &rcar_pci);
> +#ifdef CONFIG_PCI_DOMAINS
> +       rcar_pci.domain++;
> +#endif
> +}
> +

How does it work when you have 2 PCIe DT nodes?
(because from my point of view pci_common_init_dev() can't be called twice)

void pci_common_init_dev(struct device *parent, struct hw_pci *hw)
{
    struct pci_sys_data *sys;
    LIST_HEAD(head);

    pci_add_flags(PCI_REASSIGN_ALL_RSRC);
    if (hw->preinit)
        hw->preinit();
    pcibios_init_hw(parent, hw, &head);
...

static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
                struct list_head *head)
{
    struct pci_sys_data *sys = NULL;
    int ret;
    int nr, busnr;

    for (nr = busnr = 0; nr < hw->nr_controllers; nr++) {
        sys = kzalloc(sizeof(struct pci_sys_data), GFP_KERNEL);
        if (!sys)
            panic("PCI: unable to allocate sys data!");

#ifdef CONFIG_PCI_DOMAINS
        sys->domain  = hw->domain;
#endif
        sys->busnr   = busnr;
...

the issue is that sys->busnr always starts at zero ...

how did you fix this problem?

Thanks.
Best Regards

Gabriel.
Phil Edworthy June 23, 2014, 4:44 p.m. UTC | #3
Hi Sergei,

On 18 June 2014 22:51, Sergei wrote:
> On 05/12/2014 02:57 PM, Phil Edworthy wrote:
> 
>     I'm investigating an imprecise external abort occurring once userland is
> started when I have NetMos PCIe serial card inserted and the '8250_pci'
> driver
> enabled and I have found some issues in this driver, while at it...
Shame they didn't come before the driver was accepted, but still, I welcome the
comments. See below.

 
>> This PCIe Host driver currently does not support MSI, so cards
>> fall back to INTx interrupts.
> 
>> Signed-off-by: Phil Edworthy <phil.edworthy@renesas.com>
> 
> [...]
> 
>> diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
>> new file mode 100644
>> index 0000000..3c524b9
>> --- /dev/null
>> +++ b/drivers/pci/host/pcie-rcar.c
>> @@ -0,0 +1,768 @@
> [...]
>> +#define PCI_MAX_RESOURCES 4
> 
>     As a side note, this risks collision with <linux/pci*.h>...
True, I'll fix this.


>> +static void pci_write_reg(struct rcar_pcie *pcie, unsigned long val,
>> +			  unsigned long reg)
>> +{
>> +	writel(val, pcie->base + reg);
>> +}
>> +
>> +static unsigned long pci_read_reg(struct rcar_pcie *pcie, unsigned long
> reg)
>> +{
>> +	return readl(pcie->base + reg);
>> +}
> 
>     As a side note, these functions are hardly needed, and risk collision too...
Ben mentioned this in his review and as I said then, I found them useful during
development, so we agreed to leave them. Since they are static, there shouldn't
be a collision risk.


>> +
>> +enum {
>> +	PCI_ACCESS_READ,
>> +	PCI_ACCESS_WRITE,
> 
>     These risk collision too...
True, I'll fix this.


>> +static void rcar_pcie_setup_window(int win, struct resource *res,
>> +				   struct rcar_pcie *pcie)
> 
>     As a side note, 'res' parameter is hardly needed here, as the function
> always gets
> called with the resources contained within 'struct rcar_pcie'...
Either I would have to pass an index to the resource in, or as I have done, a
pointer to the individual resource. I found it cleaner to pass the pointer.


>> +{
>> +	/* Setup PCIe address space mappings for each resource */
>> +	resource_size_t size;
>> +	u32 mask;
>> +
>> +	pci_write_reg(pcie, 0x00000000, PCIEPTCTLR(win));
>> +
>> +	/*
>> +	 * The PAMR mask is calculated in units of 128Bytes, which
>> +	 * keeps things pretty simple.
>> +	 */
>> +	size = resource_size(res);
>> +	mask = (roundup_pow_of_two(size) / SZ_128) - 1;
>> +	pci_write_reg(pcie, mask << 7, PCIEPAMR(win));
>> +
>> +	pci_write_reg(pcie, upper_32_bits(res->start), PCIEPARH(win));
>> +	pci_write_reg(pcie, lower_32_bits(res->start), PCIEPARL(win));
> 
>     My investigation showed and printk() here confirmed that instead of a PCI
> bus address here we have CPU address written to these registers:
> 
> rcar_pcie_setup_window: window 0, resource [io  0xfe100000-0xfe1fffff]
> rcar_pcie_setup_window: window 1, resource [mem 0xfe200000-0xfe3fffff]
> rcar_pcie_setup_window: window 2, resource [mem 0x30000000-0x37ffffff]
> rcar_pcie_setup_window: window 3, resource [mem 0x38000000-0x3fffffff
> pref]
> rcar-pcie fe000000.pcie: PCI host bridge to bus 0000:00
That is a good point, though for all but I/O, as you have found, we use an
identity mapping for CPU-PCI addresses.


>> +
>> +	/* First resource is for IO */
>> +	mask = PAR_ENABLE;
>> +	if (res->flags & IORESOURCE_IO)
>> +		mask |= IO_SPACE;
> 
>     For the memory space this works OK as you're identity-mapping the
> memory
> ranges in your device trees. However, for the I/O space this means that it
> won't work as the BARs in the PCIe devices get programmed with the PCI bus
> addresses but the PCIe window translation register is programmed with a
> CPU
> address which don't at all match (given your device trees) and hence one
> can't
> access the card's I/O mapped registers at all...
Hmm, I couldn't find any cards that supported I/O, so I wasn't able to test
this. Clearly this is an issue that needs looking into.


>> +
>> +	pci_write_reg(pcie, mask, PCIEPTCTLR(win));
>> +}
>> +
>> +static int rcar_pcie_setup(int nr, struct pci_sys_data *sys)
>> +{
>> +	struct rcar_pcie *pcie = sys_to_pcie(sys);
>> +	struct resource *res;
>> +	int i;
>> +
>> +	pcie->root_bus_nr = -1;
>> +
>> +	/* Setup PCI resources */
>> +	for (i = 0; i < PCI_MAX_RESOURCES; i++) {
>> +
>> +		res = &pcie->res[i];
>> +		if (!res->flags)
>> +			continue;
>> +
>> +		rcar_pcie_setup_window(i, res, pcie);
>> +
>> +		if (res->flags & IORESOURCE_IO)
>> +			pci_ioremap_io(nr * SZ_64K, res->start);
> 
>    I'm not sure why are you not calling pci_add_resource() for I/O space...
> Also, this sets up only 64 KiB of I/O ports while your device tree describes
> I/O space 1 MiB is size.
This driver should be able to cope with multiple host controllers, so each is
allocated 64KiB for I/O. 64KiB is all you need for I/O, but the R-Car PCIe
hardware has a 1MiB region (the smallest one) that can only be used for one
type of PCIe access.


>> +		else
>> +			pci_add_resource(&sys->resources, res);
>> +	}
>> +	pci_add_resource(&sys->resources, &pcie->busn);
>> +
>> +	return 1;
>> +}
> [...]
>> +static int rcar_pcie_hw_init(struct rcar_pcie *pcie)
>> +{
>> +	int err;
>> +
>> +	/* Begin initialization */
>> +	pci_write_reg(pcie, 0, PCIETCTLR);
>> +
>> +	/* Set mode */
>> +	pci_write_reg(pcie, 1, PCIEMSR);
>> +
>> +	/*
>> +	 * Initial header for port config space is type 1, set the device
>> +	 * class to match. Hardware takes care of propagating the IDSETR
>> +	 * settings, so there is no need to bother with a quirk.
>> +	 */
>> +	pci_write_reg(pcie, PCI_CLASS_BRIDGE_PCI << 16, IDSETR1);
> 
>     Hm, shouldn't this be a host bridge? I've noticed that the bridge's I/O
> and memory base/limit registers are left uninitialized even though the BARs
> of
> the PICe devices behind this bridge are assigned.
No, I am pretty sure this is correct.


>> +
>> +	/*
>> +	 * Setup Secondary Bus Number & Subordinate Bus Number, even
> though
>> +	 * they aren't used, to avoid bridge being detected as broken.
>> +	 */
>> +	rcar_rmw32(pcie, RCONF(PCI_SECONDARY_BUS), 0xff, 1);
>> +	rcar_rmw32(pcie, RCONF(PCI_SUBORDINATE_BUS), 0xff, 1);
>> +
>> +	/* Initialize default capabilities. */
>> +	rcar_rmw32(pcie, REXPCAP(0), 0, PCI_CAP_ID_EXP);
>> +	rcar_rmw32(pcie, REXPCAP(PCI_EXP_FLAGS),
>> +		PCI_EXP_FLAGS_TYPE, PCI_EXP_TYPE_ROOT_PORT << 4);
>> +	rcar_rmw32(pcie, RCONF(PCI_HEADER_TYPE), 0x7f,
>> +		PCI_HEADER_TYPE_BRIDGE);
>> +
>> +	/* Enable data link layer active state reporting */
>> +	rcar_rmw32(pcie, REXPCAP(PCI_EXP_LNKCAP), 0,
> PCI_EXP_LNKCAP_DLLLARC);
>> +
>> +	/* Write out the physical slot number = 0 */
>> +	rcar_rmw32(pcie, REXPCAP(PCI_EXP_SLTCAP),
> PCI_EXP_SLTCAP_PSN, 0);
>> +
>> +	/* Set the completion timer timeout to the maximum 50ms. */
>> +	rcar_rmw32(pcie, TLCTLR+1, 0x3f, 50);
> 
>     Missing spaces around '+'...
Ok


>> +
>> +	/* Terminate list of capabilities (Next Capability Offset=0) */
>> +	rcar_rmw32(pcie, RVCCAP(0), 0xfff0, 0);
>> +
>> +	/* Enable MAC data scrambling. */
>> +	rcar_rmw32(pcie, MACCTLR, SCRAMBLE_DISABLE, 0);
> 
>     Doesn't the comment contradict the code here?
No, the rmw32 function is read, modify, write and the SCRAMBLE_DISABLE shown
here is the mask, not the value. If the last arg was 1, the call would set the
scramble disable bit to 1.
Anyway, scrambling is enabled by default in the HW, so I'll remove this.


>> +
>> +	/* Finish initialization - establish a PCI Express link */
>> +	pci_write_reg(pcie, CFINIT, PCIETCTLR);
>> +
>> +	/* This will timeout if we don't have a link. */
>> +	err = rcar_pcie_wait_for_dl(pcie);
>> +	if (err)
>> +		return err;
>> +
>> +	/* Enable INTx interrupts */
>> +	rcar_rmw32(pcie, PCIEINTXR, 0, 0xF << 8);
>> +
>> +	/* Enable slave Bus Mastering */
>> +	rcar_rmw32(pcie, RCONF(PCI_STATUS), PCI_STATUS_DEVSEL_MASK,
>> +		PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
> PCI_COMMAND_MASTER |
>> +		PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST);
> 
>     Hmm, you're mixing up PCI control/status registers' bits here; they're
> two 16-bit registers! So you're writing to 3 reserved LSBs of the PCI status
> register...
The mask arg should make sure that we don't write to reserved bits. However,
the bits & mask combination is clearly wrong & I'll look at this.
Somewhere along the line, the use of the mask arg to the rcar_rmw32 function
has clearly gone astray. I checked all the rmw calls and found another couple
that are wrong.


>> +static int rcar_pcie_get_resources(struct platform_device *pdev,
>> +				   struct rcar_pcie *pcie)
>> +{
>> +	struct resource res;
>> +	int err;
>> +
>> +	err = of_address_to_resource(pdev->dev.of_node, 0, &res);
> 
>     BTW, you could use platfrom_get_resource() and save on your local
> variable
> and the error check -- devm_ioremap_resource() does it.
>> +	if (err)
>> +		return err;
>> +
>> +	pcie->clk = devm_clk_get(&pdev->dev, "pcie");
>> +	if (IS_ERR(pcie->clk)) {
>> +		dev_err(pcie->dev, "cannot get platform clock\n");
>> +		return PTR_ERR(pcie->clk);
>> +	}
>> +	err = clk_prepare_enable(pcie->clk);
>> +	if (err)
>> +		goto fail_clk;
>> +
>> +	pcie->bus_clk = devm_clk_get(&pdev->dev, "pcie_bus");
>> +	if (IS_ERR(pcie->bus_clk)) {
>> +		dev_err(pcie->dev, "cannot get pcie bus clock\n");
>> +		err = PTR_ERR(pcie->bus_clk);
>> +		goto fail_clk;
>> +	}
>> +	err = clk_prepare_enable(pcie->bus_clk);
>> +	if (err)
>> +		goto err_map_reg;
>> +
>> +	pcie->base = devm_ioremap_resource(&pdev->dev, &res);
>> +	if (IS_ERR(pcie->base)) {
>> +		err = PTR_ERR(pcie->base);
>> +		goto err_map_reg;
>> +	}
>> +
>> +	return 0;
>> +
>> +err_map_reg:
>> +	clk_disable_unprepare(pcie->bus_clk);
>> +fail_clk:
>> +	clk_disable_unprepare(pcie->clk);
>> +
>> +	return err;
>> +}
>> +
>> +static int rcar_pcie_inbound_ranges(struct rcar_pcie *pcie,
>> +				    struct of_pci_range *range,
>> +				    int *index)
>> +{
>> +	u64 restype = range->flags;
>> +	u64 cpu_addr = range->cpu_addr;
>> +	u64 cpu_end = range->cpu_addr + range->size;
>> +	u64 pci_addr = range->pci_addr;
>> +	u32 flags = LAM_64BIT | LAR_ENABLE;
>> +	u64 mask;
>> +	u64 size;
>> +	int idx = *index;
>> +
>> +	if (restype & IORESOURCE_PREFETCH)
>> +		flags |= LAM_PREFETCH;
>> +
>> +	/*
>> +	 * If the size of the range is larger than the alignment of the start
>> +	 * address, we have to use multiple entries to perform the mapping.
>> +	 */
>> +	if (cpu_addr > 0) {
>> +		unsigned long nr_zeros = __ffs64(cpu_addr);
>> +		u64 alignment = 1ULL << nr_zeros;
> 
>     Missing newline...
Ok


>> +		size = min(range->size, alignment);
>> +	} else {
>> +		size = range->size;
>> +	}
>> +	/* Hardware supports max 4GiB inbound region */
>> +	size = min(size, 1ULL << 32);
>> +
>> +	mask = roundup_pow_of_two(size) - 1;
>> +	mask &= ~0xf;
>> +
>> +	while (cpu_addr < cpu_end) {
>> +		/*
>> +		 * Set up 64-bit inbound regions as the range parser doesn't
>> +		 * distinguish between 32 and 64-bit types.
>> +		 */
>> +		pci_write_reg(pcie, lower_32_bits(pci_addr),
> PCIEPRAR(idx));
>> +		pci_write_reg(pcie, lower_32_bits(cpu_addr), PCIELAR(idx));
>> +		pci_write_reg(pcie, lower_32_bits(mask) | flags,
> PCIELAMR(idx));
>> +
>> +		pci_write_reg(pcie, upper_32_bits(pci_addr),
> PCIEPRAR(idx+1));
>> +		pci_write_reg(pcie, upper_32_bits(cpu_addr),
> PCIELAR(idx+1));
>> +		pci_write_reg(pcie, 0, PCIELAMR(idx+1));
> 
>     Missing spaces around '+'...
Ok

>> +
>> +		pci_addr += size;
>> +		cpu_addr += size;
>> +		idx += 2;
>> +
>> +		if (idx > MAX_NR_INBOUND_MAPS) {
>> +			dev_err(pcie->dev, "Failed to map inbound
> regions!\n");
>> +			return -EINVAL;
>> +		}
>> +	}
>> +	*index = idx;
>> +
>> +	return 0;
>> +}
>> +
>> +static int pci_dma_range_parser_init(struct of_pci_range_parser *parser,
>> +				     struct device_node *node)
>> +{
>> +	const int na = 3, ns = 2;
>> +	int rlen;
>> +
>> +	parser->node = node;
>> +	parser->pna = of_n_addr_cells(node);
>> +	parser->np = parser->pna + na + ns;
>> +
>> +	parser->range = of_get_property(node, "dma-ranges", &rlen);
>> +	if (!parser->range)
>> +		return -ENOENT;
>> +
>> +	parser->end = parser->range + rlen / sizeof(__be32);
>> +	return 0;
>> +}
> 
>     Erm, AFAIK "dma-ranges" is a standard property, shouldn't its parsing be
> placed in some generic place like drivers/of/address.c?
I suppose you are right, something else to fix.


> [...]
>> +static int rcar_pcie_probe(struct platform_device *pdev)
>> +{
>> +	struct rcar_pcie *pcie;
>> +	unsigned int data;
>> +	struct of_pci_range range;
>> +	struct of_pci_range_parser parser;
>> +	const struct of_device_id *of_id;
>> +	int err, win = 0;
>> +	int (*hw_init_fn)(struct rcar_pcie *);
>> +
>> +	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
>> +	if (!pcie)
>> +		return -ENOMEM;
>> +
>> +	pcie->dev = &pdev->dev;
>> +	platform_set_drvdata(pdev, pcie);
>> +
>> +	/* Get the bus range */
>> +	if (of_pci_parse_bus_range(pdev->dev.of_node, &pcie->busn)) {
>> +		dev_err(&pdev->dev, "failed to parse bus-range
> property\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (of_pci_range_parser_init(&parser, pdev->dev.of_node)) {
>> +		dev_err(&pdev->dev, "missing ranges property\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	err = rcar_pcie_get_resources(pdev, pcie);
>> +	if (err < 0) {
>> +		dev_err(&pdev->dev, "failed to request resources: %d\n",
> err);
>> +		return err;
>> +	}
>> +
>> +	for_each_of_pci_range(&parser, &range) {
>> +		of_pci_range_to_resource(&range, pdev->dev.of_node,
>> +						&pcie->res[win++]);
> 
>     This function call is probably no good here as it fetches into the 'start'
> field of a 'struct resource' a CPU address instead of a PCI address...
No, the ranges describe the CPU addresses of the PCI memory and I/O regions, so
this is correct.


>> +
>> +		if (win > PCI_MAX_RESOURCES)
>> +			break;
>> +	}
>> +
>> +	 err = rcar_pcie_parse_map_dma_ranges(pcie, pdev->dev.of_node);
>> +	 if (err)
>> +		return err;
>> +
>> +	of_id = of_match_device(rcar_pcie_of_match, pcie->dev);
>> +	if (!of_id || !of_id->data)
>> +		return -EINVAL;
>> +	hw_init_fn = of_id->data;
>> +
>> +	/* Failure to get a link might just be that no cards are inserted */
>> +	err = hw_init_fn(pcie);
>> +	if (err) {
>> +		dev_info(&pdev->dev, "PCIe link down\n");
>> +		return 0;
> 
>     Not quite sure why you exit normally here without enabling the hardware.
> I think the internal bridge should be visible regardless of whether link is
> detected or not...
Why would you want to see the bridge when you can do nothing with it? Aren't
you just wasting resources?


>> +	}
>> +
>> +	data = pci_read_reg(pcie, MACSR);
>> +	dev_info(&pdev->dev, "PCIe x%d: link up\n", (data >> 20) & 0x3f);
>> +
>> +	rcar_pcie_enable(pcie);
>> +
>> +	return 0;
>> +}
> [...]

Thanks
Phil
Sergei Shtylyov June 23, 2014, 9:11 p.m. UTC | #4
Hello.

On 06/23/2014 08:44 PM, Phil Edworthy wrote:

>>      I'm investigating an imprecise external abort occurring once userland is
>> started when I have NetMos

    Or is it MosChip now? Can't remember all their renames. :-)

>> PCIe serial card inserted and the '8250_pci'
>> driver
>> enabled and I have found some issues in this driver, while at it...

    I should mention that the serial PCI device has both I/O port and memory 
BARs; it's the driver's choice to use the I/O ports.

> Shame they didn't come before the driver was accepted,

    Sorry, I don't usually review large patches -- it's very time consuming 
(my review took 2+ hours and yet I haven't pointed out all issues).

> but still, I welcome the comments. See below.

    Thanks. :-)

>>> This PCIe Host driver currently does not support MSI, so cards
>>> fall back to INTx interrupts.

>>> Signed-off-by: Phil Edworthy <phil.edworthy@renesas.com>

[...]

>>> diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
>>> new file mode 100644
>>> index 0000000..3c524b9
>>> --- /dev/null
>>> +++ b/drivers/pci/host/pcie-rcar.c
>>> @@ -0,0 +1,768 @@

[...]

>>> +static void pci_write_reg(struct rcar_pcie *pcie, unsigned long val,
>>> +			  unsigned long reg)
>>> +{
>>> +	writel(val, pcie->base + reg);
>>> +}
>>> +
>>> +static unsigned long pci_read_reg(struct rcar_pcie *pcie, unsigned long
>> reg)
>>> +{
>>> +	return readl(pcie->base + reg);
>>> +}

>>      As a side note, these functions are hardly needed, and risk collision too...

> Ben mentioned this in his review and as I said then, I found them useful during
> development, so we agreed to leave them. Since they are static, there shouldn't
> be a collision risk.

    You're risking clashes even at the source level, not even at object file 
level...

>>> +static void rcar_pcie_setup_window(int win, struct resource *res,
>>> +				   struct rcar_pcie *pcie)

>>      As a side note, 'res' parameter is hardly needed here, as the function
>> always gets
>> called with the resources contained within 'struct rcar_pcie'...

> Either I would have to pass an index to the resource in,

    But you already do pass it, 'win' is the index!

> or as I have done, a
> pointer to the individual resource. I found it cleaner to pass the pointer.

    You're actually passing excess parameters, both the index and the pointer.

[...]

>>> +
>>> +	/* First resource is for IO */
>>> +	mask = PAR_ENABLE;
>>> +	if (res->flags & IORESOURCE_IO)
>>> +		mask |= IO_SPACE;

>>      For the memory space this works OK as you're identity-mapping the
>> memory
>> ranges in your device trees. However, for the I/O space this means that it
>> won't work as the BARs in the PCIe devices get programmed with the PCI bus
>> addresses but the PCIe window translation register is programmed with a
>> CPU
>> address which don't at all match (given your device trees) and hence one
>> can't
>> access the card's I/O mapped registers at all...

> Hmm, I couldn't find any cards that supported I/O, so I wasn't able to test
> this. Clearly this is an issue that needs looking into.

    Will you look into it then, or should I?

>>> +
>>> +	pci_write_reg(pcie, mask, PCIEPTCTLR(win));
>>> +}
>>> +
>>> +static int rcar_pcie_setup(int nr, struct pci_sys_data *sys)
>>> +{
>>> +	struct rcar_pcie *pcie = sys_to_pcie(sys);
>>> +	struct resource *res;
>>> +	int i;
>>> +
>>> +	pcie->root_bus_nr = -1;
>>> +
>>> +	/* Setup PCI resources */
>>> +	for (i = 0; i < PCI_MAX_RESOURCES; i++) {
>>> +
>>> +		res = &pcie->res[i];
>>> +		if (!res->flags)
>>> +			continue;
>>> +
>>> +		rcar_pcie_setup_window(i, res, pcie);
>>> +
>>> +		if (res->flags & IORESOURCE_IO)
>>> +			pci_ioremap_io(nr * SZ_64K, res->start);

>>     I'm not sure why are you not calling pci_add_resource() for I/O space...

    Sorry, did you reply to that?

>> Also, this sets up only 64 KiB of I/O ports while your device tree describes
>> I/O space 1 MiB is size.

> This driver should be able to cope with multiple host controllers, so each
> allocated 64KiB for I/O. 64KiB is all you need for I/O, but the R-Car PCIe
> hardware has a 1MiB region (the smallest one) that can only be used for one
> type of PCIe access.

[...]

>>> +static int rcar_pcie_hw_init(struct rcar_pcie *pcie)
>>> +{
>>> +	int err;
>>> +
>>> +	/* Begin initialization */
>>> +	pci_write_reg(pcie, 0, PCIETCTLR);
>>> +
>>> +	/* Set mode */
>>> +	pci_write_reg(pcie, 1, PCIEMSR);
>>> +
>>> +	/*
>>> +	 * Initial header for port config space is type 1, set the device
>>> +	 * class to match. Hardware takes care of propagating the IDSETR
>>> +	 * settings, so there is no need to bother with a quirk.
>>> +	 */
>>> +	pci_write_reg(pcie, PCI_CLASS_BRIDGE_PCI << 16, IDSETR1);

>>      Hm, shouldn't this be a host bridge? I've noticed that the bridge's I/O
>> and memory base/limit registers are left uninitialized even though the BARs
>> of the PICe devices behind this bridge are assigned.

> No, I am pretty sure this is correct.

    It just looks strange. What you actually have is clearly a host-to-PCI 
bridge. Instead you have one "virtual" PCI bus consisting of only PCI-PCI 
bridge device, and the real PCIe bus hanging from the PCI-PCI bridge. Weird...

[...]

>>> +
>>> +	/* Terminate list of capabilities (Next Capability Offset=0) */
>>> +	rcar_rmw32(pcie, RVCCAP(0), 0xfff0, 0);
>>> +
>>> +	/* Enable MAC data scrambling. */

    I wonder what does MAC mean in the PCIe context...

>>> +	rcar_rmw32(pcie, MACCTLR, SCRAMBLE_DISABLE, 0);

>>      Doesn't the comment contradict the code here?

> No, the rmw32 function is read, modify, write and the SCRAMBLE_DISABLE shown
> here is the mask, not the value. If the last arg was 1, the call would set the
> scramble disable bit to 1.

    Ah, missed that, sorry.

> Anyway, scrambling is enabled by default in the HW, so I'll remove this.

    OK.

>>> +
>>> +	/* Finish initialization - establish a PCI Express link */
>>> +	pci_write_reg(pcie, CFINIT, PCIETCTLR);
>>> +
>>> +	/* This will timeout if we don't have a link. */
>>> +	err = rcar_pcie_wait_for_dl(pcie);
>>> +	if (err)
>>> +		return err;
>>> +
>>> +	/* Enable INTx interrupts */
>>> +	rcar_rmw32(pcie, PCIEINTXR, 0, 0xF << 8);
>>> +
>>> +	/* Enable slave Bus Mastering */
>>> +	rcar_rmw32(pcie, RCONF(PCI_STATUS), PCI_STATUS_DEVSEL_MASK,
>>> +		PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
>> PCI_COMMAND_MASTER |
>>> +		PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST);

>>      Hmm, you're mixing up PCI control/status registers' bits here; they're
>> two 16-bit registers! So you're writing to 3 reserved LSBs of the PCI status
>> register...

    ... and therefore not writing these bits to the PCI command (not control, 
sorry) register. Perhaps because of that PCI-PCI bridge remains inactive...

> The mask arg should make sure that we don't write to reserved bits. However,

    Look at rcar_rmw32() again -- it doesn't really do that.

> the bits & mask combination is clearly wrong & I'll look at this.
> Somewhere along the line, the use of the mask arg to the rcar_rmw32 function
> has clearly gone astray. I checked all the rmw calls and found another couple
> that are wrong.

    OK, please fix those.

[...]

>>> +static int rcar_pcie_probe(struct platform_device *pdev)
>>> +{
>>> +	struct rcar_pcie *pcie;
>>> +	unsigned int data;
>>> +	struct of_pci_range range;
>>> +	struct of_pci_range_parser parser;
>>> +	const struct of_device_id *of_id;
>>> +	int err, win = 0;
>>> +	int (*hw_init_fn)(struct rcar_pcie *);
>>> +
>>> +	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
>>> +	if (!pcie)
>>> +		return -ENOMEM;
>>> +
>>> +	pcie->dev = &pdev->dev;
>>> +	platform_set_drvdata(pdev, pcie);
>>> +
>>> +	/* Get the bus range */
>>> +	if (of_pci_parse_bus_range(pdev->dev.of_node, &pcie->busn)) {
>>> +		dev_err(&pdev->dev, "failed to parse bus-range
>> property\n");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	if (of_pci_range_parser_init(&parser, pdev->dev.of_node)) {
>>> +		dev_err(&pdev->dev, "missing ranges property\n");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	err = rcar_pcie_get_resources(pdev, pcie);
>>> +	if (err < 0) {
>>> +		dev_err(&pdev->dev, "failed to request resources: %d\n",
>> err);
>>> +		return err;
>>> +	}
>>> +
>>> +	for_each_of_pci_range(&parser, &range) {
>>> +		of_pci_range_to_resource(&range, pdev->dev.of_node,
>>> +						&pcie->res[win++]);

>>      This function call is probably no good here as it fetches into the 'start'
>> field of a 'struct resource' a CPU address instead of a PCI address...

> No, the ranges describe the CPU addresses of the PCI memory and I/O regions, so
> this is correct.

    The problem actually is that you need to remember both CPU and PCI 
addresses, so 'struct of_pci_range' looks more fitting here...

>>> +
>>> +		if (win > PCI_MAX_RESOURCES)
>>> +			break;
>>> +	}
>>> +
>>> +	 err = rcar_pcie_parse_map_dma_ranges(pcie, pdev->dev.of_node);
>>> +	 if (err)
>>> +		return err;
>>> +
>>> +	of_id = of_match_device(rcar_pcie_of_match, pcie->dev);
>>> +	if (!of_id || !of_id->data)
>>> +		return -EINVAL;
>>> +	hw_init_fn = of_id->data;
>>> +
>>> +	/* Failure to get a link might just be that no cards are inserted */
>>> +	err = hw_init_fn(pcie);
>>> +	if (err) {
>>> +		dev_info(&pdev->dev, "PCIe link down\n");
>>> +		return 0;

>>      Not quite sure why you exit normally here without enabling the hardware.
>> I think the internal bridge should be visible regardless of whether link is
>> detected or not...

> Why would you want to see the bridge when you can do nothing with it? Aren't

    Because it's the way PCI works. You have the built-in devices always 
present and seen on a PCI bus. :-)

> you are just wasting resources?

    I think it's rather you who are wasting resources. ;-) Why not just fail 
the probe when you have no link?

WBR, Sergei
Phil Edworthy June 24, 2014, 10:01 a.m. UTC | #5
Hi Sergei,

On 23 June 2014 22:11, Sergei wrote:
> On 06/23/2014 08:44 PM, Phil Edworthy wrote:
> 
>>>      I'm investigating an imprecise external abort occurring once userland is
>>> started when I have NetMos
> 
>     Or is it MosChip now? Can't remember all their renames. :-)
Do you know of somewhere I can buy a card with this chipset in the EU? I had a
quick search but couldn't find anything.

[...]
>>>> +static void pci_write_reg(struct rcar_pcie *pcie, unsigned long val,
>>>> +			  unsigned long reg)
>>>> +{
>>>> +	writel(val, pcie->base + reg);
>>>> +}
>>>> +
>>>> +static unsigned long pci_read_reg(struct rcar_pcie *pcie, unsigned long
>>> reg)
>>>> +{
>>>> +	return readl(pcie->base + reg);
>>>> +}
> 
>>>      As a side note, these functions are hardly needed, and risk collision
> too...
> 
>> Ben mentioned this in his review and as I said then, I found them useful
> during
>> development, so we agreed to leave them. Since they are static, there
> shouldn't
>> be a collision risk.
> 
>     You're risking clashes even at the source level, not even at object file
> level...
Ah, yes you are correct. 


>>>> +static void rcar_pcie_setup_window(int win, struct resource *res,
>>>> +				   struct rcar_pcie *pcie)
> 
>>>      As a side note, 'res' parameter is hardly needed here, as the function
>>> always gets
>>> called with the resources contained within 'struct rcar_pcie'...
> 
>> Either I would have to pass an index to the resource in,
> 
>     But you already do pass it, 'win' is the index!
> 
>> or as I have done, a
>> pointer to the individual resource. I found it cleaner to pass the pointer.
> 
>     You're actually pass excess parameters, both the index and the pointer.
Ha, yes I didn't notice that :)

[...]
>>>> +
>>>> +	/* First resource is for IO */
>>>> +	mask = PAR_ENABLE;
>>>> +	if (res->flags & IORESOURCE_IO)
>>>> +		mask |= IO_SPACE;
> 
>>>      For the memory space this works OK as you're identity-mapping the
>>> memory
>>> ranges in your device trees. However, for the I/O space this means that it
>>> won't work as the BARs in the PCIe devices get programmed with the PCI
> bus
>>> addresses but the PCIe window translation register is programmed with a
>>> CPU
>>> address which don't at all match (given your device trees) and hence one
>>> can't
>>> access the card's I/O mapped registers at all...
> 
>> Hmm, I couldn't find any cards that supported I/O, so I wasn't able to test
>> this. Clearly this is an issue that needs looking into.
> 
>     Will you look into it then, or should I?
I'll look at it.

>>>> +
>>>> +	pci_write_reg(pcie, mask, PCIEPTCTLR(win));
>>>> +}
>>>> +
>>>> +static int rcar_pcie_setup(int nr, struct pci_sys_data *sys)
>>>> +{
>>>> +	struct rcar_pcie *pcie = sys_to_pcie(sys);
>>>> +	struct resource *res;
>>>> +	int i;
>>>> +
>>>> +	pcie->root_bus_nr = -1;
>>>> +
>>>> +	/* Setup PCI resources */
>>>> +	for (i = 0; i < PCI_MAX_RESOURCES; i++) {
>>>> +
>>>> +		res = &pcie->res[i];
>>>> +		if (!res->flags)
>>>> +			continue;
>>>> +
>>>> +		rcar_pcie_setup_window(i, res, pcie);
>>>> +
>>>> +		if (res->flags & IORESOURCE_IO)
>>>> +			pci_ioremap_io(nr * SZ_64K, res->start);
> 
>>>     I'm not sure why are you not calling pci_add_resource() for I/O space...
> 
>     Sorry, did you reply to that?
I used the tegra driver to inform on what I should do for this. This doesn't
call pci_add_resource() for I/O space either. However, I also see that other
drivers do call this. I think the simplest thing is for me to get a card that
supports I/O space and properly test it.

>>> Also, this sets up only 64 KiB of I/O ports while your device tree describes
>>> I/O space 1 MiB is size.
> 
>> This driver should be able to cope with multiple host controllers, so each
>> allocated 64KiB for I/O. 64KiB is all you need for I/O, but the R-Car PCIe
>> hardware has a 1MiB region (the smallest one) that can only be used for
> one
>> type of PCIe access.
> 
[...]
>>>> +
>>>> +	/* Finish initialization - establish a PCI Express link */
>>>> +	pci_write_reg(pcie, CFINIT, PCIETCTLR);
>>>> +
>>>> +	/* This will timeout if we don't have a link. */
>>>> +	err = rcar_pcie_wait_for_dl(pcie);
>>>> +	if (err)
>>>> +		return err;
>>>> +
>>>> +	/* Enable INTx interrupts */
>>>> +	rcar_rmw32(pcie, PCIEINTXR, 0, 0xF << 8);
>>>> +
>>>> +	/* Enable slave Bus Mastering */
>>>> +	rcar_rmw32(pcie, RCONF(PCI_STATUS), PCI_STATUS_DEVSEL_MASK,
>>>> +		PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
>>> PCI_COMMAND_MASTER |
>>>> +		PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST);
> 
>>>      Hmm, you're mixing up PCI control/status registers' bits here; they're
>>> two 16-bit registers! So you're writing to 3 reserved LSBs of the PCI status
>>> register...
> 
>     ... and therefore not writing these bits to the PCI command (not control,
> sorry) register. Perhaps because of that PCI-PCI bridge remains inactive...
> 
>> The mask arg should make sure that we don't write to reserved bits.
> However,
> 
>     Look at rcar_rmw32() again -- it doesn't really do that.
Yeah, that's why I said it should, not does. It only clears those bits in the
register's current value.

[...]
>>>> +static int rcar_pcie_probe(struct platform_device *pdev)
>>>> +{
>>>> +	struct rcar_pcie *pcie;
>>>> +	unsigned int data;
>>>> +	struct of_pci_range range;
>>>> +	struct of_pci_range_parser parser;
>>>> +	const struct of_device_id *of_id;
>>>> +	int err, win = 0;
>>>> +	int (*hw_init_fn)(struct rcar_pcie *);
>>>> +
>>>> +	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
>>>> +	if (!pcie)
>>>> +		return -ENOMEM;
>>>> +
>>>> +	pcie->dev = &pdev->dev;
>>>> +	platform_set_drvdata(pdev, pcie);
>>>> +
>>>> +	/* Get the bus range */
>>>> +	if (of_pci_parse_bus_range(pdev->dev.of_node, &pcie->busn)) {
>>>> +		dev_err(&pdev->dev, "failed to parse bus-range
>>> property\n");
>>>> +		return -EINVAL;
>>>> +	}
>>>> +
>>>> +	if (of_pci_range_parser_init(&parser, pdev->dev.of_node)) {
>>>> +		dev_err(&pdev->dev, "missing ranges property\n");
>>>> +		return -EINVAL;
>>>> +	}
>>>> +
>>>> +	err = rcar_pcie_get_resources(pdev, pcie);
>>>> +	if (err < 0) {
>>>> +		dev_err(&pdev->dev, "failed to request resources: %d\n",
>>> err);
>>>> +		return err;
>>>> +	}
>>>> +
>>>> +	for_each_of_pci_range(&parser, &range) {
>>>> +		of_pci_range_to_resource(&range, pdev->dev.of_node,
>>>> +						&pcie->res[win++]);
> 
>>>      This function call is probably no good here as it fetches into the 'start'
>>> field of a 'struct resource' a CPU address instead of a PCI address...
> 
>> No, the ranges describe the CPU addresses of the PCI memory and I/O
> regions, so
>> this is correct.
> 
>     The problem actually is that you need to remember both CPU and PCI
> addresses, so 'struct of_pci_range' looks more fitting here...
Right, I see... this is rather a mess with all the host drivers!

>>>> +
>>>> +		if (win > PCI_MAX_RESOURCES)
>>>> +			break;
>>>> +	}
>>>> +
>>>> +	 err = rcar_pcie_parse_map_dma_ranges(pcie, pdev->dev.of_node);
>>>> +	 if (err)
>>>> +		return err;
>>>> +
>>>> +	of_id = of_match_device(rcar_pcie_of_match, pcie->dev);
>>>> +	if (!of_id || !of_id->data)
>>>> +		return -EINVAL;
>>>> +	hw_init_fn = of_id->data;
>>>> +
>>>> +	/* Failure to get a link might just be that no cards are inserted */
>>>> +	err = hw_init_fn(pcie);
>>>> +	if (err) {
>>>> +		dev_info(&pdev->dev, "PCIe link down\n");
>>>> +		return 0;
> 
>>>      Not quite sure why you exit normally here without enabling the
> hardware.
>>> I think the internal bridge should be visible regardless of whether link is
>>> detected or not...
> 
>> Why would you want to see the bridge when you can do nothing with it?
> Aren't
> 
>     Because it's the way PCI works. You have the built-in devices always
> present and seen on a PCI bus. :-)
> 
>> you are just wasting resources?
> 
>     I think it's rather you who are wasting resources. ;-) Why not just fail
> the probe when you have no link?
Ah, so we currently have a half-way house... not failing the probe, but not
enabling the HW. Either all or nothing would be preferable.

Thanks
Phil
Sergei Shtylyov June 24, 2014, 9:19 p.m. UTC | #6
Hello.

On 06/24/2014 02:01 PM, Phil Edworthy wrote:

>>>>       I'm investigating an imprecise external abort occurring once userland is
>>>> started when I have NetMos

>>      Or is it MosChip now? Can't remember all their renames. :-)

> Do you know of somewhere I can buy a card with this chipset in the EU? I had a
> quick search but couldn't find anything.

    No. But we probably can send such card to you.

[...]

>>>>> +
>>>>> +	/* First resource is for IO */
>>>>> +	mask = PAR_ENABLE;
>>>>> +	if (res->flags & IORESOURCE_IO)
>>>>> +		mask |= IO_SPACE;

>>>>       For the memory space this works OK as you're identity-mapping the
>>>> memory
>>>> ranges in your device trees. However, for the I/O space this means that it
>>>> won't work as the BARs in the PCIe devices get programmed with the PCI bus
>>>> addresses but the PCIe window translation register is programmed with a
>>>> CPU
>>>> address which don't at all match (given your device trees) and hence one
>>>> can't
>>>> access the card's I/O mapped registers at all...

>>> Hmm, I couldn't find any cards that supported I/O, so I wasn't able to test
>>> this. Clearly this is an issue that needs looking into.

>>      Will you look into it then, or should I?

> I'll look at it.

    Thanks.

[...]

>>>>> +
>>>>> +		if (win > PCI_MAX_RESOURCES)
>>>>> +			break;
>>>>> +	}
>>>>> +
>>>>> +	 err = rcar_pcie_parse_map_dma_ranges(pcie, pdev->dev.of_node);
>>>>> +	 if (err)
>>>>> +		return err;
>>>>> +
>>>>> +	of_id = of_match_device(rcar_pcie_of_match, pcie->dev);
>>>>> +	if (!of_id || !of_id->data)
>>>>> +		return -EINVAL;
>>>>> +	hw_init_fn = of_id->data;
>>>>> +
>>>>> +	/* Failure to get a link might just be that no cards are inserted */
>>>>> +	err = hw_init_fn(pcie);
>>>>> +	if (err) {
>>>>> +		dev_info(&pdev->dev, "PCIe link down\n");
>>>>> +		return 0;

>>>>       Not quite sure why you exit normally here without enabling the
>>>> hardware.
>>>> I think the internal bridge should be visible regardless of whether link is
>>>> detected or not...

>>> Why would you want to see the bridge when you can do nothing with it?
>> Aren't

>>      Because it's the way PCI works. You have the built-in devices always
>> present and seen on a PCI bus. :-)

>>> you are just wasting resources?

>>      I think it's rather you who are wasting resources. ;-) Why not just fail
>> the probe when you have no link?

> Ah, so we currently have a half-way house... not failing the probe, but not
> enabling the HW. Either all or nothing would be preferable.

    Actually, I tried ignoring the link test and the kernel died a horrible
death without any console output. :-/ Having enabled the earlyprintk support, 
I was able to see the reason: panic("PCI: unable to scan bus!") in 
arch/arm/kernel/bios32.c...

> Thanks
> Phil

WBR, Sergei
Phil Edworthy June 27, 2014, 4:40 p.m. UTC | #7
Hi Sergei,

On 24 June 2014 22:19, Sergei wrote:
> On 06/24/2014 02:01 PM, Phil Edworthy wrote:
> 
> >>>>       I'm investigating an imprecise external abort occurring once userland
> is
> >>>> started when I have NetMos
> 
> >>      Or is it MosChip now? Can't remember all their renames. :-)
> 
> > Do you know of somewhere I can buy a card with this chipset in the EU? I
> had a
> > quick search but couldn't find anything.
> 
>     No. But we probably can send such card to you.
That would be handy!

 
> [...]
> 
> >>>>> +
> >>>>> +	/* First resource is for IO */
> >>>>> +	mask = PAR_ENABLE;
> >>>>> +	if (res->flags & IORESOURCE_IO)
> >>>>> +		mask |= IO_SPACE;
> 
> >>>>       For the memory space this works OK as you're identity-mapping the
> >>>> memory
> >>>> ranges in your device trees. However, for the I/O space this means that
> it
> >>>> won't work as the BARs in the PCIe devices get programmed with the
> PCI bus
> >>>> addresses but the PCIe window translation register is programmed
> with a
> >>>> CPU
> >>>> address which don't at all match (given your device trees) and hence
> one
> >>>> can't
> >>>> access the card's I/O mapped registers at all...
> 
> >>> Hmm, I couldn't find any cards that supported I/O, so I wasn't able to
> test
> >>> this. Clearly this is an issue that needs looking into.
> 
> >>      Will you look into it then, or should I?
> 
> > I'll look at it.
> 
>     Thanks.
> 
> [...]
> 
> >>>>> +
> >>>>> +		if (win > PCI_MAX_RESOURCES)
> >>>>> +			break;
> >>>>> +	}
> >>>>> +
> >>>>> +	 err = rcar_pcie_parse_map_dma_ranges(pcie, pdev-
> >dev.of_node);
> >>>>> +	 if (err)
> >>>>> +		return err;
> >>>>> +
> >>>>> +	of_id = of_match_device(rcar_pcie_of_match, pcie->dev);
> >>>>> +	if (!of_id || !of_id->data)
> >>>>> +		return -EINVAL;
> >>>>> +	hw_init_fn = of_id->data;
> >>>>> +
> >>>>> +	/* Failure to get a link might just be that no cards are inserted
> */
> >>>>> +	err = hw_init_fn(pcie);
> >>>>> +	if (err) {
> >>>>> +		dev_info(&pdev->dev, "PCIe link down\n");
> >>>>> +		return 0;
> 
> >>>>       Not quite sure why you exit normally here without enabling the
> >>>> hardware.
> >>>> I think the internal bridge should be visible regardless of whether link is
> >>>> detected or not...
> 
> >>> Why would you want to see the bridge when you can do nothing with it?
> >> Aren't
> 
> >>      Because it's the way PCI works. You have the built-in devices always
> >> present and seen on a PCI bus. :-)
> 
> >>> you are just wasting resources?
> 
> >>      I think it's rather you who are wasting resources. ;-) Why not just fail
> >> the probe when you have no link?
> 
> > Ah, so we currently have a half-way house... not failing the probe, but not
> > enabling the HW. Either all or nothing would be preferable.
> 
>     Actually, I tried ignoring the link test and the kernel died horrible
> death without any console output. :-/ Having enabled the earlyprintk
> support,
> I was able to see the reason: panic("PCI: unable to scan bus!") in
> arch/arm/kernel/bios32.c...
> 
> > Thanks
> > Phil
> 
> WBR, Sergei

Thanks
Phil
diff mbox

Patch

diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index a6f67ec..24d290d 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -33,4 +33,10 @@  config PCI_RCAR_GEN2
 	  There are 3 internal PCI controllers available with a single
 	  built-in EHCI/OHCI host controller present on each one.
 
+config PCI_RCAR_GEN2_PCIE
+	bool "Renesas R-Car PCIe controller"
+	depends on ARCH_SHMOBILE || (ARM && COMPILE_TEST)
+	help
+	  Say Y here if you want PCIe controller support on R-Car Gen2 SoCs.
+
 endmenu
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 13fb333..19946f9 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -4,3 +4,4 @@  obj-$(CONFIG_PCI_IMX6) += pci-imx6.o
 obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
 obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
 obj-$(CONFIG_PCI_RCAR_GEN2) += pci-rcar-gen2.o
+obj-$(CONFIG_PCI_RCAR_GEN2_PCIE) += pcie-rcar.o
diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
new file mode 100644
index 0000000..3c524b9
--- /dev/null
+++ b/drivers/pci/host/pcie-rcar.c
@@ -0,0 +1,768 @@ 
+/*
+ * PCIe driver for Renesas R-Car SoCs
+ *  Copyright (C) 2014 Renesas Electronics Europe Ltd
+ *
+ * Based on:
+ *  arch/sh/drivers/pci/pcie-sh7786.c
+ *  arch/sh/drivers/pci/ops-sh7786.c
+ *  Copyright (C) 2009 - 2011  Paul Mundt
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define DRV_NAME "rcar-pcie"
+
+#define PCIECAR			0x000010
+#define PCIECCTLR		0x000018
+#define  CONFIG_SEND_ENABLE	(1 << 31)
+#define  TYPE0			(0 << 8)
+#define  TYPE1			(1 << 8)
+#define PCIECDR			0x000020
+#define PCIEMSR			0x000028
+#define PCIEINTXR		0x000400
+
+/* Transfer control */
+#define PCIETCTLR		0x02000
+#define  CFINIT			1
+#define PCIETSTR		0x02004
+#define  DATA_LINK_ACTIVE	1
+#define PCIEERRFR		0x02020
+#define  UNSUPPORTED_REQUEST	(1 << 4)
+
+/* root port address */
+#define PCIEPRAR(x)		(0x02080 + ((x) * 0x4))
+
+/* local address reg & mask */
+#define PCIELAR(x)		(0x02200 + ((x) * 0x20))
+#define PCIELAMR(x)		(0x02208 + ((x) * 0x20))
+#define  LAM_PREFETCH		(1 << 3)
+#define  LAM_64BIT		(1 << 2)
+#define  LAR_ENABLE		(1 << 1)
+
+/* PCIe address reg & mask */
+#define PCIEPARL(x)		(0x03400 + ((x) * 0x20))
+#define PCIEPARH(x)		(0x03404 + ((x) * 0x20))
+#define PCIEPAMR(x)		(0x03408 + ((x) * 0x20))
+#define PCIEPTCTLR(x)		(0x0340c + ((x) * 0x20))
+#define  PAR_ENABLE		(1 << 31)
+#define  IO_SPACE		(1 << 8)
+
+/* Configuration */
+#define PCICONF(x)		(0x010000 + ((x) * 0x4))
+#define PMCAP(x)		(0x010040 + ((x) * 0x4))
+#define EXPCAP(x)		(0x010070 + ((x) * 0x4))
+#define VCCAP(x)		(0x010100 + ((x) * 0x4))
+
+/* link layer */
+#define IDSETR1			0x011004
+#define TLCTLR			0x011048
+#define MACSR			0x011054
+#define MACCTLR			0x011058
+#define  SCRAMBLE_DISABLE	(1 << 27)
+
+/* R-Car H1 PHY */
+#define H1_PCIEPHYADRR		0x04000c
+#define  WRITE_CMD		(1 << 16)
+#define  PHY_ACK		(1 << 24)
+#define  RATE_POS		12
+#define  LANE_POS		8
+#define  ADR_POS		0
+#define H1_PCIEPHYDOUTR		0x040014
+#define H1_PCIEPHYSR		0x040018
+
+#define RCONF(x)	(PCICONF(0)+(x))
+#define RPMCAP(x)	(PMCAP(0)+(x))
+#define REXPCAP(x)	(EXPCAP(0)+(x))
+#define RVCCAP(x)	(VCCAP(0)+(x))
+
+#define  PCIE_CONF_BUS(b)	(((b) & 0xff) << 24)
+#define  PCIE_CONF_DEV(d)	(((d) & 0x1f) << 19)
+#define  PCIE_CONF_FUNC(f)	(((f) & 0x7) << 16)
+
+#define PCI_MAX_RESOURCES 4
+#define MAX_NR_INBOUND_MAPS 6
+
+/* Per-controller driver state for one R-Car PCIe interface */
+struct rcar_pcie {
+	struct device		*dev;
+	void __iomem		*base;		/* register block base */
+	struct resource		res[PCI_MAX_RESOURCES];
+	struct resource		busn;
+	int			root_bus_nr;	/* negative until known */
+	struct clk		*clk;
+	struct clk		*bus_clk;
+};
+
+static inline struct rcar_pcie *sys_to_pcie(struct pci_sys_data *sys)
+{
+	return sys->private_data;	/* the controller's struct rcar_pcie */
+}
+
+static void pci_write_reg(struct rcar_pcie *pcie, unsigned long val,
+			  unsigned long reg)
+{
+	writel(val, pcie->base + reg);	/* 'reg' is an offset from 'base' */
+}
+
+static unsigned long pci_read_reg(struct rcar_pcie *pcie, unsigned long reg)
+{
+	return readl(pcie->base + reg);	/* 'reg' is an offset from 'base' */
+}
+
+enum {	/* access_type values for rcar_pcie_config_access() */
+	PCI_ACCESS_READ,
+	PCI_ACCESS_WRITE,
+};
+
+static void rcar_rmw32(struct rcar_pcie *pcie, int where, u32 mask, u32 data)
+{
+	int shift = 8 * (where & 3);	/* bit offset within the dword */
+	u32 val = pci_read_reg(pcie, where & ~3);	/* aligned dword */
+
+	val &= ~(mask << shift);
+	val |= data << shift;	/* NB: 'data' is not ANDed with 'mask' */
+	pci_write_reg(pcie, val, where & ~3);
+}
+
+static u32 rcar_read_conf(struct rcar_pcie *pcie, int where)
+{
+	int shift = 8 * (where & 3);	/* bit offset within the dword */
+	u32 val = pci_read_reg(pcie, where & ~3);
+
+	return val >> shift;	/* higher bytes of the dword are kept */
+}
+
+/* Serialization is provided by 'pci_lock' in drivers/pci/access.c */
+static int rcar_pcie_config_access(struct rcar_pcie *pcie,
+		unsigned char access_type, struct pci_bus *bus,
+		unsigned int devfn, int where, u32 *data)
+{
+	int dev, func, reg, index;
+
+	dev = PCI_SLOT(devfn);
+	func = PCI_FUNC(devfn);
+	reg = where & ~3;	/* dword-aligned config offset */
+	index = reg / 4;	/* dword index passed to PCICONF() */
+
+	/*
+	 * While each channel has its own memory-mapped extended config
+	 * space, it's generally only accessible when in endpoint mode.
+	 * When in root complex mode, the controller is unable to target
+	 * itself with either type 0 or type 1 accesses, and indeed, any
+	 * controller initiated target transfer to its own config space
+	 * results in a completer abort.
+	 *
+	 * Each channel effectively only supports a single device, but as
+	 * the same channel <-> device access works for any PCI_SLOT()
+	 * value, we cheat a bit here and bind the controller's config
+	 * space to devfn 0 in order to enable self-enumeration. In this
+	 * case the regular PCIECAR/PCIECDR path is sidelined and the mangled
+	 * config access itself is initiated as an internal bus transaction.
+	 */
+	if (pci_is_root_bus(bus)) {
+		if (dev != 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		if (access_type == PCI_ACCESS_READ) {
+			*data = pci_read_reg(pcie, PCICONF(index));
+		} else {
+			/* Keep an eye out for changes to the root bus number */
+			if (pci_is_root_bus(bus) && (reg == PCI_PRIMARY_BUS))
+				pcie->root_bus_nr = *data & 0xff;
+
+			pci_write_reg(pcie, *data, PCICONF(index));
+		}
+
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	if (pcie->root_bus_nr < 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Clear errors */
+	pci_write_reg(pcie, pci_read_reg(pcie, PCIEERRFR), PCIEERRFR);
+
+	/* Set the PIO address */
+	pci_write_reg(pcie, PCIE_CONF_BUS(bus->number) | PCIE_CONF_DEV(dev) |
+				PCIE_CONF_FUNC(func) | reg, PCIECAR);
+
+	/* Enable the access: type 0 just below the root bus, else type 1 */
+	if (bus->parent->number == pcie->root_bus_nr)
+		pci_write_reg(pcie, CONFIG_SEND_ENABLE | TYPE0, PCIECCTLR);
+	else
+		pci_write_reg(pcie, CONFIG_SEND_ENABLE | TYPE1, PCIECCTLR);
+
+	/* Check for errors */
+	if (pci_read_reg(pcie, PCIEERRFR) & UNSUPPORTED_REQUEST)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Check for master and target aborts */
+	if (rcar_read_conf(pcie, RCONF(PCI_STATUS)) &
+		(PCI_STATUS_REC_MASTER_ABORT | PCI_STATUS_REC_TARGET_ABORT))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (access_type == PCI_ACCESS_READ)
+		*data = pci_read_reg(pcie, PCIECDR);
+	else
+		pci_write_reg(pcie, *data, PCIECDR);
+
+	/* Disable config access (not reached on the error returns above) */
+	pci_write_reg(pcie, 0, PCIECCTLR);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rcar_pcie_read_conf(struct pci_bus *bus, unsigned int devfn,
+			       int where, int size, u32 *val)
+{
+	struct rcar_pcie *pcie = sys_to_pcie(bus->sysdata);
+	int ret;
+
+	if ((size == 2) && (where & 1))	/* unaligned half-word */
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	else if ((size == 4) && (where & 3))	/* unaligned dword */
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	ret = rcar_pcie_config_access(pcie, PCI_ACCESS_READ,
+				      bus, devfn, where, val);
+	if (ret != PCIBIOS_SUCCESSFUL) {
+		*val = 0xffffffff;	/* all-ones on a failed read */
+		return ret;
+	}
+
+	if (size == 1)		/* extract the addressed byte */
+		*val = (*val >> (8 * (where & 3))) & 0xff;
+	else if (size == 2)	/* extract the low or high half-word */
+		*val = (*val >> (8 * (where & 2))) & 0xffff;
+
+	dev_dbg(&bus->dev, "pcie-config-read: bus=%3d devfn=0x%04x "
+		"where=0x%04x size=%d val=0x%08lx\n", bus->number,
+		devfn, where, size, (unsigned long)*val);
+
+	return ret;
+}
+
+/* Serialization is provided by 'pci_lock' in drivers/pci/access.c */
+static int rcar_pcie_write_conf(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 val)
+{
+	struct rcar_pcie *pcie = sys_to_pcie(bus->sysdata);
+	int shift, ret;
+	u32 data;	/* dword read back, then merged with 'val' */
+
+	if ((size == 2) && (where & 1))	/* unaligned half-word */
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	else if ((size == 4) && (where & 3))	/* unaligned dword */
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	ret = rcar_pcie_config_access(pcie, PCI_ACCESS_READ,
+				      bus, devfn, where, &data);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	dev_dbg(&bus->dev, "pcie-config-write: bus=%3d devfn=0x%04x "
+		"where=0x%04x size=%d val=0x%08lx\n", bus->number,
+		devfn, where, size, (unsigned long)val);
+
+	if (size == 1) {	/* merge the byte into the dword read */
+		shift = 8 * (where & 3);
+		data &= ~(0xff << shift);
+		data |= ((val & 0xff) << shift);
+	} else if (size == 2) {	/* merge the half-word into the dword read */
+		shift = 8 * (where & 2);
+		data &= ~(0xffff << shift);
+		data |= ((val & 0xffff) << shift);
+	} else	/* full dword: the value read above is discarded */
+		data = val;
+
+	ret = rcar_pcie_config_access(pcie, PCI_ACCESS_WRITE,
+				      bus, devfn, where, &data);
+
+	return ret;
+}
+
+/* Configuration space accessors, referenced from rcar_pci.ops */
+static struct pci_ops rcar_pcie_ops = {
+	.read	= rcar_pcie_read_conf,
+	.write	= rcar_pcie_write_conf,
+};
+
+/*
+ * Program address window 'win' of the controller to cover resource 'res'.
+ *
+ * PCIEPTCTLR(win) is cleared first, then the mask and base address
+ * registers are written, and finally PCIEPTCTLR(win) is written with
+ * PAR_ENABLE (plus IO_SPACE for I/O resources).
+ */
+static void rcar_pcie_setup_window(int win, struct resource *res,
+				   struct rcar_pcie *pcie)
+{
+	/* Setup PCIe address space mappings for each resource */
+	resource_size_t size;
+	u32 mask;
+
+	pci_write_reg(pcie, 0x00000000, PCIEPTCTLR(win));
+
+	/*
+	 * The PAMR mask is calculated in units of 128Bytes, which
+	 * keeps things pretty simple.
+	 */
+	size = resource_size(res);
+	mask = (roundup_pow_of_two(size) / SZ_128) - 1;
+	pci_write_reg(pcie, mask << 7, PCIEPAMR(win));
+
+	pci_write_reg(pcie, upper_32_bits(res->start), PCIEPARH(win));
+	pci_write_reg(pcie, lower_32_bits(res->start), PCIEPARL(win));
+
+	/* Enable the window; I/O resources are additionally flagged IO_SPACE */
+	mask = PAR_ENABLE;
+	if (res->flags & IORESOURCE_IO)
+		mask |= IO_SPACE;
+
+	pci_write_reg(pcie, mask, PCIEPTCTLR(win));
+}
+
+/*
+ * hw_pci .setup hook: program one controller window per populated entry of
+ * pcie->res[], remap I/O resources, and hand memory resources plus the bus
+ * number range to the 'sys' resource list. Always returns 1.
+ */
+static int rcar_pcie_setup(int nr, struct pci_sys_data *sys)
+{
+	struct rcar_pcie *pcie = sys_to_pcie(sys);
+	int win;
+
+	pcie->root_bus_nr = -1;
+
+	/* Walk every resource slot; unused slots have no flags set */
+	for (win = 0; win < PCI_MAX_RESOURCES; win++) {
+		struct resource *res = &pcie->res[win];
+
+		if (res->flags) {
+			rcar_pcie_setup_window(win, res, pcie);
+
+			if (res->flags & IORESOURCE_IO)
+				pci_ioremap_io(nr * SZ_64K, res->start);
+			else
+				pci_add_resource(&sys->resources, res);
+		}
+	}
+	pci_add_resource(&sys->resources, &pcie->busn);
+
+	return 1;
+}
+
+/*
+ * Host controller description passed to pci_common_init_dev().
+ * nr_controllers and private_data are filled in by rcar_pcie_enable().
+ * NOTE(review): this object has external linkage; consider making it
+ * static if nothing outside this file refers to it.
+ */
+struct hw_pci rcar_pci = {
+	.setup          = rcar_pcie_setup,
+	.map_irq        = of_irq_parse_and_map_pci,
+	.ops            = &rcar_pcie_ops,
+};
+
+/* Register the controller with the PCI core via pci_common_init_dev(). */
+static void rcar_pcie_enable(struct rcar_pcie *pcie)
+{
+	struct platform_device *pdev = to_platform_device(pcie->dev);
+
+	rcar_pci.nr_controllers = 1;
+	/*
+	 * private_data points at the local 'pcie' argument, so the pointer
+	 * is only meaningful for the pci_common_init_dev() call below.
+	 */
+	rcar_pci.private_data = (void **)&pcie;
+
+	pci_common_init_dev(&pdev->dev, &rcar_pci);
+#ifdef CONFIG_PCI_DOMAINS
+	/* Bump the domain number for the next controller that is enabled */
+	rcar_pci.domain++;
+#endif
+}
+
+/*
+ * Poll H1_PCIEPHYADRR for PHY_ACK, up to 100 times with a 100us delay
+ * after each unsuccessful poll. Returns 0 once the ack is seen, or
+ * -ETIMEDOUT (after logging an error) if it never shows up.
+ */
+static int phy_wait_for_ack(struct rcar_pcie *pcie)
+{
+	unsigned int tries;
+
+	for (tries = 100; tries > 0; tries--) {
+		if (pci_read_reg(pcie, H1_PCIEPHYADRR) & PHY_ACK)
+			return 0;
+
+		udelay(100);
+	}
+
+	dev_err(pcie->dev, "Access to PCIe phy timed out\n");
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Write 'data' to a PHY register through the H1_PCIEPHYDOUTR (data) and
+ * H1_PCIEPHYADRR (command/address) registers.
+ *
+ * The command word combines WRITE_CMD with the 'rate' (1 bit), 'lane'
+ * (4 bits) and 'addr' (8 bits) fields. After the write is acknowledged the
+ * command is cleared again and a second ack is awaited. Ack timeouts are
+ * deliberately not reported to the caller.
+ */
+static void phy_write_reg(struct rcar_pcie *pcie,
+				 unsigned int rate, unsigned int addr,
+				 unsigned int lane, unsigned int data)
+{
+	unsigned long phyaddr;
+
+	phyaddr = WRITE_CMD |
+		((rate & 1) << RATE_POS) |
+		((lane & 0xf) << LANE_POS) |
+		((addr & 0xff) << ADR_POS);
+
+	/* Set write data */
+	pci_write_reg(pcie, data, H1_PCIEPHYDOUTR);
+	pci_write_reg(pcie, phyaddr, H1_PCIEPHYADRR);
+
+	/* Ignore errors as they will be dealt with if the data link is down */
+	phy_wait_for_ack(pcie);
+
+	/* Clear command */
+	pci_write_reg(pcie, 0, H1_PCIEPHYDOUTR);
+	pci_write_reg(pcie, 0, H1_PCIEPHYADRR);
+
+	/* Ignore errors as they will be dealt with if the data link is down */
+	phy_wait_for_ack(pcie);
+}
+
+/*
+ * Wait for PCIETSTR to report DATA_LINK_ACTIVE, polling up to 10 times
+ * and sleeping 5ms after each miss. Returns 0 when the link is active,
+ * -ETIMEDOUT otherwise.
+ */
+static int rcar_pcie_wait_for_dl(struct rcar_pcie *pcie)
+{
+	unsigned int tries;
+
+	for (tries = 10; tries > 0; tries--) {
+		if (pci_read_reg(pcie, PCIETSTR) & DATA_LINK_ACTIVE)
+			return 0;
+
+		msleep(5);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Common controller initialization.
+ *
+ * Configures the port's configuration-space identity and capabilities, sets
+ * CFINIT to start link training, and waits for the data link to become
+ * active before enabling INTx and the command/status bits.
+ *
+ * Returns 0 on success, or the error from rcar_pcie_wait_for_dl() when the
+ * link does not come up (in which case the final enable steps are skipped).
+ */
+static int rcar_pcie_hw_init(struct rcar_pcie *pcie)
+{
+	int err;
+
+	/* Begin initialization */
+	pci_write_reg(pcie, 0, PCIETCTLR);
+
+	/* Set mode */
+	pci_write_reg(pcie, 1, PCIEMSR);
+
+	/*
+	 * Initial header for port config space is type 1, set the device
+	 * class to match. Hardware takes care of propagating the IDSETR
+	 * settings, so there is no need to bother with a quirk.
+	 */
+	pci_write_reg(pcie, PCI_CLASS_BRIDGE_PCI << 16, IDSETR1);
+
+	/*
+	 * Setup Secondary Bus Number & Subordinate Bus Number, even though
+	 * they aren't used, to avoid bridge being detected as broken.
+	 */
+	rcar_rmw32(pcie, RCONF(PCI_SECONDARY_BUS), 0xff, 1);
+	rcar_rmw32(pcie, RCONF(PCI_SUBORDINATE_BUS), 0xff, 1);
+
+	/* Initialize default capabilities. */
+	rcar_rmw32(pcie, REXPCAP(0), 0, PCI_CAP_ID_EXP);
+	rcar_rmw32(pcie, REXPCAP(PCI_EXP_FLAGS),
+		PCI_EXP_FLAGS_TYPE, PCI_EXP_TYPE_ROOT_PORT << 4);
+	rcar_rmw32(pcie, RCONF(PCI_HEADER_TYPE), 0x7f,
+		PCI_HEADER_TYPE_BRIDGE);
+
+	/* Enable data link layer active state reporting */
+	rcar_rmw32(pcie, REXPCAP(PCI_EXP_LNKCAP), 0, PCI_EXP_LNKCAP_DLLLARC);
+
+	/* Write out the physical slot number = 0 */
+	rcar_rmw32(pcie, REXPCAP(PCI_EXP_SLTCAP), PCI_EXP_SLTCAP_PSN, 0);
+
+	/* Set the completion timer timeout to the maximum 50ms. */
+	rcar_rmw32(pcie, TLCTLR+1, 0x3f, 50);
+
+	/* Terminate list of capabilities (Next Capability Offset=0) */
+	rcar_rmw32(pcie, RVCCAP(0), 0xfff0, 0);
+
+	/* Enable MAC data scrambling. */
+	rcar_rmw32(pcie, MACCTLR, SCRAMBLE_DISABLE, 0);
+
+	/* Finish initialization - establish a PCI Express link */
+	pci_write_reg(pcie, CFINIT, PCIETCTLR);
+
+	/* This will timeout if we don't have a link. */
+	err = rcar_pcie_wait_for_dl(pcie);
+	if (err)
+		return err;
+
+	/* Enable INTx interrupts */
+	rcar_rmw32(pcie, PCIEINTXR, 0, 0xF << 8);
+
+	/*
+	 * Enable slave Bus Mastering
+	 *
+	 * NOTE(review): PCI_COMMAND_* and PCI_STATUS_* bits are ORed together
+	 * and written through RCONF(PCI_STATUS); confirm that rcar_rmw32()
+	 * places the command bits in the command register as intended.
+	 */
+	rcar_rmw32(pcie, RCONF(PCI_STATUS), PCI_STATUS_DEVSEL_MASK,
+		PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
+		PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST);
+
+	/* Make sure the register writes above are ordered before returning */
+	wmb();
+
+	return 0;
+}
+
+/*
+ * Hardware init used for "renesas,pcie-r8a7779".
+ *
+ * Programs a fixed sequence of PHY registers, then polls H1_PCIEPHYSR up to
+ * 10 times (5ms apart) until it reads non-zero and continues with the common
+ * rcar_pcie_hw_init(). Returns -ETIMEDOUT if H1_PCIEPHYSR never becomes
+ * non-zero, otherwise the result of rcar_pcie_hw_init().
+ */
+static int rcar_pcie_hw_init_h1(struct rcar_pcie *pcie)
+{
+	unsigned int timeout = 10;
+
+	/* Initialize the phy */
+	phy_write_reg(pcie, 0, 0x42, 0x1, 0x0EC34191);
+	phy_write_reg(pcie, 1, 0x42, 0x1, 0x0EC34180);
+	phy_write_reg(pcie, 0, 0x43, 0x1, 0x00210188);
+	phy_write_reg(pcie, 1, 0x43, 0x1, 0x00210188);
+	phy_write_reg(pcie, 0, 0x44, 0x1, 0x015C0014);
+	phy_write_reg(pcie, 1, 0x44, 0x1, 0x015C0014);
+	phy_write_reg(pcie, 1, 0x4C, 0x1, 0x786174A0);
+	phy_write_reg(pcie, 1, 0x4D, 0x1, 0x048000BB);
+	phy_write_reg(pcie, 0, 0x51, 0x1, 0x079EC062);
+	phy_write_reg(pcie, 0, 0x52, 0x1, 0x20000000);
+	phy_write_reg(pcie, 1, 0x52, 0x1, 0x20000000);
+	phy_write_reg(pcie, 1, 0x56, 0x1, 0x00003806);
+
+	phy_write_reg(pcie, 0, 0x60, 0x1, 0x004B03A5);
+	phy_write_reg(pcie, 0, 0x64, 0x1, 0x3F0F1F0F);
+	phy_write_reg(pcie, 0, 0x66, 0x1, 0x00008000);
+
+	/* Wait for the PHY status register to become non-zero */
+	while (timeout--) {
+		if (pci_read_reg(pcie, H1_PCIEPHYSR))
+			return rcar_pcie_hw_init(pcie);
+
+		msleep(5);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Acquire and enable the "pcie" and "pcie_bus" clocks and map the
+ * controller registers described by the first DT address entry.
+ *
+ * On success both clocks are left prepared and enabled and pcie->base is
+ * valid; the caller owns disabling the clocks afterwards. On failure every
+ * clock that was successfully enabled here is disabled again, and only
+ * those, before the negative error code is returned.
+ */
+static int rcar_pcie_get_resources(struct platform_device *pdev,
+				   struct rcar_pcie *pcie)
+{
+	struct resource res;
+	int err;
+
+	err = of_address_to_resource(pdev->dev.of_node, 0, &res);
+	if (err)
+		return err;
+
+	pcie->clk = devm_clk_get(&pdev->dev, "pcie");
+	if (IS_ERR(pcie->clk)) {
+		dev_err(pcie->dev, "cannot get platform clock\n");
+		return PTR_ERR(pcie->clk);
+	}
+	/* Nothing has been enabled yet, so there is nothing to undo */
+	err = clk_prepare_enable(pcie->clk);
+	if (err)
+		return err;
+
+	pcie->bus_clk = devm_clk_get(&pdev->dev, "pcie_bus");
+	if (IS_ERR(pcie->bus_clk)) {
+		dev_err(pcie->dev, "cannot get pcie bus clock\n");
+		err = PTR_ERR(pcie->bus_clk);
+		goto err_disable_clk;
+	}
+	/* The bus clock failed to enable: only the platform clock is on */
+	err = clk_prepare_enable(pcie->bus_clk);
+	if (err)
+		goto err_disable_clk;
+
+	pcie->base = devm_ioremap_resource(&pdev->dev, &res);
+	if (IS_ERR(pcie->base)) {
+		err = PTR_ERR(pcie->base);
+		goto err_disable_bus_clk;
+	}
+
+	return 0;
+
+err_disable_bus_clk:
+	clk_disable_unprepare(pcie->bus_clk);
+err_disable_clk:
+	clk_disable_unprepare(pcie->clk);
+
+	return err;
+}
+
+/*
+ * Program the inbound mapping registers for one parsed dma-ranges entry.
+ *
+ * @pcie:  controller instance
+ * @range: dma-ranges entry to map
+ * @index: on entry the first free mapping entry; on success updated to the
+ *         next free entry (left untouched on failure)
+ *
+ * Every mapping consumes two consecutive PCIEPRAR/PCIELAR/PCIELAMR entries
+ * (lower and upper 32 bits). The range is split into equally sized chunks
+ * no larger than the alignment of its CPU start address or 4GiB.
+ *
+ * Returns 0 on success, or -EINVAL when the range needs more entries than
+ * MAX_NR_INBOUND_MAPS provides. The limit is checked before any register of
+ * a new entry pair is written, so no entry beyond the limit is touched.
+ */
+static int rcar_pcie_inbound_ranges(struct rcar_pcie *pcie,
+				    struct of_pci_range *range,
+				    int *index)
+{
+	u64 restype = range->flags;
+	u64 cpu_addr = range->cpu_addr;
+	u64 cpu_end = range->cpu_addr + range->size;
+	u64 pci_addr = range->pci_addr;
+	u32 flags = LAM_64BIT | LAR_ENABLE;
+	u64 mask;
+	u64 size;
+	int idx = *index;
+
+	if (restype & IORESOURCE_PREFETCH)
+		flags |= LAM_PREFETCH;
+
+	/*
+	 * If the size of the range is larger than the alignment of the start
+	 * address, we have to use multiple entries to perform the mapping.
+	 */
+	if (cpu_addr > 0) {
+		unsigned long nr_zeros = __ffs64(cpu_addr);
+		u64 alignment = 1ULL << nr_zeros;
+		size = min(range->size, alignment);
+	} else {
+		size = range->size;
+	}
+	/* Hardware supports max 4GiB inbound region */
+	size = min(size, 1ULL << 32);
+
+	mask = roundup_pow_of_two(size) - 1;
+	mask &= ~0xf;
+
+	while (cpu_addr < cpu_end) {
+		/* Both entries idx and idx+1 must exist before we write them */
+		if (idx + 2 > MAX_NR_INBOUND_MAPS) {
+			dev_err(pcie->dev, "Failed to map inbound regions!\n");
+			return -EINVAL;
+		}
+
+		/*
+		 * Set up 64-bit inbound regions as the range parser doesn't
+		 * distinguish between 32 and 64-bit types.
+		 */
+		pci_write_reg(pcie, lower_32_bits(pci_addr), PCIEPRAR(idx));
+		pci_write_reg(pcie, lower_32_bits(cpu_addr), PCIELAR(idx));
+		pci_write_reg(pcie, lower_32_bits(mask) | flags, PCIELAMR(idx));
+
+		pci_write_reg(pcie, upper_32_bits(pci_addr), PCIEPRAR(idx+1));
+		pci_write_reg(pcie, upper_32_bits(cpu_addr), PCIELAR(idx+1));
+		pci_write_reg(pcie, 0, PCIELAMR(idx+1));
+
+		pci_addr += size;
+		cpu_addr += size;
+		idx += 2;
+	}
+	*index = idx;
+
+	return 0;
+}
+
+/*
+ * Prepare 'parser' to iterate over the "dma-ranges" property of 'node'.
+ * Returns 0 on success or -ENOENT when the property is absent.
+ */
+static int pci_dma_range_parser_init(struct of_pci_range_parser *parser,
+				     struct device_node *node)
+{
+	const int pci_addr_cells = 3, size_cells = 2;
+	int len;
+
+	parser->node = node;
+	parser->pna = of_n_addr_cells(node);
+	/* One entry = parent address + PCI address + size, in cells */
+	parser->np = parser->pna + pci_addr_cells + size_cells;
+
+	parser->range = of_get_property(node, "dma-ranges", &len);
+	if (parser->range == NULL)
+		return -ENOENT;
+
+	parser->end = parser->range + len / sizeof(__be32);
+	return 0;
+}
+
+/*
+ * Walk the "dma-ranges" property of 'np' and program an inbound mapping
+ * for every entry. Returns -EINVAL if the property cannot be parsed, the
+ * error from rcar_pcie_inbound_ranges() if a mapping fails, 0 otherwise.
+ */
+static int rcar_pcie_parse_map_dma_ranges(struct rcar_pcie *pcie,
+					  struct device_node *np)
+{
+	struct of_pci_range_parser parser;
+	struct of_pci_range range;
+	int next_map = 0;
+	int ret;
+
+	ret = pci_dma_range_parser_init(&parser, np);
+	if (ret)
+		return -EINVAL;
+
+	/* Get the dma-ranges from DT */
+	for_each_of_pci_range(&parser, &range) {
+		u64 last = range.cpu_addr + range.size - 1;
+
+		dev_dbg(pcie->dev, "0x%08x 0x%016llx..0x%016llx -> 0x%016llx\n",
+			range.flags, range.cpu_addr, last, range.pci_addr);
+
+		ret = rcar_pcie_inbound_ranges(pcie, &range, &next_map);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Supported SoCs. The .data field holds the hardware init function that
+ * rcar_pcie_probe() calls for the matched compatible string.
+ */
+static const struct of_device_id rcar_pcie_of_match[] = {
+	{ .compatible = "renesas,pcie-r8a7779", .data = rcar_pcie_hw_init_h1 },
+	{ .compatible = "renesas,pcie-r8a7790", .data = rcar_pcie_hw_init },
+	{ .compatible = "renesas,pcie-r8a7791", .data = rcar_pcie_hw_init },
+	{},
+};
+MODULE_DEVICE_TABLE(of, rcar_pcie_of_match);
+
+/*
+ * Platform driver probe.
+ *
+ * Parses the bus range and "ranges" from DT, acquires clocks and registers,
+ * records up to PCI_MAX_RESOURCES windows in pcie->res[], programs the
+ * inbound mappings from "dma-ranges", runs the SoC specific hardware init
+ * and, if the link is up, registers the controller with the PCI core.
+ *
+ * Returns 0 on success and also when the link is down (no card inserted is
+ * not treated as an error); a negative error code otherwise. The clocks
+ * enabled by rcar_pcie_get_resources() are disabled again on error paths
+ * taken after that call.
+ */
+static int rcar_pcie_probe(struct platform_device *pdev)
+{
+	struct rcar_pcie *pcie;
+	unsigned int data;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
+	const struct of_device_id *of_id;
+	int err, win = 0;
+	int (*hw_init_fn)(struct rcar_pcie *);
+
+	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pcie->dev = &pdev->dev;
+	platform_set_drvdata(pdev, pcie);
+
+	/* Get the bus range */
+	if (of_pci_parse_bus_range(pdev->dev.of_node, &pcie->busn)) {
+		dev_err(&pdev->dev, "failed to parse bus-range property\n");
+		return -EINVAL;
+	}
+
+	if (of_pci_range_parser_init(&parser, pdev->dev.of_node)) {
+		dev_err(&pdev->dev, "missing ranges property\n");
+		return -EINVAL;
+	}
+
+	err = rcar_pcie_get_resources(pdev, pcie);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to request resources: %d\n", err);
+		return err;
+	}
+
+	for_each_of_pci_range(&parser, &range) {
+		of_pci_range_to_resource(&range, pdev->dev.of_node,
+						&pcie->res[win++]);
+
+		/* Stop once every slot of pcie->res[] has been filled */
+		if (win >= PCI_MAX_RESOURCES)
+			break;
+	}
+
+	err = rcar_pcie_parse_map_dma_ranges(pcie, pdev->dev.of_node);
+	if (err)
+		goto err_disable_clks;
+
+	of_id = of_match_device(rcar_pcie_of_match, pcie->dev);
+	if (!of_id || !of_id->data) {
+		err = -EINVAL;
+		goto err_disable_clks;
+	}
+	hw_init_fn = of_id->data;
+
+	/* Failure to get a link might just be that no cards are inserted */
+	err = hw_init_fn(pcie);
+	if (err) {
+		dev_info(&pdev->dev, "PCIe link down\n");
+		return 0;
+	}
+
+	data = pci_read_reg(pcie, MACSR);
+	dev_info(&pdev->dev, "PCIe x%d: link up\n", (data >> 20) & 0x3f);
+
+	rcar_pcie_enable(pcie);
+
+	return 0;
+
+err_disable_clks:
+	clk_disable_unprepare(pcie->bus_clk);
+	clk_disable_unprepare(pcie->clk);
+
+	return err;
+}
+
+/*
+ * Platform driver registration. Only a probe callback is provided; there is
+ * no remove callback. suppress_bind_attrs is set, presumably so the device
+ * cannot be manually unbound through sysfs (confirm against driver core docs).
+ */
+static struct platform_driver rcar_pcie_driver = {
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table = rcar_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = rcar_pcie_probe,
+};
+module_platform_driver(rcar_pcie_driver);
+
+/* Module metadata; the license string must be one the kernel recognises */
+MODULE_AUTHOR("Phil Edworthy <phil.edworthy@renesas.com>");
+MODULE_DESCRIPTION("Renesas R-Car PCIe driver");
+MODULE_LICENSE("GPL v2");