diff mbox series

[v16,19/22] PCI: rcar-gen4: Add R-Car Gen4 PCIe Host support

Message ID 20230510062234.201499-20-yoshihiro.shimoda.uh@renesas.com (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show
Series PCI: rcar-gen4: Add R-Car Gen4 PCIe support | expand

Commit Message

Yoshihiro Shimoda May 10, 2023, 6:22 a.m. UTC
Add R-Car Gen4 PCIe Host support. This controller is based on
Synopsys DesignWare PCIe, but this controller has vendor-specific
registers so that requires initialization code like mode setting
and retraining and so on.

To reduce code delta, adds some helper functions which are used by
both the host driver and the endpoint driver (which is added
immediately afterwards) into a separate file.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
---
 drivers/pci/controller/dwc/Kconfig            |   9 +
 drivers/pci/controller/dwc/Makefile           |   2 +
 .../pci/controller/dwc/pcie-rcar-gen4-host.c  | 141 +++++++++++++
 drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 190 ++++++++++++++++++
 drivers/pci/controller/dwc/pcie-rcar-gen4.h   |  46 +++++
 5 files changed, 388 insertions(+)
 create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
 create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.c
 create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.h

Comments

Serge Semin June 5, 2023, 2:39 p.m. UTC | #1
On Wed, May 10, 2023 at 03:22:31PM +0900, Yoshihiro Shimoda wrote:
> Add R-Car Gen4 PCIe Host support. This controller is based on
> Synopsys DesignWare PCIe, but this controller has vendor-specific
> registers so that requires initialization code like mode setting
> and retraining and so on.
> 
> To reduce code delta, adds some helper functions which are used by
> both the host driver and the endpoint driver (which is added
> immediately afterwards) into a separate file.
> 
> Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> ---
>  drivers/pci/controller/dwc/Kconfig            |   9 +
>  drivers/pci/controller/dwc/Makefile           |   2 +
>  .../pci/controller/dwc/pcie-rcar-gen4-host.c  | 141 +++++++++++++
>  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 190 ++++++++++++++++++
>  drivers/pci/controller/dwc/pcie-rcar-gen4.h   |  46 +++++
>  5 files changed, 388 insertions(+)
>  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
>  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.c
>  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.h
> 
> diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> index ab96da43e0c2..64d4d37bc891 100644
> --- a/drivers/pci/controller/dwc/Kconfig
> +++ b/drivers/pci/controller/dwc/Kconfig
> @@ -415,4 +415,13 @@ config PCIE_VISCONTI_HOST
>  	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
>  	  This driver supports TMPV7708 SoC.
>  
> +config PCIE_RCAR_GEN4
> +	tristate "Renesas R-Car Gen4 PCIe Host controller"
> +	depends on ARCH_RENESAS || COMPILE_TEST
> +	depends on PCI_MSI
> +	select PCIE_DW_HOST
> +	help
> +	  Say Y here if you want PCIe host controller support on R-Car Gen4 SoCs.
> +	  This uses the DesignWare core.
> +
>  endmenu
> diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> index bf5c311875a1..486cf706b53d 100644
> --- a/drivers/pci/controller/dwc/Makefile
> +++ b/drivers/pci/controller/dwc/Makefile
> @@ -26,6 +26,8 @@ obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
>  obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
>  obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
>  obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
> +pcie-rcar-gen4-host-drv-objs := pcie-rcar-gen4.o pcie-rcar-gen4-host.o
> +obj-$(CONFIG_PCIE_RCAR_GEN4) += pcie-rcar-gen4-host-drv.o
>  
>  # The following drivers are for devices that use the generic ACPI
>  # pci_root.c driver but don't support standard ECAM config access.
> diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> new file mode 100644
> index 000000000000..df7d80f1874f
> --- /dev/null
> +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * PCIe host controller driver for Renesas R-Car Gen4 Series SoCs
> + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/of_device.h>
> +#include <linux/pci.h>
> +#include <linux/platform_device.h>
> +
> +#include "pcie-rcar-gen4.h"
> +#include "pcie-designware.h"
> +
> +static int rcar_gen4_pcie_host_init(struct dw_pcie_rp *pp)
> +{
> +	struct dw_pcie *dw = to_dw_pcie_from_pp(pp);
> +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> +	int ret;
> +	u32 val;
> +
> +	gpiod_set_value_cansleep(dw->pe_rst, 1);
> +
> +	ret = rcar_gen4_pcie_set_device_type(rcar, true, dw->num_lanes);
> +	if (ret < 0)
> +		return ret;
> +

> +	dw_pcie_dbi_ro_wr_en(dw);

Are you sure dw_pcie_dbi_ro_wr_en() and dw_pcie_dbi_ro_wr_dis() are
needed? In accordance with the DW PCIe Dual-mode HW manual the BARx
registers are W-only over the DBI2 map with no need in setting the
DBI_RO_WR_EN flag.

Please check that on your hardware.

> +
> +	/*
> +	 * According to the section 3.5.7.2 "RC Mode" in DWC PCIe Dual Mode
> +	 * Rev.5.20a, we should disable two BARs to avoid unnecessary memory
> +	 * assignment during device enumeration.
> +	 */
> +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_0, 0x0);
> +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_1, 0x0);
> +

> +	dw_pcie_dbi_ro_wr_dis(dw);

ditto

> +
> +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> +		/* Enable MSI interrupt signal */
> +		val = readl(rcar->base + PCIEINTSTS0EN);
> +		val |= MSI_CTRL_INT;
> +		writel(val, rcar->base + PCIEINTSTS0EN);
> +	}
> +
> +	msleep(100);	/* pe_rst requires 100msec delay */
> +
> +	gpiod_set_value_cansleep(dw->pe_rst, 0);
> +
> +	return 0;
> +}
> +
> +static const struct dw_pcie_host_ops rcar_gen4_pcie_host_ops = {
> +	.host_init = rcar_gen4_pcie_host_init,
> +};
> +
> +static int rcar_gen4_add_dw_pcie_rp(struct rcar_gen4_pcie *rcar,
> +				   struct platform_device *pdev)
> +{
> +	struct dw_pcie *dw = &rcar->dw;
> +	struct dw_pcie_rp *pp = &dw->pp;
> +
> +	pp->num_vectors = MAX_MSI_IRQS;
> +	pp->ops = &rcar_gen4_pcie_host_ops;
> +	dw_pcie_cap_set(dw, REQ_RES);
> +
> +	return dw_pcie_host_init(pp);
> +}
> +
> +static void rcar_gen4_remove_dw_pcie_rp(struct rcar_gen4_pcie *rcar)
> +{
> +	dw_pcie_host_deinit(&rcar->dw.pp);
> +	gpiod_set_value_cansleep(rcar->dw.pe_rst, 1);
> +}
> +
> +static int rcar_gen4_pcie_probe(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct rcar_gen4_pcie *rcar;
> +	int err;
> +
> +	rcar = rcar_gen4_pcie_devm_alloc(dev);
> +	if (!rcar)
> +		return -ENOMEM;
> +
> +	err = rcar_gen4_pcie_get_resources(rcar, pdev);
> +	if (err < 0) {
> +		dev_err(dev, "Failed to request resource: %d\n", err);
> +		return err;
> +	}
> +
> +	platform_set_drvdata(pdev, rcar);
> +
> +	err = rcar_gen4_pcie_prepare(rcar);
> +	if (err < 0)
> +		return err;
> +
> +	rcar->needs_retrain = true;
> +	err = rcar_gen4_add_dw_pcie_rp(rcar, pdev);
> +	if (err < 0)
> +		goto err_add;
> +
> +	return 0;
> +
> +err_add:
> +	rcar_gen4_pcie_unprepare(rcar);
> +
> +	return err;
> +}
> +
> +static int rcar_gen4_pcie_remove(struct platform_device *pdev)
> +{
> +	struct rcar_gen4_pcie *rcar = platform_get_drvdata(pdev);
> +
> +	rcar_gen4_remove_dw_pcie_rp(rcar);
> +	rcar_gen4_pcie_unprepare(rcar);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id rcar_gen4_pcie_of_match[] = {
> +	{ .compatible = "renesas,rcar-gen4-pcie", },
> +	{},
> +};
> +
> +static struct platform_driver rcar_gen4_pcie_driver = {
> +	.driver = {
> +		.name = "pcie-rcar-gen4",
> +		.of_match_table = rcar_gen4_pcie_of_match,
> +		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
> +	},
> +	.probe = rcar_gen4_pcie_probe,
> +	.remove = rcar_gen4_pcie_remove,
> +};
> +module_platform_driver(rcar_gen4_pcie_driver);
> +
> +MODULE_DESCRIPTION("Renesas R-Car Gen4 PCIe host controller driver");
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> new file mode 100644
> index 000000000000..35923fda8ed5
> --- /dev/null
> +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> @@ -0,0 +1,190 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/io.h>
> +#include <linux/of_device.h>
> +#include <linux/pci.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/reset.h>
> +
> +#include "pcie-rcar-gen4.h"
> +#include "pcie-designware.h"
> +
> +/* Renesas-specific */
> +#define PCIERSTCTRL1		0x0014
> +#define  APP_HOLD_PHY_RST	BIT(16)
> +#define  APP_LTSSM_ENABLE	BIT(0)
> +
> +#define RETRAIN_MAX_CHECK	10
> +#define RETRAIN_MAX_RETRIES	10
> +
> +static void rcar_gen4_pcie_ltssm_enable(struct rcar_gen4_pcie *rcar,
> +					bool enable)
> +{
> +	u32 val;
> +
> +	val = readl(rcar->base + PCIERSTCTRL1);
> +	if (enable) {
> +		val |= APP_LTSSM_ENABLE;

> +		val &= ~APP_HOLD_PHY_RST;

What about moving the APP_HOLD_PHY_RST de-assertion to the
rcar_gen4_pcie_set_device_type() method? In accordance with the
"3.1 Initialization" chapter it's supposed to be done before
performing the DBI programming and activating the link training.

> +	} else {
> +		val &= ~APP_LTSSM_ENABLE;
> +		val |= APP_HOLD_PHY_RST;
> +	}
> +	writel(val, rcar->base + PCIERSTCTRL1);
> +}
> +
> +static bool rcar_gen4_pcie_check_retrain_link(struct dw_pcie *dw)
> +{
> +	u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> +	u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> +	u32 lnkctl = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCTL);
> +	u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> +	int i;
> +

> +	if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> +		return true;
> +
> +	lnkctl |= PCI_EXP_LNKCTL_RL;
> +	dw_pcie_writel_dbi(dw, offset + PCI_EXP_LNKCTL, lnkctl);
> +
> +	for (i = 0; i < RETRAIN_MAX_CHECK; i++) {
> +		lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> +		if (lnksta & PCI_EXP_LNKSTA_LT)
> +			return true;
> +		usleep_range(1000, 1100);
> +	}

I'll ask one more time because you didn't respond to my previous note
about this. Are you sure that this is needed? Did you try
the approach described in "3.13 Gen2/3/4/5 Speed Modes" with
de-asserting/asserting the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag?
I keep asking because the same problem we used to have on our hardware
until we found out that the DIRECT_SPEED_CHANGE flag helped to train
the link right to the speed specified in the capabilities.

So here is what presumably you'll need to do (based on the
"3.1 Initialization" and "3.13 Gen2/3/4/5 Speed Modes" chapters of
the DW PCIe DM hw-manual):
1. Make sure the controller is in the power-down/reset state.
2. Select device_type (EP or RP).
3. De-assert the controller reset.
4. Clear PHY-reset flag in the app registers.
5. Perform some controller initializations.
6. Enable LTSSM to start link training.
7. Set GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag one more time.

1-4 are supposed to be done in rcar_gen4_pcie_host_init().
5 is performed in the framework of the DW PCIe core driver.
6-7 should be done in rcar_gen4_pcie_start_link().

Note 1. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is already set on stage
5 in the framework of the dw_pcie_setup_rc() method. But in our case
it only caused having the Gen.2 link speed. Adding stage 7 helped to
get stable Gen.3 link. So please try the denoted approach. If it works
what about adding stage 7 twice in order to get Gen.4 speed?
(waiting for the DIRECT_SPEED_CHANGE flag being auto-cleared and then
set it up again?)

Note 2. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is defined as
PCIE_LINK_WIDTH_SPEED_CONTROL.PORT_LOGIC_SPEED_CHANGE macros in the DW
PCIe core driver.

Note 3. If what is suggested above works well then you won't need to
have the heavy rcar_gen4_pcie_check_retrain_link() method in the way
you have it defined.

> +
> +	return false;
> +}
> +
> +static int rcar_gen4_pcie_link_up(struct dw_pcie *dw)
> +{
> +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> +	u32 val, mask;
> +
> +	val = readl(rcar->base + PCIEINTSTS0);
> +	mask = RDLH_LINK_UP | SMLH_LINK_UP;
> +
> +	return (val & mask) == mask;
> +}
> +
> +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> +{
> +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> +	int i;
> +
> +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> +
> +	/*
> +	 * Require retraining here. Otherwise RDLH_LINK_UP of PCIEINTSTS0 which
> +	 * is this controller specific register may not be set.
> +	 */
> +	if (rcar->needs_retrain) {
> +		for (i = 0; i < RETRAIN_MAX_RETRIES; i++) {
> +			if (rcar_gen4_pcie_check_retrain_link(dw))
> +				return 0;
> +			msleep(100);
> +		}
> +
> +		return -ETIMEDOUT;	/* Failed */
> +	}
> +
> +	return 0;
> +}
> +
> +static void rcar_gen4_pcie_stop_link(struct dw_pcie *dw)
> +{
> +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> +
> +	rcar_gen4_pcie_ltssm_enable(rcar, false);
> +}
> +

> +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> +				   int num_lanes)

1. Number of lanes is already defined in the rcar_gen4_pcie.dw.num_lanes field.
What about using it from there instead of passing it as an argument?
2. DW PCIe core driver has a very handy enum defined:
dw_pcie_device_mode. It describes the controller modes (End-point,
Root port, etc). What about adding the mode field right to the
rcar_gen4_pcie structure and initializing it in someplace in probe() ?
3. Based on the function semantic it's better to be named as something
like rcar_gen4_pcie_init_device() or even rcar_gen4_pcie_basic_init().


> +{
> +	u32 val;
> +

> +	/* Note: Assume the rcar->rst which is Cold-reset is asserted here */

What about directly asserting it here then? In accordance with the DW
PCIe DM manual the "device_type" input must be set before the DM
controller is powered up (basically un-reset). What if the controller
reset is already de-asserted, but you are going to changes its mode?
In that case the mode won't be changed and you'll end up with
unpredictable results.

> +	val = readl(rcar->base + PCIEMSR0);
> +	if (rc)
> +		val |= DEVICE_TYPE_RC;
> +	else
> +		val |= DEVICE_TYPE_EP;
> +
> +	if (num_lanes < 4)
> +		val |= BIFUR_MOD_SET_ON;
> +
> +	writel(val, rcar->base + PCIEMSR0);
> +
> +	return reset_control_deassert(rcar->rst);
> +}
> +
> +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *rcar)
> +{
> +	struct device *dev = rcar->dw.dev;
> +	int err;
> +
> +	pm_runtime_enable(dev);
> +	err = pm_runtime_resume_and_get(dev);
> +	if (err < 0) {
> +		dev_err(dev, "Failed to resume/get Runtime PM\n");
> +		pm_runtime_disable(dev);
> +	}
> +
> +	return err;
> +}
> +
> +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *rcar)
> +{
> +	struct device *dev = rcar->dw.dev;
> +
> +	if (!reset_control_status(rcar->rst))
> +		reset_control_assert(rcar->rst);
> +	pm_runtime_put(dev);
> +	pm_runtime_disable(dev);
> +}
> +
> +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> +				 struct platform_device *pdev)
> +{
> +	struct device *dev = rcar->dw.dev;
> +
> +	/* Renesas-specific registers */
> +	rcar->base = devm_platform_ioremap_resource_byname(pdev, "app");
> +	if (IS_ERR(rcar->base))
> +		return PTR_ERR(rcar->base);
> +

> +	rcar->rst = devm_reset_control_get(dev, NULL);
> +	if (IS_ERR(rcar->rst)) {
> +		dev_err(dev, "Failed to get Cold-reset\n");

So AFAICS your platform is equipped with the DWC_pcie_clkrst.v module.
Thus all the resets are appropriately cleared by a single flag:
power_up_rst_n. What about using the named reset in this case with the
"pwr" name? Thus you'll be able to drop the manual
devm_reset_control_get() invocation and instead use the reset-resources
requested in the framework of the generic dw_pcie_get_resources()
method? Note you'll need to move the dw_pcie_cap_set(dw, REQ_RES);
statement to rcar_gen4_pcie_devm_alloc() then and drop the
rcar_gen4_pcie.rst field afterwords.

By the way I don't see you requesting and enabling the reference
clock in your driver but the bindings imply the clock source. How
come?

> +		return PTR_ERR(rcar->rst);
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct dw_pcie_ops dw_pcie_ops = {
> +	.start_link = rcar_gen4_pcie_start_link,
> +	.stop_link = rcar_gen4_pcie_stop_link,
> +	.link_up = rcar_gen4_pcie_link_up,
> +};
> +
> +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev)
> +{
> +	struct rcar_gen4_pcie *rcar;
> +
> +	rcar = devm_kzalloc(dev, sizeof(*rcar), GFP_KERNEL);
> +	if (!rcar)
> +		return NULL;
> +
> +	rcar->dw.dev = dev;
> +	rcar->dw.ops = &dw_pcie_ops;
> +	dw_pcie_cap_set(&rcar->dw, EDMA_UNROLL);
> +
> +	return rcar;
> +}
> diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.h b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> new file mode 100644
> index 000000000000..fec3f18609f4
> --- /dev/null
> +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> + */
> +
> +#ifndef _PCIE_RCAR_GEN4_H_
> +#define _PCIE_RCAR_GEN4_H_
> +
> +#include <linux/io.h>
> +#include <linux/pci.h>
> +#include <linux/reset.h>
> +
> +#include "pcie-designware.h"
> +
> +/* Renesas-specific */
> +#define PCIEMSR0		0x0000
> +#define  BIFUR_MOD_SET_ON	BIT(0)
> +#define  DEVICE_TYPE_EP		0
> +#define  DEVICE_TYPE_RC		BIT(4)
> +
> +#define PCIEINTSTS0		0x0084
> +#define PCIEINTSTS0EN		0x0310
> +#define  MSI_CTRL_INT		BIT(26)
> +#define  SMLH_LINK_UP		BIT(7)
> +#define  RDLH_LINK_UP		BIT(6)
> +#define PCIEDMAINTSTSEN		0x0314
> +#define  PCIEDMAINTSTSEN_INIT	GENMASK(15, 0)
> +

> +struct rcar_gen4_pcie {

As I mentioned above this structure can be extended with the enum
dw_pcie_device_mode field thus dropping the boolean argument from the
rcar_gen4_pcie_set_device_type() method.

> +	struct dw_pcie		dw;

As I already mentioned above the dw.num_lanes could be used instead of
passing it as the rcar_gen4_pcie_set_device_type() argument.

-Serge(y)

> +	void __iomem		*base;
> +	struct reset_control	*rst;
> +	bool			needs_retrain;
> +};
> +#define to_rcar_gen4_pcie(x)	dev_get_drvdata((x)->dev)
> +
> +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> +				   int num_lanes);
> +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *pcie);
> +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *pcie);
> +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> +				 struct platform_device *pdev);
> +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev);
> +
> +#endif /* _PCIE_RCAR_GEN4_H_ */
> -- 
> 2.25.1
>
Yoshihiro Shimoda June 7, 2023, 2:59 a.m. UTC | #2
Hello Serge,

> From: Serge Semin, Sent: Monday, June 5, 2023 11:39 PM
> 
> On Wed, May 10, 2023 at 03:22:31PM +0900, Yoshihiro Shimoda wrote:
> > Add R-Car Gen4 PCIe Host support. This controller is based on
> > Synopsys DesignWare PCIe, but this controller has vendor-specific
> > registers so that requires initialization code like mode setting
> > and retraining and so on.
> >
> > To reduce code delta, adds some helper functions which are used by
> > both the host driver and the endpoint driver (which is added
> > immediately afterwards) into a separate file.
> >
> > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> > ---
> >  drivers/pci/controller/dwc/Kconfig            |   9 +
> >  drivers/pci/controller/dwc/Makefile           |   2 +
> >  .../pci/controller/dwc/pcie-rcar-gen4-host.c  | 141 +++++++++++++
> >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 190 ++++++++++++++++++
> >  drivers/pci/controller/dwc/pcie-rcar-gen4.h   |  46 +++++
> >  5 files changed, 388 insertions(+)
> >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.c
> >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.h
> >
> > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > index ab96da43e0c2..64d4d37bc891 100644
> > --- a/drivers/pci/controller/dwc/Kconfig
> > +++ b/drivers/pci/controller/dwc/Kconfig
> > @@ -415,4 +415,13 @@ config PCIE_VISCONTI_HOST
> >  	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
> >  	  This driver supports TMPV7708 SoC.
> >
> > +config PCIE_RCAR_GEN4
> > +	tristate "Renesas R-Car Gen4 PCIe Host controller"
> > +	depends on ARCH_RENESAS || COMPILE_TEST
> > +	depends on PCI_MSI
> > +	select PCIE_DW_HOST
> > +	help
> > +	  Say Y here if you want PCIe host controller support on R-Car Gen4 SoCs.
> > +	  This uses the DesignWare core.
> > +
> >  endmenu
> > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > index bf5c311875a1..486cf706b53d 100644
> > --- a/drivers/pci/controller/dwc/Makefile
> > +++ b/drivers/pci/controller/dwc/Makefile
> > @@ -26,6 +26,8 @@ obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
> >  obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
> >  obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
> >  obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
> > +pcie-rcar-gen4-host-drv-objs := pcie-rcar-gen4.o pcie-rcar-gen4-host.o
> > +obj-$(CONFIG_PCIE_RCAR_GEN4) += pcie-rcar-gen4-host-drv.o
> >
> >  # The following drivers are for devices that use the generic ACPI
> >  # pci_root.c driver but don't support standard ECAM config access.
> > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > new file mode 100644
> > index 000000000000..df7d80f1874f
> > --- /dev/null
> > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > @@ -0,0 +1,141 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * PCIe host controller driver for Renesas R-Car Gen4 Series SoCs
> > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > + */
> > +
> > +#include <linux/delay.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/module.h>
> > +#include <linux/of_device.h>
> > +#include <linux/pci.h>
> > +#include <linux/platform_device.h>
> > +
> > +#include "pcie-rcar-gen4.h"
> > +#include "pcie-designware.h"
> > +
> > +static int rcar_gen4_pcie_host_init(struct dw_pcie_rp *pp)
> > +{
> > +	struct dw_pcie *dw = to_dw_pcie_from_pp(pp);
> > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > +	int ret;
> > +	u32 val;
> > +
> > +	gpiod_set_value_cansleep(dw->pe_rst, 1);
> > +
> > +	ret = rcar_gen4_pcie_set_device_type(rcar, true, dw->num_lanes);
> > +	if (ret < 0)
> > +		return ret;
> > +
> 
> > +	dw_pcie_dbi_ro_wr_en(dw);
> 
> Are you sure dw_pcie_dbi_ro_wr_en() and dw_pcie_dbi_ro_wr_dis() are
> needed? In accordance with the DW PCIe Dual-mode HW manual the BARx
> registers are W-only over the DBI2 map with no need in setting the
> DBI_RO_WR_EN flag.
> 
> Please check that on your hardware.

You're correct. They are not needed. So, I'll drop this on v17.

> > +
> > +	/*
> > +	 * According to the section 3.5.7.2 "RC Mode" in DWC PCIe Dual Mode
> > +	 * Rev.5.20a, we should disable two BARs to avoid unnecessary memory
> > +	 * assignment during device enumeration.
> > +	 */
> > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_0, 0x0);
> > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_1, 0x0);
> > +
> 
> > +	dw_pcie_dbi_ro_wr_dis(dw);
> 
> ditto

I'll drop this too.

> > +
> > +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> > +		/* Enable MSI interrupt signal */
> > +		val = readl(rcar->base + PCIEINTSTS0EN);
> > +		val |= MSI_CTRL_INT;
> > +		writel(val, rcar->base + PCIEINTSTS0EN);
> > +	}
> > +
> > +	msleep(100);	/* pe_rst requires 100msec delay */
> > +
> > +	gpiod_set_value_cansleep(dw->pe_rst, 0);
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct dw_pcie_host_ops rcar_gen4_pcie_host_ops = {
> > +	.host_init = rcar_gen4_pcie_host_init,
> > +};
> > +
> > +static int rcar_gen4_add_dw_pcie_rp(struct rcar_gen4_pcie *rcar,
> > +				   struct platform_device *pdev)
> > +{
> > +	struct dw_pcie *dw = &rcar->dw;
> > +	struct dw_pcie_rp *pp = &dw->pp;
> > +
> > +	pp->num_vectors = MAX_MSI_IRQS;
> > +	pp->ops = &rcar_gen4_pcie_host_ops;
> > +	dw_pcie_cap_set(dw, REQ_RES);
> > +
> > +	return dw_pcie_host_init(pp);
> > +}
> > +
> > +static void rcar_gen4_remove_dw_pcie_rp(struct rcar_gen4_pcie *rcar)
> > +{
> > +	dw_pcie_host_deinit(&rcar->dw.pp);
> > +	gpiod_set_value_cansleep(rcar->dw.pe_rst, 1);
> > +}
> > +
> > +static int rcar_gen4_pcie_probe(struct platform_device *pdev)
> > +{
> > +	struct device *dev = &pdev->dev;
> > +	struct rcar_gen4_pcie *rcar;
> > +	int err;
> > +
> > +	rcar = rcar_gen4_pcie_devm_alloc(dev);
> > +	if (!rcar)
> > +		return -ENOMEM;
> > +
> > +	err = rcar_gen4_pcie_get_resources(rcar, pdev);
> > +	if (err < 0) {
> > +		dev_err(dev, "Failed to request resource: %d\n", err);
> > +		return err;
> > +	}
> > +
> > +	platform_set_drvdata(pdev, rcar);
> > +
> > +	err = rcar_gen4_pcie_prepare(rcar);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	rcar->needs_retrain = true;
> > +	err = rcar_gen4_add_dw_pcie_rp(rcar, pdev);
> > +	if (err < 0)
> > +		goto err_add;
> > +
> > +	return 0;
> > +
> > +err_add:
> > +	rcar_gen4_pcie_unprepare(rcar);
> > +
> > +	return err;
> > +}
> > +
> > +static int rcar_gen4_pcie_remove(struct platform_device *pdev)
> > +{
> > +	struct rcar_gen4_pcie *rcar = platform_get_drvdata(pdev);
> > +
> > +	rcar_gen4_remove_dw_pcie_rp(rcar);
> > +	rcar_gen4_pcie_unprepare(rcar);
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct of_device_id rcar_gen4_pcie_of_match[] = {
> > +	{ .compatible = "renesas,rcar-gen4-pcie", },
> > +	{},
> > +};
> > +
> > +static struct platform_driver rcar_gen4_pcie_driver = {
> > +	.driver = {
> > +		.name = "pcie-rcar-gen4",
> > +		.of_match_table = rcar_gen4_pcie_of_match,
> > +		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
> > +	},
> > +	.probe = rcar_gen4_pcie_probe,
> > +	.remove = rcar_gen4_pcie_remove,
> > +};
> > +module_platform_driver(rcar_gen4_pcie_driver);
> > +
> > +MODULE_DESCRIPTION("Renesas R-Car Gen4 PCIe host controller driver");
> > +MODULE_LICENSE("GPL");
> > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > new file mode 100644
> > index 000000000000..35923fda8ed5
> > --- /dev/null
> > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > @@ -0,0 +1,190 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > + */
> > +
> > +#include <linux/delay.h>
> > +#include <linux/io.h>
> > +#include <linux/of_device.h>
> > +#include <linux/pci.h>
> > +#include <linux/pm_runtime.h>
> > +#include <linux/reset.h>
> > +
> > +#include "pcie-rcar-gen4.h"
> > +#include "pcie-designware.h"
> > +
> > +/* Renesas-specific */
> > +#define PCIERSTCTRL1		0x0014
> > +#define  APP_HOLD_PHY_RST	BIT(16)
> > +#define  APP_LTSSM_ENABLE	BIT(0)
> > +
> > +#define RETRAIN_MAX_CHECK	10
> > +#define RETRAIN_MAX_RETRIES	10
> > +
> > +static void rcar_gen4_pcie_ltssm_enable(struct rcar_gen4_pcie *rcar,
> > +					bool enable)
> > +{
> > +	u32 val;
> > +
> > +	val = readl(rcar->base + PCIERSTCTRL1);
> > +	if (enable) {
> > +		val |= APP_LTSSM_ENABLE;
> 
> > +		val &= ~APP_HOLD_PHY_RST;
> 
> What about moving the APP_HOLD_PHY_RST de-assertion to the
> rcar_gen4_pcie_set_device_type() method? In accordance with the
> "3.1 Initialization" chapter it's supposed to be done before
> performing the DBI programming and activating the link training.

IIUC, the "3.1 Initialization" said app_hold_phy_rst = 1 before
performing the DBI programming. So, it is assertion. Also, the SoC
documentation described the initializing procedure as the follows:
 app_ltssm_enable = 1
 app_hold_phy_rst = 0
So, I would like to keep them in the function.

> > +	} else {
> > +		val &= ~APP_LTSSM_ENABLE;
> > +		val |= APP_HOLD_PHY_RST;
> > +	}
> > +	writel(val, rcar->base + PCIERSTCTRL1);
> > +}
> > +
> > +static bool rcar_gen4_pcie_check_retrain_link(struct dw_pcie *dw)
> > +{
> > +	u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> > +	u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> > +	u32 lnkctl = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCTL);
> > +	u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > +	int i;
> > +
> 
> > +	if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> > +		return true;
> > +
> > +	lnkctl |= PCI_EXP_LNKCTL_RL;
> > +	dw_pcie_writel_dbi(dw, offset + PCI_EXP_LNKCTL, lnkctl);
> > +
> > +	for (i = 0; i < RETRAIN_MAX_CHECK; i++) {
> > +		lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > +		if (lnksta & PCI_EXP_LNKSTA_LT)
> > +			return true;
> > +		usleep_range(1000, 1100);
> > +	}
> 
> I'll ask one more time because you didn't respond to my previous note
> about this.

I'm sorry. I completely overlooked the previous note.

> Are you sure that this is needed? Did you try
> the approach described in "3.13 Gen2/3/4/5 Speed Modes" with
> de-asserting/asserting the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag?

I tried this setting, but it doesn't work. I'll investigate this setting more.

> I keep asking because the same problem we used to have on our hardware
> until we found out that the DIRECT_SPEED_CHANGE flag helped to train
> the link right to the speed specified in the capabilities.
> 
> So here is what presumably you'll need to do (based on the
> "3.1 Initialization" and "3.13 Gen2/3/4/5 Speed Modes" chapters of
> the DW PCIe DM hw-manual):
> 1. Make sure the controller is in the power-down/reset state.
> 2. Select device_type (EP or RP).
> 3. De-assert the controller reset.
> 4. Clear PHY-reset flag in the app registers.
> 5. Perform some controller initializations.
> 6. Enable LTSSM to start link training.
> 7. Set GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag one more time.
> 
> 1-4 are supposed to be done in rcar_gen4_pcie_host_init().
> 5 is performed in the framework of the DW PCIe core driver.
> 6-7 should be done in rcar_gen4_pcie_start_link().
> 
> Note 1. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is already set on stage
> 5 in the framework of the dw_pcie_setup_rc() method. But in our case
> it only caused having the Gen.2 link speed. Adding stage 7 helped to
> get stable Gen.3 link. So please try the denoted approach. If it works
> what about adding stage 7 twice in order to get Gen.4 speed?
> (waiting for the DIRECT_SPEED_CHANGE flag being auto-cleared and then
> set it up again?)
> 
> Note 2. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is defined as
> PCIE_LINK_WIDTH_SPEED_CONTROL.PORT_LOGIC_SPEED_CHANGE macros in the DW
> PCIe core driver.
> 
> Note 3. If what is suggested above works well then you won't need to
> have the heavy rcar_gen4_pcie_check_retrain_link() method in the way
> you have it defined.

Thank you very much for your comments!

> > +
> > +	return false;
> > +}
> > +
> > +static int rcar_gen4_pcie_link_up(struct dw_pcie *dw)
> > +{
> > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > +	u32 val, mask;
> > +
> > +	val = readl(rcar->base + PCIEINTSTS0);
> > +	mask = RDLH_LINK_UP | SMLH_LINK_UP;
> > +
> > +	return (val & mask) == mask;
> > +}
> > +
> > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > +{
> > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > +	int i;
> > +
> > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > +
> > +	/*
> > +	 * Require retraining here. Otherwise RDLH_LINK_UP of PCIEINTSTS0 which
> > +	 * is this controller specific register may not be set.
> > +	 */
> > +	if (rcar->needs_retrain) {
> > +		for (i = 0; i < RETRAIN_MAX_RETRIES; i++) {
> > +			if (rcar_gen4_pcie_check_retrain_link(dw))
> > +				return 0;
> > +			msleep(100);
> > +		}
> > +
> > +		return -ETIMEDOUT;	/* Failed */
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void rcar_gen4_pcie_stop_link(struct dw_pcie *dw)
> > +{
> > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > +
> > +	rcar_gen4_pcie_ltssm_enable(rcar, false);
> > +}
> > +
> 
> > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > +				   int num_lanes)
> 
> 1. Number of lanes is already defined in the rcar_gen4_pcie.dw.num_lanes field.
> What about using it from there instead of passing it as an argument?
> 2. DW PCIe core driver has a very handy enum defined:
> dw_pcie_device_mode. It describes the controller modes (End-point,
> Root port, etc). What about adding the mode field right to the
> rcar_gen4_pcie structure and initializing it in someplace in probe() ?
> 3. Based on the function semantic it's better to be named as something
> like rcar_gen4_pcie_init_device() or even rcar_gen4_pcie_basic_init().

Thank you for your comments! I'll modify the function.

> 
> > +{
> > +	u32 val;
> > +
> 
> > +	/* Note: Assume the rcar->rst which is Cold-reset is asserted here */
> 
> What about directly asserting it here then? In accordance with the DW
> PCIe DM manual the "device_type" input must be set before the DM
> controller is powered up (basically un-reset). What if the controller
> reset is already de-asserted, but you are going to changes its mode?
> In that case the mode won't be changed and you'll end up with
> unpredictable results.

Thank you for your comment. We should add asserting it here as you mentioned.

> > +	val = readl(rcar->base + PCIEMSR0);
> > +	if (rc)
> > +		val |= DEVICE_TYPE_RC;
> > +	else
> > +		val |= DEVICE_TYPE_EP;
> > +
> > +	if (num_lanes < 4)
> > +		val |= BIFUR_MOD_SET_ON;
> > +
> > +	writel(val, rcar->base + PCIEMSR0);
> > +
> > +	return reset_control_deassert(rcar->rst);
> > +}
> > +
> > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *rcar)
> > +{
> > +	struct device *dev = rcar->dw.dev;
> > +	int err;
> > +
> > +	pm_runtime_enable(dev);
> > +	err = pm_runtime_resume_and_get(dev);
> > +	if (err < 0) {
> > +		dev_err(dev, "Failed to resume/get Runtime PM\n");
> > +		pm_runtime_disable(dev);
> > +	}
> > +
> > +	return err;
> > +}
> > +
> > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *rcar)
> > +{
> > +	struct device *dev = rcar->dw.dev;
> > +
> > +	if (!reset_control_status(rcar->rst))
> > +		reset_control_assert(rcar->rst);
> > +	pm_runtime_put(dev);
> > +	pm_runtime_disable(dev);
> > +}
> > +
> > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > +				 struct platform_device *pdev)
> > +{
> > +	struct device *dev = rcar->dw.dev;
> > +
> > +	/* Renesas-specific registers */
> > +	rcar->base = devm_platform_ioremap_resource_byname(pdev, "app");
> > +	if (IS_ERR(rcar->base))
> > +		return PTR_ERR(rcar->base);
> > +
> 
> > +	rcar->rst = devm_reset_control_get(dev, NULL);
> > +	if (IS_ERR(rcar->rst)) {
> > +		dev_err(dev, "Failed to get Cold-reset\n");
> 
> So AFAICS your platform is equipped with the DWC_pcie_clkrst.v module.
> Thus all the resets are appropriately cleared by a single flag:
> power_up_rst_n. What about using the named reset in this case with the
> "pwr" name? Thus you'll be able to drop the manual
> devm_reset_control_get() invocation and instead use the reset-resources
> requested in the framework of the generic dw_pcie_get_resources()
> method? Note you'll need to move the dw_pcie_cap_set(dw, REQ_RES);
> statement to rcar_gen4_pcie_devm_alloc() then and drop the
> rcar_gen4_pcie.rst field afterwords.

Thank you for your suggestion! Using "pwr" can work on my environment.

> By the way I don't see you requesting and enabling the reference
> clock in your driver but the bindings imply the clock source. How
> come?

For now, I used gpio-hog to enable the reference clock. But, it seem
I should use "ref" clock for it. So, I'll fix it too.

> > +		return PTR_ERR(rcar->rst);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct dw_pcie_ops dw_pcie_ops = {
> > +	.start_link = rcar_gen4_pcie_start_link,
> > +	.stop_link = rcar_gen4_pcie_stop_link,
> > +	.link_up = rcar_gen4_pcie_link_up,
> > +};
> > +
> > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev)
> > +{
> > +	struct rcar_gen4_pcie *rcar;
> > +
> > +	rcar = devm_kzalloc(dev, sizeof(*rcar), GFP_KERNEL);
> > +	if (!rcar)
> > +		return NULL;
> > +
> > +	rcar->dw.dev = dev;
> > +	rcar->dw.ops = &dw_pcie_ops;
> > +	dw_pcie_cap_set(&rcar->dw, EDMA_UNROLL);
> > +
> > +	return rcar;
> > +}
> > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.h b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > new file mode 100644
> > index 000000000000..fec3f18609f4
> > --- /dev/null
> > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > @@ -0,0 +1,46 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > + */
> > +
> > +#ifndef _PCIE_RCAR_GEN4_H_
> > +#define _PCIE_RCAR_GEN4_H_
> > +
> > +#include <linux/io.h>
> > +#include <linux/pci.h>
> > +#include <linux/reset.h>
> > +
> > +#include "pcie-designware.h"
> > +
> > +/* Renesas-specific */
> > +#define PCIEMSR0		0x0000
> > +#define  BIFUR_MOD_SET_ON	BIT(0)
> > +#define  DEVICE_TYPE_EP		0
> > +#define  DEVICE_TYPE_RC		BIT(4)
> > +
> > +#define PCIEINTSTS0		0x0084
> > +#define PCIEINTSTS0EN		0x0310
> > +#define  MSI_CTRL_INT		BIT(26)
> > +#define  SMLH_LINK_UP		BIT(7)
> > +#define  RDLH_LINK_UP		BIT(6)
> > +#define PCIEDMAINTSTSEN		0x0314
> > +#define  PCIEDMAINTSTSEN_INIT	GENMASK(15, 0)
> > +
> 
> > +struct rcar_gen4_pcie {
> 
> As I mentioned above this structure can be extended with the enum
> dw_pcie_device_mode field thus dropping the boolean argument from the
> rcar_gen4_pcie_set_device_type() method.

I got it. I'll fix this.

> > +	struct dw_pcie		dw;
> 
> As I already mentioned above the dw.num_lanes could be used instead of
> passing it as the rcar_gen4_pcie_set_device_type() argument.

I'll fix this too.

Best regards,
Yoshihiro Shimoda

> -Serge(y)
> 
> > +	void __iomem		*base;
> > +	struct reset_control	*rst;
> > +	bool			needs_retrain;
> > +};
> > +#define to_rcar_gen4_pcie(x)	dev_get_drvdata((x)->dev)
> > +
> > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > +				   int num_lanes);
> > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *pcie);
> > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *pcie);
> > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > +				 struct platform_device *pdev);
> > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev);
> > +
> > +#endif /* _PCIE_RCAR_GEN4_H_ */
> > --
> > 2.25.1
> >
Serge Semin June 7, 2023, 12:15 p.m. UTC | #3
On Wed, Jun 07, 2023 at 02:59:20AM +0000, Yoshihiro Shimoda wrote:
> Hello Serge,
> 
> > From: Serge Semin, Sent: Monday, June 5, 2023 11:39 PM
> > 
> > On Wed, May 10, 2023 at 03:22:31PM +0900, Yoshihiro Shimoda wrote:
> > > Add R-Car Gen4 PCIe Host support. This controller is based on
> > > Synopsys DesignWare PCIe, but this controller has vendor-specific
> > > registers so that requires initialization code like mode setting
> > > and retraining and so on.
> > >
> > > To reduce code delta, adds some helper functions which are used by
> > > both the host driver and the endpoint driver (which is added
> > > immediately afterwards) into a separate file.
> > >
> > > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> > > ---
> > >  drivers/pci/controller/dwc/Kconfig            |   9 +
> > >  drivers/pci/controller/dwc/Makefile           |   2 +
> > >  .../pci/controller/dwc/pcie-rcar-gen4-host.c  | 141 +++++++++++++
> > >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 190 ++++++++++++++++++
> > >  drivers/pci/controller/dwc/pcie-rcar-gen4.h   |  46 +++++
> > >  5 files changed, 388 insertions(+)
> > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > >
> > > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > > index ab96da43e0c2..64d4d37bc891 100644
> > > --- a/drivers/pci/controller/dwc/Kconfig
> > > +++ b/drivers/pci/controller/dwc/Kconfig
> > > @@ -415,4 +415,13 @@ config PCIE_VISCONTI_HOST
> > >  	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
> > >  	  This driver supports TMPV7708 SoC.
> > >
> > > +config PCIE_RCAR_GEN4
> > > +	tristate "Renesas R-Car Gen4 PCIe Host controller"
> > > +	depends on ARCH_RENESAS || COMPILE_TEST
> > > +	depends on PCI_MSI
> > > +	select PCIE_DW_HOST
> > > +	help
> > > +	  Say Y here if you want PCIe host controller support on R-Car Gen4 SoCs.
> > > +	  This uses the DesignWare core.
> > > +
> > >  endmenu
> > > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > > index bf5c311875a1..486cf706b53d 100644
> > > --- a/drivers/pci/controller/dwc/Makefile
> > > +++ b/drivers/pci/controller/dwc/Makefile
> > > @@ -26,6 +26,8 @@ obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
> > >  obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
> > >  obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
> > >  obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
> > > +pcie-rcar-gen4-host-drv-objs := pcie-rcar-gen4.o pcie-rcar-gen4-host.o
> > > +obj-$(CONFIG_PCIE_RCAR_GEN4) += pcie-rcar-gen4-host-drv.o
> > >
> > >  # The following drivers are for devices that use the generic ACPI
> > >  # pci_root.c driver but don't support standard ECAM config access.
> > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > new file mode 100644
> > > index 000000000000..df7d80f1874f
> > > --- /dev/null
> > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > @@ -0,0 +1,141 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * PCIe host controller driver for Renesas R-Car Gen4 Series SoCs
> > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > + */
> > > +
> > > +#include <linux/delay.h>
> > > +#include <linux/interrupt.h>
> > > +#include <linux/module.h>
> > > +#include <linux/of_device.h>
> > > +#include <linux/pci.h>
> > > +#include <linux/platform_device.h>
> > > +
> > > +#include "pcie-rcar-gen4.h"
> > > +#include "pcie-designware.h"
> > > +
> > > +static int rcar_gen4_pcie_host_init(struct dw_pcie_rp *pp)
> > > +{
> > > +	struct dw_pcie *dw = to_dw_pcie_from_pp(pp);
> > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > +	int ret;
> > > +	u32 val;
> > > +
> > > +	gpiod_set_value_cansleep(dw->pe_rst, 1);
> > > +
> > > +	ret = rcar_gen4_pcie_set_device_type(rcar, true, dw->num_lanes);
> > > +	if (ret < 0)
> > > +		return ret;
> > > +
> > 
> > > +	dw_pcie_dbi_ro_wr_en(dw);
> > 
> > Are you sure dw_pcie_dbi_ro_wr_en() and dw_pcie_dbi_ro_wr_dis() are
> > needed? In accordance with the DW PCIe Dual-mode HW manual the BARx
> > registers are W-only over the DBI2 map with no need in setting the
> > DBI_RO_WR_EN flag.
> > 
> > Please check that on your hardware.
> 
> You're correct. They are not needed. So, I'll drop this on v17.
> 
> > > +
> > > +	/*
> > > +	 * According to the section 3.5.7.2 "RC Mode" in DWC PCIe Dual Mode
> > > +	 * Rev.5.20a, we should disable two BARs to avoid unnecessary memory
> > > +	 * assignment during device enumeration.
> > > +	 */
> > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_0, 0x0);
> > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_1, 0x0);
> > > +
> > 
> > > +	dw_pcie_dbi_ro_wr_dis(dw);
> > 
> > ditto
> 
> I'll drop this too.
> 
> > > +
> > > +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> > > +		/* Enable MSI interrupt signal */
> > > +		val = readl(rcar->base + PCIEINTSTS0EN);
> > > +		val |= MSI_CTRL_INT;
> > > +		writel(val, rcar->base + PCIEINTSTS0EN);
> > > +	}
> > > +
> > > +	msleep(100);	/* pe_rst requires 100msec delay */
> > > +
> > > +	gpiod_set_value_cansleep(dw->pe_rst, 0);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static const struct dw_pcie_host_ops rcar_gen4_pcie_host_ops = {
> > > +	.host_init = rcar_gen4_pcie_host_init,
> > > +};
> > > +
> > > +static int rcar_gen4_add_dw_pcie_rp(struct rcar_gen4_pcie *rcar,
> > > +				   struct platform_device *pdev)
> > > +{
> > > +	struct dw_pcie *dw = &rcar->dw;
> > > +	struct dw_pcie_rp *pp = &dw->pp;
> > > +
> > > +	pp->num_vectors = MAX_MSI_IRQS;
> > > +	pp->ops = &rcar_gen4_pcie_host_ops;
> > > +	dw_pcie_cap_set(dw, REQ_RES);
> > > +
> > > +	return dw_pcie_host_init(pp);
> > > +}
> > > +
> > > +static void rcar_gen4_remove_dw_pcie_rp(struct rcar_gen4_pcie *rcar)
> > > +{
> > > +	dw_pcie_host_deinit(&rcar->dw.pp);
> > > +	gpiod_set_value_cansleep(rcar->dw.pe_rst, 1);
> > > +}
> > > +
> > > +static int rcar_gen4_pcie_probe(struct platform_device *pdev)
> > > +{
> > > +	struct device *dev = &pdev->dev;
> > > +	struct rcar_gen4_pcie *rcar;
> > > +	int err;
> > > +
> > > +	rcar = rcar_gen4_pcie_devm_alloc(dev);
> > > +	if (!rcar)
> > > +		return -ENOMEM;
> > > +
> > > +	err = rcar_gen4_pcie_get_resources(rcar, pdev);
> > > +	if (err < 0) {
> > > +		dev_err(dev, "Failed to request resource: %d\n", err);
> > > +		return err;
> > > +	}
> > > +
> > > +	platform_set_drvdata(pdev, rcar);
> > > +
> > > +	err = rcar_gen4_pcie_prepare(rcar);
> > > +	if (err < 0)
> > > +		return err;
> > > +
> > > +	rcar->needs_retrain = true;
> > > +	err = rcar_gen4_add_dw_pcie_rp(rcar, pdev);
> > > +	if (err < 0)
> > > +		goto err_add;
> > > +
> > > +	return 0;
> > > +
> > > +err_add:
> > > +	rcar_gen4_pcie_unprepare(rcar);
> > > +
> > > +	return err;
> > > +}
> > > +
> > > +static int rcar_gen4_pcie_remove(struct platform_device *pdev)
> > > +{
> > > +	struct rcar_gen4_pcie *rcar = platform_get_drvdata(pdev);
> > > +
> > > +	rcar_gen4_remove_dw_pcie_rp(rcar);
> > > +	rcar_gen4_pcie_unprepare(rcar);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static const struct of_device_id rcar_gen4_pcie_of_match[] = {
> > > +	{ .compatible = "renesas,rcar-gen4-pcie", },
> > > +	{},
> > > +};
> > > +
> > > +static struct platform_driver rcar_gen4_pcie_driver = {
> > > +	.driver = {
> > > +		.name = "pcie-rcar-gen4",
> > > +		.of_match_table = rcar_gen4_pcie_of_match,
> > > +		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
> > > +	},
> > > +	.probe = rcar_gen4_pcie_probe,
> > > +	.remove = rcar_gen4_pcie_remove,
> > > +};
> > > +module_platform_driver(rcar_gen4_pcie_driver);
> > > +
> > > +MODULE_DESCRIPTION("Renesas R-Car Gen4 PCIe host controller driver");
> > > +MODULE_LICENSE("GPL");
> > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > new file mode 100644
> > > index 000000000000..35923fda8ed5
> > > --- /dev/null
> > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > @@ -0,0 +1,190 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > + */
> > > +
> > > +#include <linux/delay.h>
> > > +#include <linux/io.h>
> > > +#include <linux/of_device.h>
> > > +#include <linux/pci.h>
> > > +#include <linux/pm_runtime.h>
> > > +#include <linux/reset.h>
> > > +
> > > +#include "pcie-rcar-gen4.h"
> > > +#include "pcie-designware.h"
> > > +
> > > +/* Renesas-specific */
> > > +#define PCIERSTCTRL1		0x0014
> > > +#define  APP_HOLD_PHY_RST	BIT(16)
> > > +#define  APP_LTSSM_ENABLE	BIT(0)
> > > +
> > > +#define RETRAIN_MAX_CHECK	10
> > > +#define RETRAIN_MAX_RETRIES	10
> > > +
> > > +static void rcar_gen4_pcie_ltssm_enable(struct rcar_gen4_pcie *rcar,
> > > +					bool enable)
> > > +{
> > > +	u32 val;
> > > +
> > > +	val = readl(rcar->base + PCIERSTCTRL1);
> > > +	if (enable) {
> > > +		val |= APP_LTSSM_ENABLE;
> > 
> > > +		val &= ~APP_HOLD_PHY_RST;
> > 
> > What about moving the APP_HOLD_PHY_RST de-assertion to the
> > rcar_gen4_pcie_set_device_type() method? In accordance with the
> > "3.1 Initialization" chapter it's supposed to be done before
> > performing the DBI programming and activating the link training.
> 

> IIUC, the "3.1 Initialization" said app_hold_phy_rst = 1 before
> performing the DBI programming. So, it is assertion. Also, the SoC
> documentation described the initializing procedure as the follows:
>  app_ltssm_enable = 1
>  app_hold_phy_rst = 0
> So, I would like to keep them in the function.

Indeed. I was wrong. Sorry for the misleading comment.

> 
> > > +	} else {
> > > +		val &= ~APP_LTSSM_ENABLE;
> > > +		val |= APP_HOLD_PHY_RST;
> > > +	}
> > > +	writel(val, rcar->base + PCIERSTCTRL1);
> > > +}
> > > +
> > > +static bool rcar_gen4_pcie_check_retrain_link(struct dw_pcie *dw)
> > > +{
> > > +	u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> > > +	u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> > > +	u32 lnkctl = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCTL);
> > > +	u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > +	int i;
> > > +
> > 
> > > +	if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> > > +		return true;
> > > +
> > > +	lnkctl |= PCI_EXP_LNKCTL_RL;
> > > +	dw_pcie_writel_dbi(dw, offset + PCI_EXP_LNKCTL, lnkctl);
> > > +
> > > +	for (i = 0; i < RETRAIN_MAX_CHECK; i++) {
> > > +		lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > +		if (lnksta & PCI_EXP_LNKSTA_LT)
> > > +			return true;
> > > +		usleep_range(1000, 1100);
> > > +	}
> > 
> > I'll ask one more time because you didn't respond to my previous note
> > about this.
> 
> I'm sorry. I completely overlooked the previous note.
> 
> > Are you sure that this is needed? Did you try
> > the approach described in "3.13 Gen2/3/4/5 Speed Modes" with
> > de-asserting/asserting the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag?
> 
> I tried this setting, but it doesn't work. I'll investigate this setting more.
> 
> > I keep asking because the same problem we used to have on our hardware
> > until we found out that the DIRECT_SPEED_CHANGE flag helped to train
> > the link right to the speed specified in the capabilities.
> > 
> > So here is what presumably you'll need to do (based on the
> > "3.1 Initialization" and "3.13 Gen2/3/4/5 Speed Modes" chapters of
> > the DW PCIe DM hw-manual):
> > 1. Make sure the controller is in the power-down/reset state.
> > 2. Select device_type (EP or RP).
> > 3. De-assert the controller reset.
> > 4. Clear PHY-reset flag in the app registers.
> > 5. Perform some controller initializations.
> > 6. Enable LTSSM to start link training.
> > 7. Set GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag one more time.
> > 
> > 1-4 are supposed to be done in rcar_gen4_pcie_host_init().
> > 5 is performed in the framework of the DW PCIe core driver.
> > 6-7 should be done in rcar_gen4_pcie_start_link().
> > 
> > Note 1. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is already set on stage
> > 5 in the framework of the dw_pcie_setup_rc() method. But in our case
> > it only caused having the Gen.2 link speed. Adding stage 7 helped to
> > get stable Gen.3 link. So please try the denoted approach. If it works
> > what about adding stage 7 twice in order to get Gen.4 speed?
> > (waiting for the DIRECT_SPEED_CHANGE flag being auto-cleared and then
> > set it up again?)
> > 
> > Note 2. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is defined as
> > PCIE_LINK_WIDTH_SPEED_CONTROL.PORT_LOGIC_SPEED_CHANGE macros in the DW
> > PCIe core driver.
> > 
> > Note 3. If what is suggested above works well then you won't need to
> > have the heavy rcar_gen4_pcie_check_retrain_link() method in the way
> > you have it defined.
> 
> Thank you very much for your comments!

Please see the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE description for details
of how the flag is supposed to be de-asserted and asserted in order to
initiate the direct speed change.

> 
> > > +
> > > +	return false;
> > > +}
> > > +
> > > +static int rcar_gen4_pcie_link_up(struct dw_pcie *dw)
> > > +{
> > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > +	u32 val, mask;
> > > +
> > > +	val = readl(rcar->base + PCIEINTSTS0);
> > > +	mask = RDLH_LINK_UP | SMLH_LINK_UP;
> > > +
> > > +	return (val & mask) == mask;
> > > +}
> > > +
> > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > +{
> > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > +	int i;
> > > +
> > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > +
> > > +	/*
> > > +	 * Require retraining here. Otherwise RDLH_LINK_UP of PCIEINTSTS0 which
> > > +	 * is this controller specific register may not be set.
> > > +	 */
> > > +	if (rcar->needs_retrain) {
> > > +		for (i = 0; i < RETRAIN_MAX_RETRIES; i++) {
> > > +			if (rcar_gen4_pcie_check_retrain_link(dw))
> > > +				return 0;
> > > +			msleep(100);
> > > +		}
> > > +
> > > +		return -ETIMEDOUT;	/* Failed */
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static void rcar_gen4_pcie_stop_link(struct dw_pcie *dw)
> > > +{
> > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > +
> > > +	rcar_gen4_pcie_ltssm_enable(rcar, false);
> > > +}
> > > +
> > 
> > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > +				   int num_lanes)
> > 
> > 1. Number of lanes is already defined in the rcar_gen4_pcie.dw.num_lanes field.
> > What about using it from there instead of passing it as an argument?
> > 2. DW PCIe core driver has a very handy enum defined:
> > dw_pcie_device_mode. It describes the controller modes (End-point,
> > Root port, etc). What about adding the mode field right to the
> > rcar_gen4_pcie structure and initializing it in someplace in probe() ?
> > 3. Based on the function semantic it's better to be named as something
> > like rcar_gen4_pcie_init_device() or even rcar_gen4_pcie_basic_init().
> 
> Thank you for your comments! I'll modify the function.
> 
> > 
> > > +{
> > > +	u32 val;
> > > +
> > 
> > > +	/* Note: Assume the rcar->rst which is Cold-reset is asserted here */
> > 
> > What about directly asserting it here then? In accordance with the DW
> > PCIe DM manual the "device_type" input must be set before the DM
> > controller is powered up (basically un-reset). What if the controller
> > reset is already de-asserted, but you are going to changes its mode?
> > In that case the mode won't be changed and you'll end up with
> > unpredictable results.
> 
> Thank you for your comment. We should add asserting it here as you mentioned.
> 
> > > +	val = readl(rcar->base + PCIEMSR0);
> > > +	if (rc)
> > > +		val |= DEVICE_TYPE_RC;
> > > +	else
> > > +		val |= DEVICE_TYPE_EP;
> > > +
> > > +	if (num_lanes < 4)
> > > +		val |= BIFUR_MOD_SET_ON;
> > > +
> > > +	writel(val, rcar->base + PCIEMSR0);
> > > +
> > > +	return reset_control_deassert(rcar->rst);
> > > +}
> > > +
> > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *rcar)
> > > +{
> > > +	struct device *dev = rcar->dw.dev;
> > > +	int err;
> > > +
> > > +	pm_runtime_enable(dev);
> > > +	err = pm_runtime_resume_and_get(dev);
> > > +	if (err < 0) {
> > > +		dev_err(dev, "Failed to resume/get Runtime PM\n");
> > > +		pm_runtime_disable(dev);
> > > +	}
> > > +
> > > +	return err;
> > > +}
> > > +
> > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *rcar)
> > > +{
> > > +	struct device *dev = rcar->dw.dev;
> > > +
> > > +	if (!reset_control_status(rcar->rst))
> > > +		reset_control_assert(rcar->rst);
> > > +	pm_runtime_put(dev);
> > > +	pm_runtime_disable(dev);
> > > +}
> > > +
> > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > +				 struct platform_device *pdev)
> > > +{
> > > +	struct device *dev = rcar->dw.dev;
> > > +
> > > +	/* Renesas-specific registers */
> > > +	rcar->base = devm_platform_ioremap_resource_byname(pdev, "app");
> > > +	if (IS_ERR(rcar->base))
> > > +		return PTR_ERR(rcar->base);
> > > +
> > 
> > > +	rcar->rst = devm_reset_control_get(dev, NULL);
> > > +	if (IS_ERR(rcar->rst)) {
> > > +		dev_err(dev, "Failed to get Cold-reset\n");
> > 
> > So AFAICS your platform is equipped with the DWC_pcie_clkrst.v module.
> > Thus all the resets are appropriately cleared by a single flag:
> > power_up_rst_n. What about using the named reset in this case with the
> > "pwr" name? Thus you'll be able to drop the manual
> > devm_reset_control_get() invocation and instead use the reset-resources
> > requested in the framework of the generic dw_pcie_get_resources()
> > method? Note you'll need to move the dw_pcie_cap_set(dw, REQ_RES);
> > statement to rcar_gen4_pcie_devm_alloc() then and drop the
> > rcar_gen4_pcie.rst field afterwords.
> 
> Thank you for your suggestion! Using "pwr" can work on my environment.
> 
> > By the way I don't see you requesting and enabling the reference
> > clock in your driver but the bindings imply the clock source. How
> > come?
> 

> For now, I used gpio-hog to enable the reference clock. But, it seem
> I should use "ref" clock for it. So, I'll fix it too.

Not sure what gpio-hog you are talking about. Do you mean the pe_rst
signal or some another gpio? I failed to see of how pe_rst is
connected to the clock source. In anyway directly handling the clock
source would be more portable choice.

-Serge(y)

> 
> > > +		return PTR_ERR(rcar->rst);
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static const struct dw_pcie_ops dw_pcie_ops = {
> > > +	.start_link = rcar_gen4_pcie_start_link,
> > > +	.stop_link = rcar_gen4_pcie_stop_link,
> > > +	.link_up = rcar_gen4_pcie_link_up,
> > > +};
> > > +
> > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev)
> > > +{
> > > +	struct rcar_gen4_pcie *rcar;
> > > +
> > > +	rcar = devm_kzalloc(dev, sizeof(*rcar), GFP_KERNEL);
> > > +	if (!rcar)
> > > +		return NULL;
> > > +
> > > +	rcar->dw.dev = dev;
> > > +	rcar->dw.ops = &dw_pcie_ops;
> > > +	dw_pcie_cap_set(&rcar->dw, EDMA_UNROLL);
> > > +
> > > +	return rcar;
> > > +}
> > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.h b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > new file mode 100644
> > > index 000000000000..fec3f18609f4
> > > --- /dev/null
> > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > @@ -0,0 +1,46 @@
> > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > +/*
> > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > + */
> > > +
> > > +#ifndef _PCIE_RCAR_GEN4_H_
> > > +#define _PCIE_RCAR_GEN4_H_
> > > +
> > > +#include <linux/io.h>
> > > +#include <linux/pci.h>
> > > +#include <linux/reset.h>
> > > +
> > > +#include "pcie-designware.h"
> > > +
> > > +/* Renesas-specific */
> > > +#define PCIEMSR0		0x0000
> > > +#define  BIFUR_MOD_SET_ON	BIT(0)
> > > +#define  DEVICE_TYPE_EP		0
> > > +#define  DEVICE_TYPE_RC		BIT(4)
> > > +
> > > +#define PCIEINTSTS0		0x0084
> > > +#define PCIEINTSTS0EN		0x0310
> > > +#define  MSI_CTRL_INT		BIT(26)
> > > +#define  SMLH_LINK_UP		BIT(7)
> > > +#define  RDLH_LINK_UP		BIT(6)
> > > +#define PCIEDMAINTSTSEN		0x0314
> > > +#define  PCIEDMAINTSTSEN_INIT	GENMASK(15, 0)
> > > +
> > 
> > > +struct rcar_gen4_pcie {
> > 
> > As I mentioned above this structure can be extended with the enum
> > dw_pcie_device_mode field thus dropping the boolean argument from the
> > rcar_gen4_pcie_set_device_type() method.
> 
> I got it. I'll fix this.
> 
> > > +	struct dw_pcie		dw;
> > 
> > As I already mentioned above the dw.num_lanes could be used instead of
> > passing it as the rcar_gen4_pcie_set_device_type() argument.
> 
> I'll fix this too.
> 
> Best regards,
> Yoshihiro Shimoda
> 
> > -Serge(y)
> > 
> > > +	void __iomem		*base;
> > > +	struct reset_control	*rst;
> > > +	bool			needs_retrain;
> > > +};
> > > +#define to_rcar_gen4_pcie(x)	dev_get_drvdata((x)->dev)
> > > +
> > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > +				   int num_lanes);
> > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *pcie);
> > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *pcie);
> > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > +				 struct platform_device *pdev);
> > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev);
> > > +
> > > +#endif /* _PCIE_RCAR_GEN4_H_ */
> > > --
> > > 2.25.1
> > >
Yoshihiro Shimoda June 8, 2023, 8:47 a.m. UTC | #4
Hello Serge,

> From: Serge Semin, Sent: Wednesday, June 7, 2023 9:16 PM
> 
> On Wed, Jun 07, 2023 at 02:59:20AM +0000, Yoshihiro Shimoda wrote:
> > Hello Serge,
> >
> > > From: Serge Semin, Sent: Monday, June 5, 2023 11:39 PM
> > >
> > > On Wed, May 10, 2023 at 03:22:31PM +0900, Yoshihiro Shimoda wrote:
> > > > Add R-Car Gen4 PCIe Host support. This controller is based on
> > > > Synopsys DesignWare PCIe, but this controller has vendor-specific
> > > > registers so that requires initialization code like mode setting
> > > > and retraining and so on.
> > > >
> > > > To reduce code delta, adds some helper functions which are used by
> > > > both the host driver and the endpoint driver (which is added
> > > > immediately afterwards) into a separate file.
> > > >
> > > > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> > > > ---
> > > >  drivers/pci/controller/dwc/Kconfig            |   9 +
> > > >  drivers/pci/controller/dwc/Makefile           |   2 +
> > > >  .../pci/controller/dwc/pcie-rcar-gen4-host.c  | 141 +++++++++++++
> > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 190 ++++++++++++++++++
> > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.h   |  46 +++++
> > > >  5 files changed, 388 insertions(+)
> > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > >
> > > > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > > > index ab96da43e0c2..64d4d37bc891 100644
> > > > --- a/drivers/pci/controller/dwc/Kconfig
> > > > +++ b/drivers/pci/controller/dwc/Kconfig
> > > > @@ -415,4 +415,13 @@ config PCIE_VISCONTI_HOST
> > > >  	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
> > > >  	  This driver supports TMPV7708 SoC.
> > > >
> > > > +config PCIE_RCAR_GEN4
> > > > +	tristate "Renesas R-Car Gen4 PCIe Host controller"
> > > > +	depends on ARCH_RENESAS || COMPILE_TEST
> > > > +	depends on PCI_MSI
> > > > +	select PCIE_DW_HOST
> > > > +	help
> > > > +	  Say Y here if you want PCIe host controller support on R-Car Gen4 SoCs.
> > > > +	  This uses the DesignWare core.
> > > > +
> > > >  endmenu
> > > > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > > > index bf5c311875a1..486cf706b53d 100644
> > > > --- a/drivers/pci/controller/dwc/Makefile
> > > > +++ b/drivers/pci/controller/dwc/Makefile
> > > > @@ -26,6 +26,8 @@ obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
> > > >  obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
> > > >  obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
> > > >  obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
> > > > +pcie-rcar-gen4-host-drv-objs := pcie-rcar-gen4.o pcie-rcar-gen4-host.o
> > > > +obj-$(CONFIG_PCIE_RCAR_GEN4) += pcie-rcar-gen4-host-drv.o
> > > >
> > > >  # The following drivers are for devices that use the generic ACPI
> > > >  # pci_root.c driver but don't support standard ECAM config access.
> > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > new file mode 100644
> > > > index 000000000000..df7d80f1874f
> > > > --- /dev/null
> > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > @@ -0,0 +1,141 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > +/*
> > > > + * PCIe host controller driver for Renesas R-Car Gen4 Series SoCs
> > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > + */
> > > > +
> > > > +#include <linux/delay.h>
> > > > +#include <linux/interrupt.h>
> > > > +#include <linux/module.h>
> > > > +#include <linux/of_device.h>
> > > > +#include <linux/pci.h>
> > > > +#include <linux/platform_device.h>
> > > > +
> > > > +#include "pcie-rcar-gen4.h"
> > > > +#include "pcie-designware.h"
> > > > +
> > > > +static int rcar_gen4_pcie_host_init(struct dw_pcie_rp *pp)
> > > > +{
> > > > +	struct dw_pcie *dw = to_dw_pcie_from_pp(pp);
> > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > +	int ret;
> > > > +	u32 val;
> > > > +
> > > > +	gpiod_set_value_cansleep(dw->pe_rst, 1);
> > > > +
> > > > +	ret = rcar_gen4_pcie_set_device_type(rcar, true, dw->num_lanes);
> > > > +	if (ret < 0)
> > > > +		return ret;
> > > > +
> > >
> > > > +	dw_pcie_dbi_ro_wr_en(dw);
> > >
> > > Are you sure dw_pcie_dbi_ro_wr_en() and dw_pcie_dbi_ro_wr_dis() are
> > > needed? In accordance with the DW PCIe Dual-mode HW manual the BARx
> > > registers are W-only over the DBI2 map with no need in setting the
> > > DBI_RO_WR_EN flag.
> > >
> > > Please check that on your hardware.
> >
> > You're correct. They are not needed. So, I'll drop this on v17.
> >
> > > > +
> > > > +	/*
> > > > +	 * According to the section 3.5.7.2 "RC Mode" in DWC PCIe Dual Mode
> > > > +	 * Rev.5.20a, we should disable two BARs to avoid unnecessary memory
> > > > +	 * assignment during device enumeration.
> > > > +	 */
> > > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_0, 0x0);
> > > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_1, 0x0);
> > > > +
> > >
> > > > +	dw_pcie_dbi_ro_wr_dis(dw);
> > >
> > > ditto
> >
> > I'll drop this too.
> >
> > > > +
> > > > +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> > > > +		/* Enable MSI interrupt signal */
> > > > +		val = readl(rcar->base + PCIEINTSTS0EN);
> > > > +		val |= MSI_CTRL_INT;
> > > > +		writel(val, rcar->base + PCIEINTSTS0EN);
> > > > +	}
> > > > +
> > > > +	msleep(100);	/* pe_rst requires 100msec delay */
> > > > +
> > > > +	gpiod_set_value_cansleep(dw->pe_rst, 0);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static const struct dw_pcie_host_ops rcar_gen4_pcie_host_ops = {
> > > > +	.host_init = rcar_gen4_pcie_host_init,
> > > > +};
> > > > +
> > > > +static int rcar_gen4_add_dw_pcie_rp(struct rcar_gen4_pcie *rcar,
> > > > +				   struct platform_device *pdev)
> > > > +{
> > > > +	struct dw_pcie *dw = &rcar->dw;
> > > > +	struct dw_pcie_rp *pp = &dw->pp;
> > > > +
> > > > +	pp->num_vectors = MAX_MSI_IRQS;
> > > > +	pp->ops = &rcar_gen4_pcie_host_ops;
> > > > +	dw_pcie_cap_set(dw, REQ_RES);
> > > > +
> > > > +	return dw_pcie_host_init(pp);
> > > > +}
> > > > +
> > > > +static void rcar_gen4_remove_dw_pcie_rp(struct rcar_gen4_pcie *rcar)
> > > > +{
> > > > +	dw_pcie_host_deinit(&rcar->dw.pp);
> > > > +	gpiod_set_value_cansleep(rcar->dw.pe_rst, 1);
> > > > +}
> > > > +
> > > > +static int rcar_gen4_pcie_probe(struct platform_device *pdev)
> > > > +{
> > > > +	struct device *dev = &pdev->dev;
> > > > +	struct rcar_gen4_pcie *rcar;
> > > > +	int err;
> > > > +
> > > > +	rcar = rcar_gen4_pcie_devm_alloc(dev);
> > > > +	if (!rcar)
> > > > +		return -ENOMEM;
> > > > +
> > > > +	err = rcar_gen4_pcie_get_resources(rcar, pdev);
> > > > +	if (err < 0) {
> > > > +		dev_err(dev, "Failed to request resource: %d\n", err);
> > > > +		return err;
> > > > +	}
> > > > +
> > > > +	platform_set_drvdata(pdev, rcar);
> > > > +
> > > > +	err = rcar_gen4_pcie_prepare(rcar);
> > > > +	if (err < 0)
> > > > +		return err;
> > > > +
> > > > +	rcar->needs_retrain = true;
> > > > +	err = rcar_gen4_add_dw_pcie_rp(rcar, pdev);
> > > > +	if (err < 0)
> > > > +		goto err_add;
> > > > +
> > > > +	return 0;
> > > > +
> > > > +err_add:
> > > > +	rcar_gen4_pcie_unprepare(rcar);
> > > > +
> > > > +	return err;
> > > > +}
> > > > +
> > > > +static int rcar_gen4_pcie_remove(struct platform_device *pdev)
> > > > +{
> > > > +	struct rcar_gen4_pcie *rcar = platform_get_drvdata(pdev);
> > > > +
> > > > +	rcar_gen4_remove_dw_pcie_rp(rcar);
> > > > +	rcar_gen4_pcie_unprepare(rcar);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static const struct of_device_id rcar_gen4_pcie_of_match[] = {
> > > > +	{ .compatible = "renesas,rcar-gen4-pcie", },
> > > > +	{},
> > > > +};
> > > > +
> > > > +static struct platform_driver rcar_gen4_pcie_driver = {
> > > > +	.driver = {
> > > > +		.name = "pcie-rcar-gen4",
> > > > +		.of_match_table = rcar_gen4_pcie_of_match,
> > > > +		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
> > > > +	},
> > > > +	.probe = rcar_gen4_pcie_probe,
> > > > +	.remove = rcar_gen4_pcie_remove,
> > > > +};
> > > > +module_platform_driver(rcar_gen4_pcie_driver);
> > > > +
> > > > +MODULE_DESCRIPTION("Renesas R-Car Gen4 PCIe host controller driver");
> > > > +MODULE_LICENSE("GPL");
> > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > new file mode 100644
> > > > index 000000000000..35923fda8ed5
> > > > --- /dev/null
> > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > @@ -0,0 +1,190 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > +/*
> > > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > + */
> > > > +
> > > > +#include <linux/delay.h>
> > > > +#include <linux/io.h>
> > > > +#include <linux/of_device.h>
> > > > +#include <linux/pci.h>
> > > > +#include <linux/pm_runtime.h>
> > > > +#include <linux/reset.h>
> > > > +
> > > > +#include "pcie-rcar-gen4.h"
> > > > +#include "pcie-designware.h"
> > > > +
> > > > +/* Renesas-specific */
> > > > +#define PCIERSTCTRL1		0x0014
> > > > +#define  APP_HOLD_PHY_RST	BIT(16)
> > > > +#define  APP_LTSSM_ENABLE	BIT(0)
> > > > +
> > > > +#define RETRAIN_MAX_CHECK	10
> > > > +#define RETRAIN_MAX_RETRIES	10
> > > > +
> > > > +static void rcar_gen4_pcie_ltssm_enable(struct rcar_gen4_pcie *rcar,
> > > > +					bool enable)
> > > > +{
> > > > +	u32 val;
> > > > +
> > > > +	val = readl(rcar->base + PCIERSTCTRL1);
> > > > +	if (enable) {
> > > > +		val |= APP_LTSSM_ENABLE;
> > >
> > > > +		val &= ~APP_HOLD_PHY_RST;
> > >
> > > What about moving the APP_HOLD_PHY_RST de-assertion to the
> > > rcar_gen4_pcie_set_device_type() method? In accordance with the
> > > "3.1 Initialization" chapter it's supposed to be done before
> > > performing the DBI programming and activating the link training.
> >
> 
> > IIUC, the "3.1 Initialization" said app_hold_phy_rst = 1 before
> > performing the DBI programming. So, it is assertion. Also, the SoC
> > documentation described the initializing procedure as the follows:
> >  app_ltssm_enable = 1
> >  app_hold_phy_rst = 0
> > So, I would like to keep them in the function.
> 
> Indeed. I was wrong. Sorry for the misleading comment.

No problem. Thank you for the confirmation!

> >
> > > > +	} else {
> > > > +		val &= ~APP_LTSSM_ENABLE;
> > > > +		val |= APP_HOLD_PHY_RST;
> > > > +	}
> > > > +	writel(val, rcar->base + PCIERSTCTRL1);
> > > > +}
> > > > +
> > > > +static bool rcar_gen4_pcie_check_retrain_link(struct dw_pcie *dw)
> > > > +{
> > > > +	u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> > > > +	u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> > > > +	u32 lnkctl = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCTL);
> > > > +	u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > > +	int i;
> > > > +
> > >
> > > > +	if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> > > > +		return true;
> > > > +
> > > > +	lnkctl |= PCI_EXP_LNKCTL_RL;
> > > > +	dw_pcie_writel_dbi(dw, offset + PCI_EXP_LNKCTL, lnkctl);
> > > > +
> > > > +	for (i = 0; i < RETRAIN_MAX_CHECK; i++) {
> > > > +		lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > > +		if (lnksta & PCI_EXP_LNKSTA_LT)
> > > > +			return true;
> > > > +		usleep_range(1000, 1100);
> > > > +	}
> > >
> > > I'll ask one more time because you didn't respond to my previous note
> > > about this.
> >
> > I'm sorry. I completely overlooked the previous note.
> >
> > > Are you sure that this is needed? Did you try
> > > the approach described in "3.13 Gen2/3/4/5 Speed Modes" with
> > > de-asserting/asserting the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag?
> >
> > I tried this setting, but it doesn't work. I'll investigate this setting more.
> >
> > > I keep asking because the same problem we used to have on our hardware
> > > until we found out that the DIRECT_SPEED_CHANGE flag helped to train
> > > the link right to the speed specified in the capabilities.
> > >
> > > So here is what presumably you'll need to do (based on the
> > > "3.1 Initialization" and "3.13 Gen2/3/4/5 Speed Modes" chapters of
> > > the DW PCIe DM hw-manual):
> > > 1. Make sure the controller is in the power-down/reset state.
> > > 2. Select device_type (EP or RP).
> > > 3. De-assert the controller reset.
> > > 4. Clear PHY-reset flag in the app registers.
> > > 5. Perform some controller initializations.
> > > 6. Enable LTSSM to start link training.
> > > 7. Set GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag one more time.
> > >
> > > 1-4 are supposed to be done in rcar_gen4_pcie_host_init().
> > > 5 is performed in the framework of the DW PCIe core driver.
> > > 6-7 should be done in rcar_gen4_pcie_start_link().
> > >
> > > Note 1. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is already set on stage
> > > 5 in the framework of the dw_pcie_setup_rc() method. But in our case
> > > it only caused having the Gen.2 link speed. Adding stage 7 helped to
> > > get stable Gen.3 link. So please try the denoted approach. If it works
> > > what about adding stage 7 twice in order to get Gen.4 speed?
> > > (waiting for the DIRECT_SPEED_CHANGE flag being auto-cleared and then
> > > set it up again?)
> > >
> > > Note 2. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is defined as
> > > PCIE_LINK_WIDTH_SPEED_CONTROL.PORT_LOGIC_SPEED_CHANGE macros in the DW
> > > PCIe core driver.
> > >
> > > Note 3. If what is suggested above works well then you won't need to
> > > have the heavy rcar_gen4_pcie_check_retrain_link() method in the way
> > > you have it defined.
> >
> > Thank you very much for your comments!
> 
> Please see the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE description for details
> of how the flag is supposed to be de-asserted and asserted in order to
> initiate the direct speed change.

After I modified the start_link() like below, it also works. Is the code
acceptable? (Sorry all tabs are replaced to spaces...)
----------------------------------------------------------------------------
static bool rcar_gen4_pcie_check_current_link(struct dw_pcie *dw)
{
        u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
        u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
        u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);

        if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
                return true;

        return false;
}

static void rcar_gen4_pcie_speed_change(struct dw_pcie *dw)
{
        u32 val;

        val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
        val &= ~PORT_LOGIC_SPEED_CHANGE;
        dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);

        val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
        val |= PORT_LOGIC_SPEED_CHANGE;
        dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
}

static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
{
        struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
        int i;

        rcar_gen4_pcie_ltssm_enable(rcar, true);

        /*
         * Require direct speed change here. Otherwise RDLH_LINK_UP of
         * PCIEINTSTS0 which is this controller specific register may not
         * be set.
         */
        if (rcar->needs_speed_change) {
                for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
                        rcar_gen4_pcie_speed_change(dw);
                        msleep(100);
                        if (rcar_gen4_pcie_check_current_link(dw))
                                return 0;
                }

                return -ETIMEDOUT;      /* Failed */
        }
------------------------------------------------------------------

> >
> > > > +
> > > > +	return false;
> > > > +}
> > > > +
> > > > +static int rcar_gen4_pcie_link_up(struct dw_pcie *dw)
> > > > +{
> > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > +	u32 val, mask;
> > > > +
> > > > +	val = readl(rcar->base + PCIEINTSTS0);
> > > > +	mask = RDLH_LINK_UP | SMLH_LINK_UP;
> > > > +
> > > > +	return (val & mask) == mask;
> > > > +}
> > > > +
> > > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > +{
> > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > +	int i;
> > > > +
> > > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > +
> > > > +	/*
> > > > +	 * Require retraining here. Otherwise RDLH_LINK_UP of PCIEINTSTS0 which
> > > > +	 * is this controller specific register may not be set.
> > > > +	 */
> > > > +	if (rcar->needs_retrain) {
> > > > +		for (i = 0; i < RETRAIN_MAX_RETRIES; i++) {
> > > > +			if (rcar_gen4_pcie_check_retrain_link(dw))
> > > > +				return 0;
> > > > +			msleep(100);
> > > > +		}
> > > > +
> > > > +		return -ETIMEDOUT;	/* Failed */
> > > > +	}
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static void rcar_gen4_pcie_stop_link(struct dw_pcie *dw)
> > > > +{
> > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > +
> > > > +	rcar_gen4_pcie_ltssm_enable(rcar, false);
> > > > +}
> > > > +
> > >
> > > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > > +				   int num_lanes)
> > >
> > > 1. Number of lanes is already defined in the rcar_gen4_pcie.dw.num_lanes field.
> > > What about using it from there instead of passing it as an argument?
> > > 2. DW PCIe core driver has a very handy enum defined:
> > > dw_pcie_device_mode. It describes the controller modes (End-point,
> > > Root port, etc). What about adding the mode field right to the
> > > rcar_gen4_pcie structure and initializing it in someplace in probe() ?
> > > 3. Based on the function semantic it's better to be named as something
> > > like rcar_gen4_pcie_init_device() or even rcar_gen4_pcie_basic_init().
> >
> > Thank you for your comments! I'll modify the function.
> >
> > >
> > > > +{
> > > > +	u32 val;
> > > > +
> > >
> > > > +	/* Note: Assume the rcar->rst which is Cold-reset is asserted here */
> > >
> > > What about directly asserting it here then? In accordance with the DW
> > > PCIe DM manual the "device_type" input must be set before the DM
> > > controller is powered up (basically un-reset). What if the controller
> > > reset is already de-asserted, but you are going to changes its mode?
> > > In that case the mode won't be changed and you'll end up with
> > > unpredictable results.
> >
> > Thank you for your comment. We should add asserting it here as you mentioned.
> >
> > > > +	val = readl(rcar->base + PCIEMSR0);
> > > > +	if (rc)
> > > > +		val |= DEVICE_TYPE_RC;
> > > > +	else
> > > > +		val |= DEVICE_TYPE_EP;
> > > > +
> > > > +	if (num_lanes < 4)
> > > > +		val |= BIFUR_MOD_SET_ON;
> > > > +
> > > > +	writel(val, rcar->base + PCIEMSR0);
> > > > +
> > > > +	return reset_control_deassert(rcar->rst);
> > > > +}
> > > > +
> > > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *rcar)
> > > > +{
> > > > +	struct device *dev = rcar->dw.dev;
> > > > +	int err;
> > > > +
> > > > +	pm_runtime_enable(dev);
> > > > +	err = pm_runtime_resume_and_get(dev);
> > > > +	if (err < 0) {
> > > > +		dev_err(dev, "Failed to resume/get Runtime PM\n");
> > > > +		pm_runtime_disable(dev);
> > > > +	}
> > > > +
> > > > +	return err;
> > > > +}
> > > > +
> > > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *rcar)
> > > > +{
> > > > +	struct device *dev = rcar->dw.dev;
> > > > +
> > > > +	if (!reset_control_status(rcar->rst))
> > > > +		reset_control_assert(rcar->rst);
> > > > +	pm_runtime_put(dev);
> > > > +	pm_runtime_disable(dev);
> > > > +}
> > > > +
> > > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > > +				 struct platform_device *pdev)
> > > > +{
> > > > +	struct device *dev = rcar->dw.dev;
> > > > +
> > > > +	/* Renesas-specific registers */
> > > > +	rcar->base = devm_platform_ioremap_resource_byname(pdev, "app");
> > > > +	if (IS_ERR(rcar->base))
> > > > +		return PTR_ERR(rcar->base);
> > > > +
> > >
> > > > +	rcar->rst = devm_reset_control_get(dev, NULL);
> > > > +	if (IS_ERR(rcar->rst)) {
> > > > +		dev_err(dev, "Failed to get Cold-reset\n");
> > >
> > > So AFAICS your platform is equipped with the DWC_pcie_clkrst.v module.
> > > Thus all the resets are appropriately cleared by a single flag:
> > > power_up_rst_n. What about using the named reset in this case with the
> > > "pwr" name? Thus you'll be able to drop the manual
> > > devm_reset_control_get() invocation and instead use the reset-resources
> > > requested in the framework of the generic dw_pcie_get_resources()
> > > method? Note you'll need to move the dw_pcie_cap_set(dw, REQ_RES);
> > > statement to rcar_gen4_pcie_devm_alloc() then and drop the
> > > rcar_gen4_pcie.rst field afterwords.
> >
> > Thank you for your suggestion! Using "pwr" can work on my environment.
> >
> > > By the way I don't see you requesting and enabling the reference
> > > clock in your driver but the bindings imply the clock source. How
> > > come?
> >
> 
> > For now, I used gpio-hog to enable the reference clock. But, it seem
> > I should use "ref" clock for it. So, I'll fix it too.
> 
> Not sure what gpio-hog you are talking about. Do you mean the pe_rst
> signal or some another gpio? I failed to see of how pe_rst is
> connected to the clock source. In anyway directly handling the clock
> source would be more portable choice.

Sorry for lacking information. I described a gpio node like below
and then the gpio will be high automatically, and the reference clock
will be output. But, this is completely independent from pci.
---
&gpio2 {
        pci-clkreq0-hog {
                gpio-hog;
                gpios = <15 GPIO_ACTIVE_LOW>;
                output-high;
        };
};
---

Now I could implement the clock handling by using "gpio-gate-clock".
So, I'll drop the gpio-hog for the reference clock.

Best regards,
Yoshihiro Shimoda

> -Serge(y)
> 
> >
> > > > +		return PTR_ERR(rcar->rst);
> > > > +	}
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static const struct dw_pcie_ops dw_pcie_ops = {
> > > > +	.start_link = rcar_gen4_pcie_start_link,
> > > > +	.stop_link = rcar_gen4_pcie_stop_link,
> > > > +	.link_up = rcar_gen4_pcie_link_up,
> > > > +};
> > > > +
> > > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev)
> > > > +{
> > > > +	struct rcar_gen4_pcie *rcar;
> > > > +
> > > > +	rcar = devm_kzalloc(dev, sizeof(*rcar), GFP_KERNEL);
> > > > +	if (!rcar)
> > > > +		return NULL;
> > > > +
> > > > +	rcar->dw.dev = dev;
> > > > +	rcar->dw.ops = &dw_pcie_ops;
> > > > +	dw_pcie_cap_set(&rcar->dw, EDMA_UNROLL);
> > > > +
> > > > +	return rcar;
> > > > +}
> > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.h b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > new file mode 100644
> > > > index 000000000000..fec3f18609f4
> > > > --- /dev/null
> > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > @@ -0,0 +1,46 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > > +/*
> > > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > + */
> > > > +
> > > > +#ifndef _PCIE_RCAR_GEN4_H_
> > > > +#define _PCIE_RCAR_GEN4_H_
> > > > +
> > > > +#include <linux/io.h>
> > > > +#include <linux/pci.h>
> > > > +#include <linux/reset.h>
> > > > +
> > > > +#include "pcie-designware.h"
> > > > +
> > > > +/* Renesas-specific */
> > > > +#define PCIEMSR0		0x0000
> > > > +#define  BIFUR_MOD_SET_ON	BIT(0)
> > > > +#define  DEVICE_TYPE_EP		0
> > > > +#define  DEVICE_TYPE_RC		BIT(4)
> > > > +
> > > > +#define PCIEINTSTS0		0x0084
> > > > +#define PCIEINTSTS0EN		0x0310
> > > > +#define  MSI_CTRL_INT		BIT(26)
> > > > +#define  SMLH_LINK_UP		BIT(7)
> > > > +#define  RDLH_LINK_UP		BIT(6)
> > > > +#define PCIEDMAINTSTSEN		0x0314
> > > > +#define  PCIEDMAINTSTSEN_INIT	GENMASK(15, 0)
> > > > +
> > >
> > > > +struct rcar_gen4_pcie {
> > >
> > > As I mentioned above this structure can be extended with the enum
> > > dw_pcie_device_mode field thus dropping the boolean argument from the
> > > rcar_gen4_pcie_set_device_type() method.
> >
> > I got it. I'll fix this.
> >
> > > > +	struct dw_pcie		dw;
> > >
> > > As I already mentioned above the dw.num_lanes could be used instead of
> > > passing it as the rcar_gen4_pcie_set_device_type() argument.
> >
> > I'll fix this too.
> >
> > Best regards,
> > Yoshihiro Shimoda
> >
> > > -Serge(y)
> > >
> > > > +	void __iomem		*base;
> > > > +	struct reset_control	*rst;
> > > > +	bool			needs_retrain;
> > > > +};
> > > > +#define to_rcar_gen4_pcie(x)	dev_get_drvdata((x)->dev)
> > > > +
> > > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > > +				   int num_lanes);
> > > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *pcie);
> > > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *pcie);
> > > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > > +				 struct platform_device *pdev);
> > > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev);
> > > > +
> > > > +#endif /* _PCIE_RCAR_GEN4_H_ */
> > > > --
> > > > 2.25.1
> > > >
Serge Semin June 8, 2023, 12:11 p.m. UTC | #5
On Thu, Jun 08, 2023 at 08:47:16AM +0000, Yoshihiro Shimoda wrote:
> Hello Serge,
> 
> > From: Serge Semin, Sent: Wednesday, June 7, 2023 9:16 PM
> > 
> > On Wed, Jun 07, 2023 at 02:59:20AM +0000, Yoshihiro Shimoda wrote:
> > > Hello Serge,
> > >
> > > > From: Serge Semin, Sent: Monday, June 5, 2023 11:39 PM
> > > >
> > > > On Wed, May 10, 2023 at 03:22:31PM +0900, Yoshihiro Shimoda wrote:
> > > > > Add R-Car Gen4 PCIe Host support. This controller is based on
> > > > > Synopsys DesignWare PCIe, but this controller has vendor-specific
> > > > > registers so that requires initialization code like mode setting
> > > > > and retraining and so on.
> > > > >
> > > > > To reduce code delta, adds some helper functions which are used by
> > > > > both the host driver and the endpoint driver (which is added
> > > > > immediately afterwards) into a separate file.
> > > > >
> > > > > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> > > > > ---
> > > > >  drivers/pci/controller/dwc/Kconfig            |   9 +
> > > > >  drivers/pci/controller/dwc/Makefile           |   2 +
> > > > >  .../pci/controller/dwc/pcie-rcar-gen4-host.c  | 141 +++++++++++++
> > > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 190 ++++++++++++++++++
> > > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.h   |  46 +++++
> > > > >  5 files changed, 388 insertions(+)
> > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > >
> > > > > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > > > > index ab96da43e0c2..64d4d37bc891 100644
> > > > > --- a/drivers/pci/controller/dwc/Kconfig
> > > > > +++ b/drivers/pci/controller/dwc/Kconfig
> > > > > @@ -415,4 +415,13 @@ config PCIE_VISCONTI_HOST
> > > > >  	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
> > > > >  	  This driver supports TMPV7708 SoC.
> > > > >
> > > > > +config PCIE_RCAR_GEN4
> > > > > +	tristate "Renesas R-Car Gen4 PCIe Host controller"
> > > > > +	depends on ARCH_RENESAS || COMPILE_TEST
> > > > > +	depends on PCI_MSI
> > > > > +	select PCIE_DW_HOST
> > > > > +	help
> > > > > +	  Say Y here if you want PCIe host controller support on R-Car Gen4 SoCs.
> > > > > +	  This uses the DesignWare core.
> > > > > +
> > > > >  endmenu
> > > > > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > > > > index bf5c311875a1..486cf706b53d 100644
> > > > > --- a/drivers/pci/controller/dwc/Makefile
> > > > > +++ b/drivers/pci/controller/dwc/Makefile
> > > > > @@ -26,6 +26,8 @@ obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
> > > > >  obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
> > > > >  obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
> > > > >  obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
> > > > > +pcie-rcar-gen4-host-drv-objs := pcie-rcar-gen4.o pcie-rcar-gen4-host.o
> > > > > +obj-$(CONFIG_PCIE_RCAR_GEN4) += pcie-rcar-gen4-host-drv.o
> > > > >
> > > > >  # The following drivers are for devices that use the generic ACPI
> > > > >  # pci_root.c driver but don't support standard ECAM config access.
> > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > > new file mode 100644
> > > > > index 000000000000..df7d80f1874f
> > > > > --- /dev/null
> > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > > @@ -0,0 +1,141 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > > +/*
> > > > > + * PCIe host controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > + */
> > > > > +
> > > > > +#include <linux/delay.h>
> > > > > +#include <linux/interrupt.h>
> > > > > +#include <linux/module.h>
> > > > > +#include <linux/of_device.h>
> > > > > +#include <linux/pci.h>
> > > > > +#include <linux/platform_device.h>
> > > > > +
> > > > > +#include "pcie-rcar-gen4.h"
> > > > > +#include "pcie-designware.h"
> > > > > +
> > > > > +static int rcar_gen4_pcie_host_init(struct dw_pcie_rp *pp)
> > > > > +{
> > > > > +	struct dw_pcie *dw = to_dw_pcie_from_pp(pp);
> > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > +	int ret;
> > > > > +	u32 val;
> > > > > +
> > > > > +	gpiod_set_value_cansleep(dw->pe_rst, 1);
> > > > > +
> > > > > +	ret = rcar_gen4_pcie_set_device_type(rcar, true, dw->num_lanes);
> > > > > +	if (ret < 0)
> > > > > +		return ret;
> > > > > +
> > > >
> > > > > +	dw_pcie_dbi_ro_wr_en(dw);
> > > >
> > > > Are you sure dw_pcie_dbi_ro_wr_en() and dw_pcie_dbi_ro_wr_dis() are
> > > > needed? In accordance with the DW PCIe Dual-mode HW manual the BARx
> > > > registers are W-only over the DBI2 map with no need in setting the
> > > > DBI_RO_WR_EN flag.
> > > >
> > > > Please check that on your hardware.
> > >
> > > You're correct. They are not needed. So, I'll drop this on v17.
> > >
> > > > > +
> > > > > +	/*
> > > > > +	 * According to the section 3.5.7.2 "RC Mode" in DWC PCIe Dual Mode
> > > > > +	 * Rev.5.20a, we should disable two BARs to avoid unnecessary memory
> > > > > +	 * assignment during device enumeration.
> > > > > +	 */
> > > > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_0, 0x0);
> > > > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_1, 0x0);
> > > > > +
> > > >
> > > > > +	dw_pcie_dbi_ro_wr_dis(dw);
> > > >
> > > > ditto
> > >
> > > I'll drop this too.
> > >
> > > > > +
> > > > > +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> > > > > +		/* Enable MSI interrupt signal */
> > > > > +		val = readl(rcar->base + PCIEINTSTS0EN);
> > > > > +		val |= MSI_CTRL_INT;
> > > > > +		writel(val, rcar->base + PCIEINTSTS0EN);
> > > > > +	}
> > > > > +
> > > > > +	msleep(100);	/* pe_rst requires 100msec delay */
> > > > > +
> > > > > +	gpiod_set_value_cansleep(dw->pe_rst, 0);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static const struct dw_pcie_host_ops rcar_gen4_pcie_host_ops = {
> > > > > +	.host_init = rcar_gen4_pcie_host_init,
> > > > > +};
> > > > > +
> > > > > +static int rcar_gen4_add_dw_pcie_rp(struct rcar_gen4_pcie *rcar,
> > > > > +				   struct platform_device *pdev)
> > > > > +{
> > > > > +	struct dw_pcie *dw = &rcar->dw;
> > > > > +	struct dw_pcie_rp *pp = &dw->pp;
> > > > > +
> > > > > +	pp->num_vectors = MAX_MSI_IRQS;
> > > > > +	pp->ops = &rcar_gen4_pcie_host_ops;
> > > > > +	dw_pcie_cap_set(dw, REQ_RES);
> > > > > +
> > > > > +	return dw_pcie_host_init(pp);
> > > > > +}
> > > > > +
> > > > > +static void rcar_gen4_remove_dw_pcie_rp(struct rcar_gen4_pcie *rcar)
> > > > > +{
> > > > > +	dw_pcie_host_deinit(&rcar->dw.pp);
> > > > > +	gpiod_set_value_cansleep(rcar->dw.pe_rst, 1);
> > > > > +}
> > > > > +
> > > > > +static int rcar_gen4_pcie_probe(struct platform_device *pdev)
> > > > > +{
> > > > > +	struct device *dev = &pdev->dev;
> > > > > +	struct rcar_gen4_pcie *rcar;
> > > > > +	int err;
> > > > > +
> > > > > +	rcar = rcar_gen4_pcie_devm_alloc(dev);
> > > > > +	if (!rcar)
> > > > > +		return -ENOMEM;
> > > > > +
> > > > > +	err = rcar_gen4_pcie_get_resources(rcar, pdev);
> > > > > +	if (err < 0) {
> > > > > +		dev_err(dev, "Failed to request resource: %d\n", err);
> > > > > +		return err;
> > > > > +	}
> > > > > +
> > > > > +	platform_set_drvdata(pdev, rcar);
> > > > > +
> > > > > +	err = rcar_gen4_pcie_prepare(rcar);
> > > > > +	if (err < 0)
> > > > > +		return err;
> > > > > +
> > > > > +	rcar->needs_retrain = true;
> > > > > +	err = rcar_gen4_add_dw_pcie_rp(rcar, pdev);
> > > > > +	if (err < 0)
> > > > > +		goto err_add;
> > > > > +
> > > > > +	return 0;
> > > > > +
> > > > > +err_add:
> > > > > +	rcar_gen4_pcie_unprepare(rcar);
> > > > > +
> > > > > +	return err;
> > > > > +}
> > > > > +
> > > > > +static int rcar_gen4_pcie_remove(struct platform_device *pdev)
> > > > > +{
> > > > > +	struct rcar_gen4_pcie *rcar = platform_get_drvdata(pdev);
> > > > > +
> > > > > +	rcar_gen4_remove_dw_pcie_rp(rcar);
> > > > > +	rcar_gen4_pcie_unprepare(rcar);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static const struct of_device_id rcar_gen4_pcie_of_match[] = {
> > > > > +	{ .compatible = "renesas,rcar-gen4-pcie", },
> > > > > +	{},
> > > > > +};
> > > > > +
> > > > > +static struct platform_driver rcar_gen4_pcie_driver = {
> > > > > +	.driver = {
> > > > > +		.name = "pcie-rcar-gen4",
> > > > > +		.of_match_table = rcar_gen4_pcie_of_match,
> > > > > +		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
> > > > > +	},
> > > > > +	.probe = rcar_gen4_pcie_probe,
> > > > > +	.remove = rcar_gen4_pcie_remove,
> > > > > +};
> > > > > +module_platform_driver(rcar_gen4_pcie_driver);
> > > > > +
> > > > > +MODULE_DESCRIPTION("Renesas R-Car Gen4 PCIe host controller driver");
> > > > > +MODULE_LICENSE("GPL");
> > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > > new file mode 100644
> > > > > index 000000000000..35923fda8ed5
> > > > > --- /dev/null
> > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > > @@ -0,0 +1,190 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > > +/*
> > > > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > + */
> > > > > +
> > > > > +#include <linux/delay.h>
> > > > > +#include <linux/io.h>
> > > > > +#include <linux/of_device.h>
> > > > > +#include <linux/pci.h>
> > > > > +#include <linux/pm_runtime.h>
> > > > > +#include <linux/reset.h>
> > > > > +
> > > > > +#include "pcie-rcar-gen4.h"
> > > > > +#include "pcie-designware.h"
> > > > > +
> > > > > +/* Renesas-specific */
> > > > > +#define PCIERSTCTRL1		0x0014
> > > > > +#define  APP_HOLD_PHY_RST	BIT(16)
> > > > > +#define  APP_LTSSM_ENABLE	BIT(0)
> > > > > +
> > > > > +#define RETRAIN_MAX_CHECK	10
> > > > > +#define RETRAIN_MAX_RETRIES	10
> > > > > +
> > > > > +static void rcar_gen4_pcie_ltssm_enable(struct rcar_gen4_pcie *rcar,
> > > > > +					bool enable)
> > > > > +{
> > > > > +	u32 val;
> > > > > +
> > > > > +	val = readl(rcar->base + PCIERSTCTRL1);
> > > > > +	if (enable) {
> > > > > +		val |= APP_LTSSM_ENABLE;
> > > >
> > > > > +		val &= ~APP_HOLD_PHY_RST;
> > > >
> > > > What about moving the APP_HOLD_PHY_RST de-assertion to the
> > > > rcar_gen4_pcie_set_device_type() method? In accordance with the
> > > > "3.1 Initialization" chapter it's supposed to be done before
> > > > performing the DBI programming and activating the link training.
> > >
> > 
> > > IIUC, the "3.1 Initialization" said app_hold_phy_rst = 1 before
> > > performing the DBI programming. So, it is assertion. Also, the SoC
> > > documentation described the initializing procedure as the follows:
> > >  app_ltssm_enable = 1
> > >  app_hold_phy_rst = 0
> > > So, I would like to keep them in the function.
> > 
> > Indeed. I was wrong. Sorry for the misleading comment.
> 
> No problem. Thank you for the confirmation!
> 
> > >
> > > > > +	} else {
> > > > > +		val &= ~APP_LTSSM_ENABLE;
> > > > > +		val |= APP_HOLD_PHY_RST;
> > > > > +	}
> > > > > +	writel(val, rcar->base + PCIERSTCTRL1);
> > > > > +}
> > > > > +
> > > > > +static bool rcar_gen4_pcie_check_retrain_link(struct dw_pcie *dw)
> > > > > +{
> > > > > +	u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> > > > > +	u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> > > > > +	u32 lnkctl = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCTL);
> > > > > +	u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > > > +	int i;
> > > > > +
> > > >
> > > > > +	if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> > > > > +		return true;
> > > > > +
> > > > > +	lnkctl |= PCI_EXP_LNKCTL_RL;
> > > > > +	dw_pcie_writel_dbi(dw, offset + PCI_EXP_LNKCTL, lnkctl);
> > > > > +
> > > > > +	for (i = 0; i < RETRAIN_MAX_CHECK; i++) {
> > > > > +		lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > > > +		if (lnksta & PCI_EXP_LNKSTA_LT)
> > > > > +			return true;
> > > > > +		usleep_range(1000, 1100);
> > > > > +	}
> > > >
> > > > I'll ask one more time because you didn't respond to my previous note
> > > > about this.
> > >
> > > I'm sorry. I completely overlooked the previous note.
> > >
> > > > Are you sure that this is needed? Did you try
> > > > the approach described in "3.13 Gen2/3/4/5 Speed Modes" with
> > > > de-asserting/asserting the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag?
> > >
> > > I tried this setting, but it doesn't work. I'll investigate this setting more.
> > >
> > > > I keep asking because the same problem we used to have on our hardware
> > > > until we found out that the DIRECT_SPEED_CHANGE flag helped to train
> > > > the link right to the speed specified in the capabilities.
> > > >
> > > > So here is what presumably you'll need to do (based on the
> > > > "3.1 Initialization" and "3.13 Gen2/3/4/5 Speed Modes" chapters of
> > > > the DW PCIe DM hw-manual):
> > > > 1. Make sure the controller is in the power-down/reset state.
> > > > 2. Select device_type (EP or RP).
> > > > 3. De-assert the controller reset.
> > > > 4. Clear PHY-reset flag in the app registers.
> > > > 5. Perform some controller initializations.
> > > > 6. Enable LTSSM to start link training.
> > > > 7. Set GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag one more time.
> > > >
> > > > 1-4 are supposed to be done in rcar_gen4_pcie_host_init().
> > > > 5 is performed in the framework of the DW PCIe core driver.
> > > > 6-7 should be done in rcar_gen4_pcie_start_link().
> > > >
> > > > Note 1. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is already set on stage
> > > > 5 in the framework of the dw_pcie_setup_rc() method. But in our case
> > > > it only caused having the Gen.2 link speed. Adding stage 7 helped to
> > > > get stable Gen.3 link. So please try the denoted approach. If it works
> > > > what about adding stage 7 twice in order to get Gen.4 speed?
> > > > (waiting for the DIRECT_SPEED_CHANGE flag being auto-cleared and then
> > > > set it up again?)
> > > >
> > > > Note 2. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is defined as
> > > > PCIE_LINK_WIDTH_SPEED_CONTROL.PORT_LOGIC_SPEED_CHANGE macros in the DW
> > > > PCIe core driver.
> > > >
> > > > Note 3. If what is suggested above works well then you won't need to
> > > > have the heavy rcar_gen4_pcie_check_retrain_link() method in the way
> > > > you have it defined.
> > >
> > > Thank you very much for your comments!
> > 
> > Please see the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE description for details
> > of how the flag is supposed to be de-asserted and asserted in order to
> > initiate the direct speed change.
> 
> After I modified the start_link() like below, it also works. Is the code
> acceptable? (Sorry all tabs are replaced to spaces...)

Looks good, but still there are some questions.

> ----------------------------------------------------------------------------
> static bool rcar_gen4_pcie_check_current_link(struct dw_pcie *dw)
> {
>         u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
>         u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
>         u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> 

>         if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
>                 return true;

AFAICS depending on the link partner speed capabilities this may never
happen.

PCI_EXP_LNKCAP_SLS - Max Link Speed. This field indicates the maximum Link
speed of the associated Port.
PCI_EXP_LNKSTA_CLS - Current Link Speed. This field indicates the negotiated
Link speed of the given PCI Express Link

What if a link partner has the speed capability weaker than the link
speed of the Root Port? If so then the current link speed will never
reach the max link speed value.

Of course this can be fixed by specifying a correct "max-link-speed"
property, but what if a platform has a cold-swappable port connected to
the root port? Since any device can be attached you'll never be able
to predict its capabilities beforahead.

> 
>         return false;
> }
> 
> static void rcar_gen4_pcie_speed_change(struct dw_pcie *dw)
> {
>         u32 val;
> 
>         val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
>         val &= ~PORT_LOGIC_SPEED_CHANGE;
>         dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> 
>         val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
>         val |= PORT_LOGIC_SPEED_CHANGE;
>         dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> }
> 
> static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> {
>         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
>         int i;
> 
>         rcar_gen4_pcie_ltssm_enable(rcar, true);
> 
>         /*
>          * Require direct speed change here. Otherwise RDLH_LINK_UP of
>          * PCIEINTSTS0 which is this controller specific register may not
>          * be set.
>          */

>         if (rcar->needs_speed_change) {

Seeing this is specified for the root port only what about
replacing the statement with just test whether the rcar_gen4_pcie.mode ==
DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.

BTW Just curious. Why is the loop below enabled for the Root Port
only? What about the end-point controller? It's the same hardware
after all..

>                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
>                         rcar_gen4_pcie_speed_change(dw);
>                         msleep(100);
>                         if (rcar_gen4_pcie_check_current_link(dw))
>                                 return 0;
>                 }

Did you trace how many iterations this loop normally takes? Is it
constant or varies for the same platform setup and a connected link
partner? Does the number of iterations depend on the target link speed
specified via the "max-link-speed" property?

I am just trying to understand whether we can completely get rid from
the rcar_gen4_pcie_check_current_link() method and have it replaced
with several rcar_gen4_pcie_speed_change() calls. The current link
state would have been checked in the framework of the
dw_pcie_wait_for_link() method which calls dw_pcie_link_up() and your
rcar_gen4_pcie_link_up() in order to make sure the link is actually up.

-Serge(y)

> 
>                 return -ETIMEDOUT;      /* Failed */
>         }
> ------------------------------------------------------------------
> 
> > >
> > > > > +
> > > > > +	return false;
> > > > > +}
> > > > > +
> > > > > +static int rcar_gen4_pcie_link_up(struct dw_pcie *dw)
> > > > > +{
> > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > +	u32 val, mask;
> > > > > +
> > > > > +	val = readl(rcar->base + PCIEINTSTS0);
> > > > > +	mask = RDLH_LINK_UP | SMLH_LINK_UP;
> > > > > +
> > > > > +	return (val & mask) == mask;
> > > > > +}
> > > > > +
> > > > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > +{
> > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > +	int i;
> > > > > +
> > > > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > +
> > > > > +	/*
> > > > > +	 * Require retraining here. Otherwise RDLH_LINK_UP of PCIEINTSTS0 which
> > > > > +	 * is this controller specific register may not be set.
> > > > > +	 */
> > > > > +	if (rcar->needs_retrain) {
> > > > > +		for (i = 0; i < RETRAIN_MAX_RETRIES; i++) {
> > > > > +			if (rcar_gen4_pcie_check_retrain_link(dw))
> > > > > +				return 0;
> > > > > +			msleep(100);
> > > > > +		}
> > > > > +
> > > > > +		return -ETIMEDOUT;	/* Failed */
> > > > > +	}
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static void rcar_gen4_pcie_stop_link(struct dw_pcie *dw)
> > > > > +{
> > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > +
> > > > > +	rcar_gen4_pcie_ltssm_enable(rcar, false);
> > > > > +}
> > > > > +
> > > >
> > > > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > > > +				   int num_lanes)
> > > >
> > > > 1. Number of lanes is already defined in the rcar_gen4_pcie.dw.num_lanes field.
> > > > What about using it from there instead of passing it as an argument?
> > > > 2. DW PCIe core driver has a very handy enum defined:
> > > > dw_pcie_device_mode. It describes the controller modes (End-point,
> > > > Root port, etc). What about adding the mode field right to the
> > > > rcar_gen4_pcie structure and initializing it in someplace in probe() ?
> > > > 3. Based on the function semantic it's better to be named as something
> > > > like rcar_gen4_pcie_init_device() or even rcar_gen4_pcie_basic_init().
> > >
> > > Thank you for your comments! I'll modify the function.
> > >
> > > >
> > > > > +{
> > > > > +	u32 val;
> > > > > +
> > > >
> > > > > +	/* Note: Assume the rcar->rst which is Cold-reset is asserted here */
> > > >
> > > > What about directly asserting it here then? In accordance with the DW
> > > > PCIe DM manual the "device_type" input must be set before the DM
> > > > controller is powered up (basically un-reset). What if the controller
> > > > reset is already de-asserted, but you are going to changes its mode?
> > > > In that case the mode won't be changed and you'll end up with
> > > > unpredictable results.
> > >
> > > Thank you for your comment. We should add asserting it here as you mentioned.
> > >
> > > > > +	val = readl(rcar->base + PCIEMSR0);
> > > > > +	if (rc)
> > > > > +		val |= DEVICE_TYPE_RC;
> > > > > +	else
> > > > > +		val |= DEVICE_TYPE_EP;
> > > > > +
> > > > > +	if (num_lanes < 4)
> > > > > +		val |= BIFUR_MOD_SET_ON;
> > > > > +
> > > > > +	writel(val, rcar->base + PCIEMSR0);
> > > > > +
> > > > > +	return reset_control_deassert(rcar->rst);
> > > > > +}
> > > > > +
> > > > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *rcar)
> > > > > +{
> > > > > +	struct device *dev = rcar->dw.dev;
> > > > > +	int err;
> > > > > +
> > > > > +	pm_runtime_enable(dev);
> > > > > +	err = pm_runtime_resume_and_get(dev);
> > > > > +	if (err < 0) {
> > > > > +		dev_err(dev, "Failed to resume/get Runtime PM\n");
> > > > > +		pm_runtime_disable(dev);
> > > > > +	}
> > > > > +
> > > > > +	return err;
> > > > > +}
> > > > > +
> > > > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *rcar)
> > > > > +{
> > > > > +	struct device *dev = rcar->dw.dev;
> > > > > +
> > > > > +	if (!reset_control_status(rcar->rst))
> > > > > +		reset_control_assert(rcar->rst);
> > > > > +	pm_runtime_put(dev);
> > > > > +	pm_runtime_disable(dev);
> > > > > +}
> > > > > +
> > > > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > > > +				 struct platform_device *pdev)
> > > > > +{
> > > > > +	struct device *dev = rcar->dw.dev;
> > > > > +
> > > > > +	/* Renesas-specific registers */
> > > > > +	rcar->base = devm_platform_ioremap_resource_byname(pdev, "app");
> > > > > +	if (IS_ERR(rcar->base))
> > > > > +		return PTR_ERR(rcar->base);
> > > > > +
> > > >
> > > > > +	rcar->rst = devm_reset_control_get(dev, NULL);
> > > > > +	if (IS_ERR(rcar->rst)) {
> > > > > +		dev_err(dev, "Failed to get Cold-reset\n");
> > > >
> > > > So AFAICS your platform is equipped with the DWC_pcie_clkrst.v module.
> > > > Thus all the resets are appropriately cleared by a single flag:
> > > > power_up_rst_n. What about using the named reset in this case with the
> > > > "pwr" name? Thus you'll be able to drop the manual
> > > > devm_reset_control_get() invocation and instead use the reset-resources
> > > > requested in the framework of the generic dw_pcie_get_resources()
> > > > method? Note you'll need to move the dw_pcie_cap_set(dw, REQ_RES);
> > > > statement to rcar_gen4_pcie_devm_alloc() then and drop the
> > > > rcar_gen4_pcie.rst field afterwords.
> > >
> > > Thank you for your suggestion! Using "pwr" can work on my environment.
> > >
> > > > By the way I don't see you requesting and enabling the reference
> > > > clock in your driver but the bindings imply the clock source. How
> > > > come?
> > >
> > 
> > > For now, I used gpio-hog to enable the reference clock. But, it seem
> > > I should use "ref" clock for it. So, I'll fix it too.
> > 
> > Not sure what gpio-hog you are talking about. Do you mean the pe_rst
> > signal or some another gpio? I failed to see of how pe_rst is
> > connected to the clock source. In anyway directly handling the clock
> > source would be more portable choice.
> 
> Sorry for lacking information. I described a gpio node like below
> and then the gpio will be high automatically, and the reference clock
> will be output. But, this is completely independent from pci.
> ---
> &gpio2 {
>         pci-clkreq0-hog {
>                 gpio-hog;
>                 gpios = <15 GPIO_ACTIVE_LOW>;
>                 output-high;
>         };
> };
> ---
> 
> Now I could implement the clock handling by using "gpio-gate-clock".
> So, I'll drop the gpio-hog for the reference clock.
> 
> Best regards,
> Yoshihiro Shimoda
> 
> > -Serge(y)
> > 
> > >
> > > > > +		return PTR_ERR(rcar->rst);
> > > > > +	}
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static const struct dw_pcie_ops dw_pcie_ops = {
> > > > > +	.start_link = rcar_gen4_pcie_start_link,
> > > > > +	.stop_link = rcar_gen4_pcie_stop_link,
> > > > > +	.link_up = rcar_gen4_pcie_link_up,
> > > > > +};
> > > > > +
> > > > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev)
> > > > > +{
> > > > > +	struct rcar_gen4_pcie *rcar;
> > > > > +
> > > > > +	rcar = devm_kzalloc(dev, sizeof(*rcar), GFP_KERNEL);
> > > > > +	if (!rcar)
> > > > > +		return NULL;
> > > > > +
> > > > > +	rcar->dw.dev = dev;
> > > > > +	rcar->dw.ops = &dw_pcie_ops;
> > > > > +	dw_pcie_cap_set(&rcar->dw, EDMA_UNROLL);
> > > > > +
> > > > > +	return rcar;
> > > > > +}
> > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.h b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > > new file mode 100644
> > > > > index 000000000000..fec3f18609f4
> > > > > --- /dev/null
> > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > > @@ -0,0 +1,46 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > > > +/*
> > > > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > + */
> > > > > +
> > > > > +#ifndef _PCIE_RCAR_GEN4_H_
> > > > > +#define _PCIE_RCAR_GEN4_H_
> > > > > +
> > > > > +#include <linux/io.h>
> > > > > +#include <linux/pci.h>
> > > > > +#include <linux/reset.h>
> > > > > +
> > > > > +#include "pcie-designware.h"
> > > > > +
> > > > > +/* Renesas-specific */
> > > > > +#define PCIEMSR0		0x0000
> > > > > +#define  BIFUR_MOD_SET_ON	BIT(0)
> > > > > +#define  DEVICE_TYPE_EP		0
> > > > > +#define  DEVICE_TYPE_RC		BIT(4)
> > > > > +
> > > > > +#define PCIEINTSTS0		0x0084
> > > > > +#define PCIEINTSTS0EN		0x0310
> > > > > +#define  MSI_CTRL_INT		BIT(26)
> > > > > +#define  SMLH_LINK_UP		BIT(7)
> > > > > +#define  RDLH_LINK_UP		BIT(6)
> > > > > +#define PCIEDMAINTSTSEN		0x0314
> > > > > +#define  PCIEDMAINTSTSEN_INIT	GENMASK(15, 0)
> > > > > +
> > > >
> > > > > +struct rcar_gen4_pcie {
> > > >
> > > > As I mentioned above this structure can be extended with the enum
> > > > dw_pcie_device_mode field thus dropping the boolean argument from the
> > > > rcar_gen4_pcie_set_device_type() method.
> > >
> > > I got it. I'll fix this.
> > >
> > > > > +	struct dw_pcie		dw;
> > > >
> > > > As I already mentioned above the dw.num_lanes could be used instead of
> > > > passing it as the rcar_gen4_pcie_set_device_type() argument.
> > >
> > > I'll fix this too.
> > >
> > > Best regards,
> > > Yoshihiro Shimoda
> > >
> > > > -Serge(y)
> > > >
> > > > > +	void __iomem		*base;
> > > > > +	struct reset_control	*rst;
> > > > > +	bool			needs_retrain;
> > > > > +};
> > > > > +#define to_rcar_gen4_pcie(x)	dev_get_drvdata((x)->dev)
> > > > > +
> > > > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > > > +				   int num_lanes);
> > > > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *pcie);
> > > > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *pcie);
> > > > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > > > +				 struct platform_device *pdev);
> > > > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev);
> > > > > +
> > > > > +#endif /* _PCIE_RCAR_GEN4_H_ */
> > > > > --
> > > > > 2.25.1
> > > > >
Yoshihiro Shimoda June 9, 2023, 6:29 a.m. UTC | #6
Hello Serge,

> From: Serge Semin, Sent: Thursday, June 8, 2023 9:11 PM
> 
> On Thu, Jun 08, 2023 at 08:47:16AM +0000, Yoshihiro Shimoda wrote:
> > Hello Serge,
> >
> > > From: Serge Semin, Sent: Wednesday, June 7, 2023 9:16 PM
> > >
> > > On Wed, Jun 07, 2023 at 02:59:20AM +0000, Yoshihiro Shimoda wrote:
> > > > Hello Serge,
> > > >
> > > > > From: Serge Semin, Sent: Monday, June 5, 2023 11:39 PM
> > > > >
> > > > > On Wed, May 10, 2023 at 03:22:31PM +0900, Yoshihiro Shimoda wrote:
> > > > > > Add R-Car Gen4 PCIe Host support. This controller is based on
> > > > > > Synopsys DesignWare PCIe, but this controller has vendor-specific
> > > > > > registers so that requires initialization code like mode setting
> > > > > > and retraining and so on.
> > > > > >
> > > > > > To reduce code delta, adds some helper functions which are used by
> > > > > > both the host driver and the endpoint driver (which is added
> > > > > > immediately afterwards) into a separate file.
> > > > > >
> > > > > > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> > > > > > ---
> > > > > >  drivers/pci/controller/dwc/Kconfig            |   9 +
> > > > > >  drivers/pci/controller/dwc/Makefile           |   2 +
> > > > > >  .../pci/controller/dwc/pcie-rcar-gen4-host.c  | 141 +++++++++++++
> > > > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 190 ++++++++++++++++++
> > > > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.h   |  46 +++++
> > > > > >  5 files changed, 388 insertions(+)
> > > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > > >
> > > > > > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > > > > > index ab96da43e0c2..64d4d37bc891 100644
> > > > > > --- a/drivers/pci/controller/dwc/Kconfig
> > > > > > +++ b/drivers/pci/controller/dwc/Kconfig
> > > > > > @@ -415,4 +415,13 @@ config PCIE_VISCONTI_HOST
> > > > > >  	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
> > > > > >  	  This driver supports TMPV7708 SoC.
> > > > > >
> > > > > > +config PCIE_RCAR_GEN4
> > > > > > +	tristate "Renesas R-Car Gen4 PCIe Host controller"
> > > > > > +	depends on ARCH_RENESAS || COMPILE_TEST
> > > > > > +	depends on PCI_MSI
> > > > > > +	select PCIE_DW_HOST
> > > > > > +	help
> > > > > > +	  Say Y here if you want PCIe host controller support on R-Car Gen4 SoCs.
> > > > > > +	  This uses the DesignWare core.
> > > > > > +
> > > > > >  endmenu
> > > > > > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > > > > > index bf5c311875a1..486cf706b53d 100644
> > > > > > --- a/drivers/pci/controller/dwc/Makefile
> > > > > > +++ b/drivers/pci/controller/dwc/Makefile
> > > > > > @@ -26,6 +26,8 @@ obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
> > > > > >  obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
> > > > > >  obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
> > > > > >  obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
> > > > > > +pcie-rcar-gen4-host-drv-objs := pcie-rcar-gen4.o pcie-rcar-gen4-host.o
> > > > > > +obj-$(CONFIG_PCIE_RCAR_GEN4) += pcie-rcar-gen4-host-drv.o
> > > > > >
> > > > > >  # The following drivers are for devices that use the generic ACPI
> > > > > >  # pci_root.c driver but don't support standard ECAM config access.
> > > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > > > new file mode 100644
> > > > > > index 000000000000..df7d80f1874f
> > > > > > --- /dev/null
> > > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > > > @@ -0,0 +1,141 @@
> > > > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > > > +/*
> > > > > > + * PCIe host controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > > + */
> > > > > > +
> > > > > > +#include <linux/delay.h>
> > > > > > +#include <linux/interrupt.h>
> > > > > > +#include <linux/module.h>
> > > > > > +#include <linux/of_device.h>
> > > > > > +#include <linux/pci.h>
> > > > > > +#include <linux/platform_device.h>
> > > > > > +
> > > > > > +#include "pcie-rcar-gen4.h"
> > > > > > +#include "pcie-designware.h"
> > > > > > +
> > > > > > +static int rcar_gen4_pcie_host_init(struct dw_pcie_rp *pp)
> > > > > > +{
> > > > > > +	struct dw_pcie *dw = to_dw_pcie_from_pp(pp);
> > > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > +	int ret;
> > > > > > +	u32 val;
> > > > > > +
> > > > > > +	gpiod_set_value_cansleep(dw->pe_rst, 1);
> > > > > > +
> > > > > > +	ret = rcar_gen4_pcie_set_device_type(rcar, true, dw->num_lanes);
> > > > > > +	if (ret < 0)
> > > > > > +		return ret;
> > > > > > +
> > > > >
> > > > > > +	dw_pcie_dbi_ro_wr_en(dw);
> > > > >
> > > > > Are you sure dw_pcie_dbi_ro_wr_en() and dw_pcie_dbi_ro_wr_dis() are
> > > > > needed? In accordance with the DW PCIe Dual-mode HW manual the BARx
> > > > > registers are W-only over the DBI2 map with no need in setting the
> > > > > DBI_RO_WR_EN flag.
> > > > >
> > > > > Please check that on your hardware.
> > > >
> > > > You're correct. They are not needed. So, I'll drop this on v17.
> > > >
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * According to the section 3.5.7.2 "RC Mode" in DWC PCIe Dual Mode
> > > > > > +	 * Rev.5.20a, we should disable two BARs to avoid unnecessary memory
> > > > > > +	 * assignment during device enumeration.
> > > > > > +	 */
> > > > > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_0, 0x0);
> > > > > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_1, 0x0);
> > > > > > +
> > > > >
> > > > > > +	dw_pcie_dbi_ro_wr_dis(dw);
> > > > >
> > > > > ditto
> > > >
> > > > I'll drop this too.
> > > >
> > > > > > +
> > > > > > +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> > > > > > +		/* Enable MSI interrupt signal */
> > > > > > +		val = readl(rcar->base + PCIEINTSTS0EN);
> > > > > > +		val |= MSI_CTRL_INT;
> > > > > > +		writel(val, rcar->base + PCIEINTSTS0EN);
> > > > > > +	}
> > > > > > +
> > > > > > +	msleep(100);	/* pe_rst requires 100msec delay */
> > > > > > +
> > > > > > +	gpiod_set_value_cansleep(dw->pe_rst, 0);
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static const struct dw_pcie_host_ops rcar_gen4_pcie_host_ops = {
> > > > > > +	.host_init = rcar_gen4_pcie_host_init,
> > > > > > +};
> > > > > > +
> > > > > > +static int rcar_gen4_add_dw_pcie_rp(struct rcar_gen4_pcie *rcar,
> > > > > > +				   struct platform_device *pdev)
> > > > > > +{
> > > > > > +	struct dw_pcie *dw = &rcar->dw;
> > > > > > +	struct dw_pcie_rp *pp = &dw->pp;
> > > > > > +
> > > > > > +	pp->num_vectors = MAX_MSI_IRQS;
> > > > > > +	pp->ops = &rcar_gen4_pcie_host_ops;
> > > > > > +	dw_pcie_cap_set(dw, REQ_RES);
> > > > > > +
> > > > > > +	return dw_pcie_host_init(pp);
> > > > > > +}
> > > > > > +
> > > > > > +static void rcar_gen4_remove_dw_pcie_rp(struct rcar_gen4_pcie *rcar)
> > > > > > +{
> > > > > > +	dw_pcie_host_deinit(&rcar->dw.pp);
> > > > > > +	gpiod_set_value_cansleep(rcar->dw.pe_rst, 1);
> > > > > > +}
> > > > > > +
> > > > > > +static int rcar_gen4_pcie_probe(struct platform_device *pdev)
> > > > > > +{
> > > > > > +	struct device *dev = &pdev->dev;
> > > > > > +	struct rcar_gen4_pcie *rcar;
> > > > > > +	int err;
> > > > > > +
> > > > > > +	rcar = rcar_gen4_pcie_devm_alloc(dev);
> > > > > > +	if (!rcar)
> > > > > > +		return -ENOMEM;
> > > > > > +
> > > > > > +	err = rcar_gen4_pcie_get_resources(rcar, pdev);
> > > > > > +	if (err < 0) {
> > > > > > +		dev_err(dev, "Failed to request resource: %d\n", err);
> > > > > > +		return err;
> > > > > > +	}
> > > > > > +
> > > > > > +	platform_set_drvdata(pdev, rcar);
> > > > > > +
> > > > > > +	err = rcar_gen4_pcie_prepare(rcar);
> > > > > > +	if (err < 0)
> > > > > > +		return err;
> > > > > > +
> > > > > > +	rcar->needs_retrain = true;
> > > > > > +	err = rcar_gen4_add_dw_pcie_rp(rcar, pdev);
> > > > > > +	if (err < 0)
> > > > > > +		goto err_add;
> > > > > > +
> > > > > > +	return 0;
> > > > > > +
> > > > > > +err_add:
> > > > > > +	rcar_gen4_pcie_unprepare(rcar);
> > > > > > +
> > > > > > +	return err;
> > > > > > +}
> > > > > > +
> > > > > > +static int rcar_gen4_pcie_remove(struct platform_device *pdev)
> > > > > > +{
> > > > > > +	struct rcar_gen4_pcie *rcar = platform_get_drvdata(pdev);
> > > > > > +
> > > > > > +	rcar_gen4_remove_dw_pcie_rp(rcar);
> > > > > > +	rcar_gen4_pcie_unprepare(rcar);
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static const struct of_device_id rcar_gen4_pcie_of_match[] = {
> > > > > > +	{ .compatible = "renesas,rcar-gen4-pcie", },
> > > > > > +	{},
> > > > > > +};
> > > > > > +
> > > > > > +static struct platform_driver rcar_gen4_pcie_driver = {
> > > > > > +	.driver = {
> > > > > > +		.name = "pcie-rcar-gen4",
> > > > > > +		.of_match_table = rcar_gen4_pcie_of_match,
> > > > > > +		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
> > > > > > +	},
> > > > > > +	.probe = rcar_gen4_pcie_probe,
> > > > > > +	.remove = rcar_gen4_pcie_remove,
> > > > > > +};
> > > > > > +module_platform_driver(rcar_gen4_pcie_driver);
> > > > > > +
> > > > > > +MODULE_DESCRIPTION("Renesas R-Car Gen4 PCIe host controller driver");
> > > > > > +MODULE_LICENSE("GPL");
> > > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > > > new file mode 100644
> > > > > > index 000000000000..35923fda8ed5
> > > > > > --- /dev/null
> > > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > > > @@ -0,0 +1,190 @@
> > > > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > > > +/*
> > > > > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > > + */
> > > > > > +
> > > > > > +#include <linux/delay.h>
> > > > > > +#include <linux/io.h>
> > > > > > +#include <linux/of_device.h>
> > > > > > +#include <linux/pci.h>
> > > > > > +#include <linux/pm_runtime.h>
> > > > > > +#include <linux/reset.h>
> > > > > > +
> > > > > > +#include "pcie-rcar-gen4.h"
> > > > > > +#include "pcie-designware.h"
> > > > > > +
> > > > > > +/* Renesas-specific */
> > > > > > +#define PCIERSTCTRL1		0x0014
> > > > > > +#define  APP_HOLD_PHY_RST	BIT(16)
> > > > > > +#define  APP_LTSSM_ENABLE	BIT(0)
> > > > > > +
> > > > > > +#define RETRAIN_MAX_CHECK	10
> > > > > > +#define RETRAIN_MAX_RETRIES	10
> > > > > > +
> > > > > > +static void rcar_gen4_pcie_ltssm_enable(struct rcar_gen4_pcie *rcar,
> > > > > > +					bool enable)
> > > > > > +{
> > > > > > +	u32 val;
> > > > > > +
> > > > > > +	val = readl(rcar->base + PCIERSTCTRL1);
> > > > > > +	if (enable) {
> > > > > > +		val |= APP_LTSSM_ENABLE;
> > > > >
> > > > > > +		val &= ~APP_HOLD_PHY_RST;
> > > > >
> > > > > What about moving the APP_HOLD_PHY_RST de-assertion to the
> > > > > rcar_gen4_pcie_set_device_type() method? In accordance with the
> > > > > "3.1 Initialization" chapter it's supposed to be done before
> > > > > performing the DBI programming and activating the link training.
> > > >
> > >
> > > > IIUC, the "3.1 Initialization" said app_hold_phy_rst = 1 before
> > > > performing the DBI programming. So, it is assertion. Also, the SoC
> > > > documentation described the initializing procedure as the follows:
> > > >  app_ltssm_enable = 1
> > > >  app_hold_phy_rst = 0
> > > > So, I would like to keep them in the function.
> > >
> > > Indeed. I was wrong. Sorry for the misleading comment.
> >
> > No problem. Thank you for the confirmation!
> >
> > > >
> > > > > > +	} else {
> > > > > > +		val &= ~APP_LTSSM_ENABLE;
> > > > > > +		val |= APP_HOLD_PHY_RST;
> > > > > > +	}
> > > > > > +	writel(val, rcar->base + PCIERSTCTRL1);
> > > > > > +}
> > > > > > +
> > > > > > +static bool rcar_gen4_pcie_check_retrain_link(struct dw_pcie *dw)
> > > > > > +{
> > > > > > +	u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> > > > > > +	u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> > > > > > +	u32 lnkctl = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCTL);
> > > > > > +	u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > > > > +	int i;
> > > > > > +
> > > > >
> > > > > > +	if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> > > > > > +		return true;
> > > > > > +
> > > > > > +	lnkctl |= PCI_EXP_LNKCTL_RL;
> > > > > > +	dw_pcie_writel_dbi(dw, offset + PCI_EXP_LNKCTL, lnkctl);
> > > > > > +
> > > > > > +	for (i = 0; i < RETRAIN_MAX_CHECK; i++) {
> > > > > > +		lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > > > > +		if (lnksta & PCI_EXP_LNKSTA_LT)
> > > > > > +			return true;
> > > > > > +		usleep_range(1000, 1100);
> > > > > > +	}
> > > > >
> > > > > I'll ask one more time because you didn't respond to my previous note
> > > > > about this.
> > > >
> > > > I'm sorry. I completely overlooked the previous note.
> > > >
> > > > > Are you sure that this is needed? Did you try
> > > > > the approach described in "3.13 Gen2/3/4/5 Speed Modes" with
> > > > > de-asserting/asserting the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag?
> > > >
> > > > I tried this setting, but it doesn't work. I'll investigate this setting more.
> > > >
> > > > > I keep asking because the same problem we used to have on our hardware
> > > > > until we found out that the DIRECT_SPEED_CHANGE flag helped to train
> > > > > the link right to the speed specified in the capabilities.
> > > > >
> > > > > So here is what presumably you'll need to do (based on the
> > > > > "3.1 Initialization" and "3.13 Gen2/3/4/5 Speed Modes" chapters of
> > > > > the DW PCIe DM hw-manual):
> > > > > 1. Make sure the controller is in the power-down/reset state.
> > > > > 2. Select device_type (EP or RP).
> > > > > 3. De-assert the controller reset.
> > > > > 4. Clear PHY-reset flag in the app registers.
> > > > > 5. Perform some controller initializations.
> > > > > 6. Enable LTSSM to start link training.
> > > > > 7. Set GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag one more time.
> > > > >
> > > > > 1-4 are supposed to be done in rcar_gen4_pcie_host_init().
> > > > > 5 is performed in the framework of the DW PCIe core driver.
> > > > > 6-7 should be done in rcar_gen4_pcie_start_link().
> > > > >
> > > > > Note 1. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is already set on stage
> > > > > 5 in the framework of the dw_pcie_setup_rc() method. But in our case
> > > > > it only caused having the Gen.2 link speed. Adding stage 7 helped to
> > > > > get stable Gen.3 link. So please try the denoted approach. If it works
> > > > > what about adding stage 7 twice in order to get Gen.4 speed?
> > > > > (waiting for the DIRECT_SPEED_CHANGE flag being auto-cleared and then
> > > > > set it up again?)
> > > > >
> > > > > Note 2. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is defined as
> > > > > PCIE_LINK_WIDTH_SPEED_CONTROL.PORT_LOGIC_SPEED_CHANGE macros in the DW
> > > > > PCIe core driver.
> > > > >
> > > > > Note 3. If what is suggested above works well then you won't need to
> > > > > have the heavy rcar_gen4_pcie_check_retrain_link() method in the way
> > > > > you have it defined.
> > > >
> > > > Thank you very much for your comments!
> > >
> > > Please see the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE description for details
> > > of how the flag is supposed to be de-asserted and asserted in order to
> > > initiate the direct speed change.
> >
> > After I modified the start_link() like below, it also works. Is the code
> > acceptable? (Sorry all tabs are replaced to spaces...)
> 
> Looks good, but still there are some questions.
> 
> > ----------------------------------------------------------------------------
> > static bool rcar_gen4_pcie_check_current_link(struct dw_pcie *dw)
> > {
> >         u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> >         u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> >         u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> >
> 
> >         if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> >                 return true;
> 
> AFAICS depending on the link partner speed capabilities this may never
> happen.
> 
> PCI_EXP_LNKCAP_SLS - Max Link Speed. This field indicates the maximum Link
> speed of the associated Port.
> PCI_EXP_LNKSTA_CLS - Current Link Speed. This field indicates the negotiated
> Link speed of the given PCI Express Link
> 
> What if a link partner has the speed capability weaker than the link
> speed of the Root Port? If so then the current link speed will never
> reach the max link speed value.

Thank you very much for your comments! You're correct. This code cannot
work correctly if a link partner has the speed capability weaker than the link speed...

> Of course this can be fixed by specifying a correct "max-link-speed"
> property, but what if a platform has a cold-swappable port connected to
> the root port? Since any device can be attached you'll never be able
> to predict its capabilities beforahead.

You're correct. So, I'll fix the code somehow.

> >
> >         return false;
> > }
> >
> > static void rcar_gen4_pcie_speed_change(struct dw_pcie *dw)
> > {
> >         u32 val;
> >
> >         val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> >         val &= ~PORT_LOGIC_SPEED_CHANGE;
> >         dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> >
> >         val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> >         val |= PORT_LOGIC_SPEED_CHANGE;
> >         dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > }
> >
> > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > {
> >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> >         int i;
> >
> >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> >
> >         /*
> >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> >          * PCIEINTSTS0 which is this controller specific register may not
> >          * be set.
> >          */
> 
> >         if (rcar->needs_speed_change) {
> 
> Seeing this is specified for the root port only what about
> replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.

Thank you for the comment. I'll fix it.

> BTW Just curious. Why is the loop below enabled for the Root Port
> only? What about the end-point controller? It's the same hardware
> after all..

This is reused from v16 and then it used "link retraining" which is only for
the Root Port. As you mentioned, it seems endpoint controller is also needed
if we use direct speed change.

> >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> >                         rcar_gen4_pcie_speed_change(dw);
> >                         msleep(100);
> >                         if (rcar_gen4_pcie_check_current_link(dw))
> >                                 return 0;
> >                 }
> 
> Did you trace how many iterations this loop normally takes?

i = 0 or 1 (if the max-link-speed is suitable for a connected device.)

> Is it
> constant or varies for the same platform setup and a connected link
> partner? Does the number of iterations depend on the target link speed
> specified via the "max-link-speed" property?

This is not related to the "max-link-speed". It seems to related to
a link partner.
		LinkCap	max-link-speed	loop
Device A		4	4		1
Device A		4	3		1
Device B		3	3		0

> I am just trying to understand whether we can completely get rid from
> the rcar_gen4_pcie_check_current_link() method and have it replaced
> with several rcar_gen4_pcie_speed_change() calls. The current link
> state would have been checked in the framework of the
> dw_pcie_wait_for_link() method which calls dw_pcie_link_up() and your
> rcar_gen4_pcie_link_up() in order to make sure the link is actually up.

Thank you for your comment! I'll investigate it.

Best regards,
Yoshihiro Shimoda

> -Serge(y)
> 
> >
> >                 return -ETIMEDOUT;      /* Failed */
> >         }
> > ------------------------------------------------------------------
> >
> > > >
> > > > > > +
> > > > > > +	return false;
> > > > > > +}
> > > > > > +
> > > > > > +static int rcar_gen4_pcie_link_up(struct dw_pcie *dw)
> > > > > > +{
> > > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > +	u32 val, mask;
> > > > > > +
> > > > > > +	val = readl(rcar->base + PCIEINTSTS0);
> > > > > > +	mask = RDLH_LINK_UP | SMLH_LINK_UP;
> > > > > > +
> > > > > > +	return (val & mask) == mask;
> > > > > > +}
> > > > > > +
> > > > > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > > +{
> > > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > +	int i;
> > > > > > +
> > > > > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * Require retraining here. Otherwise RDLH_LINK_UP of PCIEINTSTS0 which
> > > > > > +	 * is this controller specific register may not be set.
> > > > > > +	 */
> > > > > > +	if (rcar->needs_retrain) {
> > > > > > +		for (i = 0; i < RETRAIN_MAX_RETRIES; i++) {
> > > > > > +			if (rcar_gen4_pcie_check_retrain_link(dw))
> > > > > > +				return 0;
> > > > > > +			msleep(100);
> > > > > > +		}
> > > > > > +
> > > > > > +		return -ETIMEDOUT;	/* Failed */
> > > > > > +	}
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static void rcar_gen4_pcie_stop_link(struct dw_pcie *dw)
> > > > > > +{
> > > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > +
> > > > > > +	rcar_gen4_pcie_ltssm_enable(rcar, false);
> > > > > > +}
> > > > > > +
> > > > >
> > > > > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > > > > +				   int num_lanes)
> > > > >
> > > > > 1. Number of lanes is already defined in the rcar_gen4_pcie.dw.num_lanes field.
> > > > > What about using it from there instead of passing it as an argument?
> > > > > 2. DW PCIe core driver has a very handy enum defined:
> > > > > dw_pcie_device_mode. It describes the controller modes (End-point,
> > > > > Root port, etc). What about adding the mode field right to the
> > > > > rcar_gen4_pcie structure and initializing it in someplace in probe() ?
> > > > > 3. Based on the function semantic it's better to be named as something
> > > > > like rcar_gen4_pcie_init_device() or even rcar_gen4_pcie_basic_init().
> > > >
> > > > Thank you for your comments! I'll modify the function.
> > > >
> > > > >
> > > > > > +{
> > > > > > +	u32 val;
> > > > > > +
> > > > >
> > > > > > +	/* Note: Assume the rcar->rst which is Cold-reset is asserted here */
> > > > >
> > > > > What about directly asserting it here then? In accordance with the DW
> > > > > PCIe DM manual the "device_type" input must be set before the DM
> > > > > controller is powered up (basically un-reset). What if the controller
> > > > > reset is already de-asserted, but you are going to changes its mode?
> > > > > In that case the mode won't be changed and you'll end up with
> > > > > unpredictable results.
> > > >
> > > > Thank you for your comment. We should add asserting it here as you mentioned.
> > > >
> > > > > > +	val = readl(rcar->base + PCIEMSR0);
> > > > > > +	if (rc)
> > > > > > +		val |= DEVICE_TYPE_RC;
> > > > > > +	else
> > > > > > +		val |= DEVICE_TYPE_EP;
> > > > > > +
> > > > > > +	if (num_lanes < 4)
> > > > > > +		val |= BIFUR_MOD_SET_ON;
> > > > > > +
> > > > > > +	writel(val, rcar->base + PCIEMSR0);
> > > > > > +
> > > > > > +	return reset_control_deassert(rcar->rst);
> > > > > > +}
> > > > > > +
> > > > > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *rcar)
> > > > > > +{
> > > > > > +	struct device *dev = rcar->dw.dev;
> > > > > > +	int err;
> > > > > > +
> > > > > > +	pm_runtime_enable(dev);
> > > > > > +	err = pm_runtime_resume_and_get(dev);
> > > > > > +	if (err < 0) {
> > > > > > +		dev_err(dev, "Failed to resume/get Runtime PM\n");
> > > > > > +		pm_runtime_disable(dev);
> > > > > > +	}
> > > > > > +
> > > > > > +	return err;
> > > > > > +}
> > > > > > +
> > > > > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *rcar)
> > > > > > +{
> > > > > > +	struct device *dev = rcar->dw.dev;
> > > > > > +
> > > > > > +	if (!reset_control_status(rcar->rst))
> > > > > > +		reset_control_assert(rcar->rst);
> > > > > > +	pm_runtime_put(dev);
> > > > > > +	pm_runtime_disable(dev);
> > > > > > +}
> > > > > > +
> > > > > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > > > > +				 struct platform_device *pdev)
> > > > > > +{
> > > > > > +	struct device *dev = rcar->dw.dev;
> > > > > > +
> > > > > > +	/* Renesas-specific registers */
> > > > > > +	rcar->base = devm_platform_ioremap_resource_byname(pdev, "app");
> > > > > > +	if (IS_ERR(rcar->base))
> > > > > > +		return PTR_ERR(rcar->base);
> > > > > > +
> > > > >
> > > > > > +	rcar->rst = devm_reset_control_get(dev, NULL);
> > > > > > +	if (IS_ERR(rcar->rst)) {
> > > > > > +		dev_err(dev, "Failed to get Cold-reset\n");
> > > > >
> > > > > So AFAICS your platform is equipped with the DWC_pcie_clkrst.v module.
> > > > > Thus all the resets are appropriately cleared by a single flag:
> > > > > power_up_rst_n. What about using the named reset in this case with the
> > > > > "pwr" name? Thus you'll be able to drop the manual
> > > > > devm_reset_control_get() invocation and instead use the reset-resources
> > > > > requested in the framework of the generic dw_pcie_get_resources()
> > > > > method? Note you'll need to move the dw_pcie_cap_set(dw, REQ_RES);
> > > > > statement to rcar_gen4_pcie_devm_alloc() then and drop the
> > > > > rcar_gen4_pcie.rst field afterwords.
> > > >
> > > > Thank you for your suggestion! Using "pwr" can work on my environment.
> > > >
> > > > > By the way I don't see you requesting and enabling the reference
> > > > > clock in your driver but the bindings imply the clock source. How
> > > > > come?
> > > >
> > >
> > > > For now, I used gpio-hog to enable the reference clock. But, it seem
> > > > I should use "ref" clock for it. So, I'll fix it too.
> > >
> > > Not sure what gpio-hog you are talking about. Do you mean the pe_rst
> > > signal or some another gpio? I failed to see of how pe_rst is
> > > connected to the clock source. In anyway directly handling the clock
> > > source would be more portable choice.
> >
> > Sorry for lacking information. I described a gpio node like below
> > and then the gpio will be high automatically, and the reference clock
> > will be output. But, this is completely independent from pci.
> > ---
> > &gpio2 {
> >         pci-clkreq0-hog {
> >                 gpio-hog;
> >                 gpios = <15 GPIO_ACTIVE_LOW>;
> >                 output-high;
> >         };
> > };
> > ---
> >
> > Now I could implement the clock handling by using "gpio-gate-clock".
> > So, I'll drop the gpio-hog for the reference clock.
> >
> > Best regards,
> > Yoshihiro Shimoda
> >
> > > -Serge(y)
> > >
> > > >
> > > > > > +		return PTR_ERR(rcar->rst);
> > > > > > +	}
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static const struct dw_pcie_ops dw_pcie_ops = {
> > > > > > +	.start_link = rcar_gen4_pcie_start_link,
> > > > > > +	.stop_link = rcar_gen4_pcie_stop_link,
> > > > > > +	.link_up = rcar_gen4_pcie_link_up,
> > > > > > +};
> > > > > > +
> > > > > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev)
> > > > > > +{
> > > > > > +	struct rcar_gen4_pcie *rcar;
> > > > > > +
> > > > > > +	rcar = devm_kzalloc(dev, sizeof(*rcar), GFP_KERNEL);
> > > > > > +	if (!rcar)
> > > > > > +		return NULL;
> > > > > > +
> > > > > > +	rcar->dw.dev = dev;
> > > > > > +	rcar->dw.ops = &dw_pcie_ops;
> > > > > > +	dw_pcie_cap_set(&rcar->dw, EDMA_UNROLL);
> > > > > > +
> > > > > > +	return rcar;
> > > > > > +}
> > > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.h b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > > > new file mode 100644
> > > > > > index 000000000000..fec3f18609f4
> > > > > > --- /dev/null
> > > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > > > @@ -0,0 +1,46 @@
> > > > > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > > > > +/*
> > > > > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > > + */
> > > > > > +
> > > > > > +#ifndef _PCIE_RCAR_GEN4_H_
> > > > > > +#define _PCIE_RCAR_GEN4_H_
> > > > > > +
> > > > > > +#include <linux/io.h>
> > > > > > +#include <linux/pci.h>
> > > > > > +#include <linux/reset.h>
> > > > > > +
> > > > > > +#include "pcie-designware.h"
> > > > > > +
> > > > > > +/* Renesas-specific */
> > > > > > +#define PCIEMSR0		0x0000
> > > > > > +#define  BIFUR_MOD_SET_ON	BIT(0)
> > > > > > +#define  DEVICE_TYPE_EP		0
> > > > > > +#define  DEVICE_TYPE_RC		BIT(4)
> > > > > > +
> > > > > > +#define PCIEINTSTS0		0x0084
> > > > > > +#define PCIEINTSTS0EN		0x0310
> > > > > > +#define  MSI_CTRL_INT		BIT(26)
> > > > > > +#define  SMLH_LINK_UP		BIT(7)
> > > > > > +#define  RDLH_LINK_UP		BIT(6)
> > > > > > +#define PCIEDMAINTSTSEN		0x0314
> > > > > > +#define  PCIEDMAINTSTSEN_INIT	GENMASK(15, 0)
> > > > > > +
> > > > >
> > > > > > +struct rcar_gen4_pcie {
> > > > >
> > > > > As I mentioned above this structure can be extended with the enum
> > > > > dw_pcie_device_mode field thus dropping the boolean argument from the
> > > > > rcar_gen4_pcie_set_device_type() method.
> > > >
> > > > I got it. I'll fix this.
> > > >
> > > > > > +	struct dw_pcie		dw;
> > > > >
> > > > > As I already mentioned above the dw.num_lanes could be used instead of
> > > > > passing it as the rcar_gen4_pcie_set_device_type() argument.
> > > >
> > > > I'll fix this too.
> > > >
> > > > Best regards,
> > > > Yoshihiro Shimoda
> > > >
> > > > > -Serge(y)
> > > > >
> > > > > > +	void __iomem		*base;
> > > > > > +	struct reset_control	*rst;
> > > > > > +	bool			needs_retrain;
> > > > > > +};
> > > > > > +#define to_rcar_gen4_pcie(x)	dev_get_drvdata((x)->dev)
> > > > > > +
> > > > > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > > > > +				   int num_lanes);
> > > > > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *pcie);
> > > > > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *pcie);
> > > > > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > > > > +				 struct platform_device *pdev);
> > > > > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev);
> > > > > > +
> > > > > > +#endif /* _PCIE_RCAR_GEN4_H_ */
> > > > > > --
> > > > > > 2.25.1
> > > > > >
Serge Semin June 9, 2023, 10:54 a.m. UTC | #7
On Fri, Jun 09, 2023 at 06:29:39AM +0000, Yoshihiro Shimoda wrote:
> Hello Serge,
> 
> > From: Serge Semin, Sent: Thursday, June 8, 2023 9:11 PM
> > 
> > On Thu, Jun 08, 2023 at 08:47:16AM +0000, Yoshihiro Shimoda wrote:
> > > Hello Serge,
> > >
> > > > From: Serge Semin, Sent: Wednesday, June 7, 2023 9:16 PM
> > > >
> > > > On Wed, Jun 07, 2023 at 02:59:20AM +0000, Yoshihiro Shimoda wrote:
> > > > > Hello Serge,
> > > > >
> > > > > > From: Serge Semin, Sent: Monday, June 5, 2023 11:39 PM
> > > > > >
> > > > > > On Wed, May 10, 2023 at 03:22:31PM +0900, Yoshihiro Shimoda wrote:
> > > > > > > Add R-Car Gen4 PCIe Host support. This controller is based on
> > > > > > > Synopsys DesignWare PCIe, but this controller has vendor-specific
> > > > > > > registers so that requires initialization code like mode setting
> > > > > > > and retraining and so on.
> > > > > > >
> > > > > > > To reduce code delta, adds some helper functions which are used by
> > > > > > > both the host driver and the endpoint driver (which is added
> > > > > > > immediately afterwards) into a separate file.
> > > > > > >
> > > > > > > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> > > > > > > ---
> > > > > > >  drivers/pci/controller/dwc/Kconfig            |   9 +
> > > > > > >  drivers/pci/controller/dwc/Makefile           |   2 +
> > > > > > >  .../pci/controller/dwc/pcie-rcar-gen4-host.c  | 141 +++++++++++++
> > > > > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 190 ++++++++++++++++++
> > > > > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.h   |  46 +++++
> > > > > > >  5 files changed, 388 insertions(+)
> > > > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > > > >  create mode 100644 drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > > > >
> > > > > > > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > > > > > > index ab96da43e0c2..64d4d37bc891 100644
> > > > > > > --- a/drivers/pci/controller/dwc/Kconfig
> > > > > > > +++ b/drivers/pci/controller/dwc/Kconfig
> > > > > > > @@ -415,4 +415,13 @@ config PCIE_VISCONTI_HOST
> > > > > > >  	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
> > > > > > >  	  This driver supports TMPV7708 SoC.
> > > > > > >
> > > > > > > +config PCIE_RCAR_GEN4
> > > > > > > +	tristate "Renesas R-Car Gen4 PCIe Host controller"
> > > > > > > +	depends on ARCH_RENESAS || COMPILE_TEST
> > > > > > > +	depends on PCI_MSI
> > > > > > > +	select PCIE_DW_HOST
> > > > > > > +	help
> > > > > > > +	  Say Y here if you want PCIe host controller support on R-Car Gen4 SoCs.
> > > > > > > +	  This uses the DesignWare core.
> > > > > > > +
> > > > > > >  endmenu
> > > > > > > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > > > > > > index bf5c311875a1..486cf706b53d 100644
> > > > > > > --- a/drivers/pci/controller/dwc/Makefile
> > > > > > > +++ b/drivers/pci/controller/dwc/Makefile
> > > > > > > @@ -26,6 +26,8 @@ obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
> > > > > > >  obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
> > > > > > >  obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
> > > > > > >  obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
> > > > > > > +pcie-rcar-gen4-host-drv-objs := pcie-rcar-gen4.o pcie-rcar-gen4-host.o
> > > > > > > +obj-$(CONFIG_PCIE_RCAR_GEN4) += pcie-rcar-gen4-host-drv.o
> > > > > > >
> > > > > > >  # The following drivers are for devices that use the generic ACPI
> > > > > > >  # pci_root.c driver but don't support standard ECAM config access.
> > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > > > > new file mode 100644
> > > > > > > index 000000000000..df7d80f1874f
> > > > > > > --- /dev/null
> > > > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
> > > > > > > @@ -0,0 +1,141 @@
> > > > > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > > > > +/*
> > > > > > > + * PCIe host controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > > > + */
> > > > > > > +
> > > > > > > +#include <linux/delay.h>
> > > > > > > +#include <linux/interrupt.h>
> > > > > > > +#include <linux/module.h>
> > > > > > > +#include <linux/of_device.h>
> > > > > > > +#include <linux/pci.h>
> > > > > > > +#include <linux/platform_device.h>
> > > > > > > +
> > > > > > > +#include "pcie-rcar-gen4.h"
> > > > > > > +#include "pcie-designware.h"
> > > > > > > +
> > > > > > > +static int rcar_gen4_pcie_host_init(struct dw_pcie_rp *pp)
> > > > > > > +{
> > > > > > > +	struct dw_pcie *dw = to_dw_pcie_from_pp(pp);
> > > > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > > +	int ret;
> > > > > > > +	u32 val;
> > > > > > > +
> > > > > > > +	gpiod_set_value_cansleep(dw->pe_rst, 1);
> > > > > > > +
> > > > > > > +	ret = rcar_gen4_pcie_set_device_type(rcar, true, dw->num_lanes);
> > > > > > > +	if (ret < 0)
> > > > > > > +		return ret;
> > > > > > > +
> > > > > >
> > > > > > > +	dw_pcie_dbi_ro_wr_en(dw);
> > > > > >
> > > > > > Are you sure dw_pcie_dbi_ro_wr_en() and dw_pcie_dbi_ro_wr_dis() are
> > > > > > needed? In accordance with the DW PCIe Dual-mode HW manual the BARx
> > > > > > registers are W-only over the DBI2 map with no need in setting the
> > > > > > DBI_RO_WR_EN flag.
> > > > > >
> > > > > > Please check that on your hardware.
> > > > >
> > > > > You're correct. They are not needed. So, I'll drop this on v17.
> > > > >
> > > > > > > +
> > > > > > > +	/*
> > > > > > > +	 * According to the section 3.5.7.2 "RC Mode" in DWC PCIe Dual Mode
> > > > > > > +	 * Rev.5.20a, we should disable two BARs to avoid unnecessary memory
> > > > > > > +	 * assignment during device enumeration.
> > > > > > > +	 */
> > > > > > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_0, 0x0);
> > > > > > > +	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_1, 0x0);
> > > > > > > +
> > > > > >
> > > > > > > +	dw_pcie_dbi_ro_wr_dis(dw);
> > > > > >
> > > > > > ditto
> > > > >
> > > > > I'll drop this too.
> > > > >
> > > > > > > +
> > > > > > > +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> > > > > > > +		/* Enable MSI interrupt signal */
> > > > > > > +		val = readl(rcar->base + PCIEINTSTS0EN);
> > > > > > > +		val |= MSI_CTRL_INT;
> > > > > > > +		writel(val, rcar->base + PCIEINTSTS0EN);
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	msleep(100);	/* pe_rst requires 100msec delay */
> > > > > > > +
> > > > > > > +	gpiod_set_value_cansleep(dw->pe_rst, 0);
> > > > > > > +
> > > > > > > +	return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static const struct dw_pcie_host_ops rcar_gen4_pcie_host_ops = {
> > > > > > > +	.host_init = rcar_gen4_pcie_host_init,
> > > > > > > +};
> > > > > > > +
> > > > > > > +static int rcar_gen4_add_dw_pcie_rp(struct rcar_gen4_pcie *rcar,
> > > > > > > +				   struct platform_device *pdev)
> > > > > > > +{
> > > > > > > +	struct dw_pcie *dw = &rcar->dw;
> > > > > > > +	struct dw_pcie_rp *pp = &dw->pp;
> > > > > > > +
> > > > > > > +	pp->num_vectors = MAX_MSI_IRQS;
> > > > > > > +	pp->ops = &rcar_gen4_pcie_host_ops;
> > > > > > > +	dw_pcie_cap_set(dw, REQ_RES);
> > > > > > > +
> > > > > > > +	return dw_pcie_host_init(pp);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void rcar_gen4_remove_dw_pcie_rp(struct rcar_gen4_pcie *rcar)
> > > > > > > +{
> > > > > > > +	dw_pcie_host_deinit(&rcar->dw.pp);
> > > > > > > +	gpiod_set_value_cansleep(rcar->dw.pe_rst, 1);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int rcar_gen4_pcie_probe(struct platform_device *pdev)
> > > > > > > +{
> > > > > > > +	struct device *dev = &pdev->dev;
> > > > > > > +	struct rcar_gen4_pcie *rcar;
> > > > > > > +	int err;
> > > > > > > +
> > > > > > > +	rcar = rcar_gen4_pcie_devm_alloc(dev);
> > > > > > > +	if (!rcar)
> > > > > > > +		return -ENOMEM;
> > > > > > > +
> > > > > > > +	err = rcar_gen4_pcie_get_resources(rcar, pdev);
> > > > > > > +	if (err < 0) {
> > > > > > > +		dev_err(dev, "Failed to request resource: %d\n", err);
> > > > > > > +		return err;
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	platform_set_drvdata(pdev, rcar);
> > > > > > > +
> > > > > > > +	err = rcar_gen4_pcie_prepare(rcar);
> > > > > > > +	if (err < 0)
> > > > > > > +		return err;
> > > > > > > +
> > > > > > > +	rcar->needs_retrain = true;
> > > > > > > +	err = rcar_gen4_add_dw_pcie_rp(rcar, pdev);
> > > > > > > +	if (err < 0)
> > > > > > > +		goto err_add;
> > > > > > > +
> > > > > > > +	return 0;
> > > > > > > +
> > > > > > > +err_add:
> > > > > > > +	rcar_gen4_pcie_unprepare(rcar);
> > > > > > > +
> > > > > > > +	return err;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int rcar_gen4_pcie_remove(struct platform_device *pdev)
> > > > > > > +{
> > > > > > > +	struct rcar_gen4_pcie *rcar = platform_get_drvdata(pdev);
> > > > > > > +
> > > > > > > +	rcar_gen4_remove_dw_pcie_rp(rcar);
> > > > > > > +	rcar_gen4_pcie_unprepare(rcar);
> > > > > > > +
> > > > > > > +	return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static const struct of_device_id rcar_gen4_pcie_of_match[] = {
> > > > > > > +	{ .compatible = "renesas,rcar-gen4-pcie", },
> > > > > > > +	{},
> > > > > > > +};
> > > > > > > +
> > > > > > > +static struct platform_driver rcar_gen4_pcie_driver = {
> > > > > > > +	.driver = {
> > > > > > > +		.name = "pcie-rcar-gen4",
> > > > > > > +		.of_match_table = rcar_gen4_pcie_of_match,
> > > > > > > +		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
> > > > > > > +	},
> > > > > > > +	.probe = rcar_gen4_pcie_probe,
> > > > > > > +	.remove = rcar_gen4_pcie_remove,
> > > > > > > +};
> > > > > > > +module_platform_driver(rcar_gen4_pcie_driver);
> > > > > > > +
> > > > > > > +MODULE_DESCRIPTION("Renesas R-Car Gen4 PCIe host controller driver");
> > > > > > > +MODULE_LICENSE("GPL");
> > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > > > > new file mode 100644
> > > > > > > index 000000000000..35923fda8ed5
> > > > > > > --- /dev/null
> > > > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
> > > > > > > @@ -0,0 +1,190 @@
> > > > > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > > > > +/*
> > > > > > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > > > + */
> > > > > > > +
> > > > > > > +#include <linux/delay.h>
> > > > > > > +#include <linux/io.h>
> > > > > > > +#include <linux/of_device.h>
> > > > > > > +#include <linux/pci.h>
> > > > > > > +#include <linux/pm_runtime.h>
> > > > > > > +#include <linux/reset.h>
> > > > > > > +
> > > > > > > +#include "pcie-rcar-gen4.h"
> > > > > > > +#include "pcie-designware.h"
> > > > > > > +
> > > > > > > +/* Renesas-specific */
> > > > > > > +#define PCIERSTCTRL1		0x0014
> > > > > > > +#define  APP_HOLD_PHY_RST	BIT(16)
> > > > > > > +#define  APP_LTSSM_ENABLE	BIT(0)
> > > > > > > +
> > > > > > > +#define RETRAIN_MAX_CHECK	10
> > > > > > > +#define RETRAIN_MAX_RETRIES	10
> > > > > > > +
> > > > > > > +static void rcar_gen4_pcie_ltssm_enable(struct rcar_gen4_pcie *rcar,
> > > > > > > +					bool enable)
> > > > > > > +{
> > > > > > > +	u32 val;
> > > > > > > +
> > > > > > > +	val = readl(rcar->base + PCIERSTCTRL1);
> > > > > > > +	if (enable) {
> > > > > > > +		val |= APP_LTSSM_ENABLE;
> > > > > >
> > > > > > > +		val &= ~APP_HOLD_PHY_RST;
> > > > > >
> > > > > > What about moving the APP_HOLD_PHY_RST de-assertion to the
> > > > > > rcar_gen4_pcie_set_device_type() method? In accordance with the
> > > > > > "3.1 Initialization" chapter it's supposed to be done before
> > > > > > performing the DBI programming and activating the link training.
> > > > >
> > > >
> > > > > IIUC, the "3.1 Initialization" said app_hold_phy_rst = 1 before
> > > > > performing the DBI programming. So, it is assertion. Also, the SoC
> > > > > documentation described the initializing procedure as the follows:
> > > > >  app_ltssm_enable = 1
> > > > >  app_hold_phy_rst = 0
> > > > > So, I would like to keep them in the function.
> > > >
> > > > Indeed. I was wrong. Sorry for the misleading comment.
> > >
> > > No problem. Thank you for the confirmation!
> > >
> > > > >
> > > > > > > +	} else {
> > > > > > > +		val &= ~APP_LTSSM_ENABLE;
> > > > > > > +		val |= APP_HOLD_PHY_RST;
> > > > > > > +	}
> > > > > > > +	writel(val, rcar->base + PCIERSTCTRL1);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static bool rcar_gen4_pcie_check_retrain_link(struct dw_pcie *dw)
> > > > > > > +{
> > > > > > > +	u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> > > > > > > +	u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> > > > > > > +	u32 lnkctl = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCTL);
> > > > > > > +	u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > > > > > +	int i;
> > > > > > > +
> > > > > >
> > > > > > > +	if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> > > > > > > +		return true;
> > > > > > > +
> > > > > > > +	lnkctl |= PCI_EXP_LNKCTL_RL;
> > > > > > > +	dw_pcie_writel_dbi(dw, offset + PCI_EXP_LNKCTL, lnkctl);
> > > > > > > +
> > > > > > > +	for (i = 0; i < RETRAIN_MAX_CHECK; i++) {
> > > > > > > +		lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > > > > > > +		if (lnksta & PCI_EXP_LNKSTA_LT)
> > > > > > > +			return true;
> > > > > > > +		usleep_range(1000, 1100);
> > > > > > > +	}
> > > > > >
> > > > > > I'll ask one more time because you didn't respond to my previous note
> > > > > > about this.
> > > > >
> > > > > I'm sorry. I completely overlooked the previous note.
> > > > >
> > > > > > Are you sure that this is needed? Did you try
> > > > > > the approach described in "3.13 Gen2/3/4/5 Speed Modes" with
> > > > > > de-asserting/asserting the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag?
> > > > >
> > > > > I tried this setting, but it doesn't work. I'll investigate this setting more.
> > > > >
> > > > > > I keep asking because the same problem we used to have on our hardware
> > > > > > until we found out that the DIRECT_SPEED_CHANGE flag helped to train
> > > > > > the link right to the speed specified in the capabilities.
> > > > > >
> > > > > > So here is what presumably you'll need to do (based on the
> > > > > > "3.1 Initialization" and "3.13 Gen2/3/4/5 Speed Modes" chapters of
> > > > > > the DW PCIe DM hw-manual):
> > > > > > 1. Make sure the controller is in the power-down/reset state.
> > > > > > 2. Select device_type (EP or RP).
> > > > > > 3. De-assert the controller reset.
> > > > > > 4. Clear PHY-reset flag in the app registers.
> > > > > > 5. Perform some controller initializations.
> > > > > > 6. Enable LTSSM to start link training.
> > > > > > 7. Set GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag one more time.
> > > > > >
> > > > > > 1-4 are supposed to be done in rcar_gen4_pcie_host_init().
> > > > > > 5 is performed in the framework of the DW PCIe core driver.
> > > > > > 6-7 should be done in rcar_gen4_pcie_start_link().
> > > > > >
> > > > > > Note 1. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is already set on stage
> > > > > > 5 in the framework of the dw_pcie_setup_rc() method. But in our case
> > > > > > it only caused having the Gen.2 link speed. Adding stage 7 helped to
> > > > > > get stable Gen.3 link. So please try the denoted approach. If it works
> > > > > > what about adding stage 7 twice in order to get Gen.4 speed?
> > > > > > (waiting for the DIRECT_SPEED_CHANGE flag being auto-cleared and then
> > > > > > set it up again?)
> > > > > >
> > > > > > Note 2. GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag is defined as
> > > > > > PCIE_LINK_WIDTH_SPEED_CONTROL.PORT_LOGIC_SPEED_CHANGE macros in the DW
> > > > > > PCIe core driver.
> > > > > >
> > > > > > Note 3. If what is suggested above works well then you won't need to
> > > > > > have the heavy rcar_gen4_pcie_check_retrain_link() method in the way
> > > > > > you have it defined.
> > > > >
> > > > > Thank you very much for your comments!
> > > >
> > > > Please see the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE description for details
> > > > of how the flag is supposed to be de-asserted and asserted in order to
> > > > initiate the direct speed change.
> > >
> > > After I modified the start_link() like below, it also works. Is the code
> > > acceptable? (Sorry all tabs are replaced to spaces...)
> > 
> > Looks good, but still there are some questions.
> > 
> > > ----------------------------------------------------------------------------
> > > static bool rcar_gen4_pcie_check_current_link(struct dw_pcie *dw)
> > > {
> > >         u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
> > >         u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
> > >         u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
> > >
> > 
> > >         if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
> > >                 return true;
> > 
> > AFAICS depending on the link partner speed capabilities this may never
> > happen.
> > 
> > PCI_EXP_LNKCAP_SLS - Max Link Speed. This field indicates the maximum Link
> > speed of the associated Port.
> > PCI_EXP_LNKSTA_CLS - Current Link Speed. This field indicates the negotiated
> > Link speed of the given PCI Express Link
> > 
> > What if a link partner has the speed capability weaker than the link
> > speed of the Root Port? If so then the current link speed will never
> > reach the max link speed value.
> 
> Thank you very much for your comments! You're correct. This code cannot
> work correctly if a link partner has the speed capability weaker than the link speed...
> 
> > Of course this can be fixed by specifying a correct "max-link-speed"
> > property, but what if a platform has a cold-swappable port connected to
> > the root port? Since any device can be attached you'll never be able
> > to predict its capabilities beforahead.
> 
> You're correct. So, I'll fix the code somehow.
> 
> > >
> > >         return false;
> > > }
> > >
> > > static void rcar_gen4_pcie_speed_change(struct dw_pcie *dw)
> > > {
> > >         u32 val;
> > >
> > >         val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> > >         val &= ~PORT_LOGIC_SPEED_CHANGE;
> > >         dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > >
> > >         val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> > >         val |= PORT_LOGIC_SPEED_CHANGE;
> > >         dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > > }
> > >
> > > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > {
> > >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > >         int i;
> > >
> > >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> > >
> > >         /*
> > >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> > >          * PCIEINTSTS0 which is this controller specific register may not
> > >          * be set.
> > >          */
> > 
> > >         if (rcar->needs_speed_change) {
> > 
> > Seeing this is specified for the root port only what about
> > replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> > DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.
> 
> Thank you for the comment. I'll fix it.
> 
> > BTW Just curious. Why is the loop below enabled for the Root Port
> > only? What about the end-point controller? It's the same hardware
> > after all..
> 
> This is reused from v16 and then it used "link retraining" which is only for
> the Root Port. As you mentioned, it seems endpoint controller is also needed
> if we use direct speed change.
> 
> > >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > >                         rcar_gen4_pcie_speed_change(dw);
> > >                         msleep(100);
> > >                         if (rcar_gen4_pcie_check_current_link(dw))
> > >                                 return 0;
> > >                 }
> > 
> > Did you trace how many iterations this loop normally takes?
> 
> i = 0 or 1 (if the max-link-speed is suitable for a connected device.)
> 
> > Is it
> > constant or varies for the same platform setup and a connected link
> > partner? Does the number of iterations depend on the target link speed
> > specified via the "max-link-speed" property?
> 

> This is not related to the "max-link-speed". It seems to related to
> a link partner.
> 		LinkCap	max-link-speed	loop
> Device A		4	4		1
> Device A		4	3		1
> Device B		3	3		0

Great! If so I would have just left a single unconditional
rcar_gen4_pcie_speed_change() call placed right after the
rcar_gen4_pcie_ltssm_enable() method with no delays afterwards. These
methods would have been invoked in the framework of
dw_pcie_start_link() after which the dw_pcie_wait_for_link() method is
called with several checks parted with the ~100ms delay. It will make
sure that at least some link is up with the link state printed to the
system log. If for some reason the performance degradation happens
then it will be up to the system administrator to investigate what was
wrong. Your driver did as much is it could to reach the best link gen.

-Serge(y)

> 
> > I am just trying to understand whether we can completely get rid from
> > the rcar_gen4_pcie_check_current_link() method and have it replaced
> > with several rcar_gen4_pcie_speed_change() calls. The current link
> > state would have been checked in the framework of the
> > dw_pcie_wait_for_link() method which calls dw_pcie_link_up() and your
> > rcar_gen4_pcie_link_up() in order to make sure the link is actually up.
> 
> Thank you for your comment! I'll investigate it.
> 
> Best regards,
> Yoshihiro Shimoda
> 
> > -Serge(y)
> > 
> > >
> > >                 return -ETIMEDOUT;      /* Failed */
> > >         }
> > > ------------------------------------------------------------------
> > >
> > > > >
> > > > > > > +
> > > > > > > +	return false;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int rcar_gen4_pcie_link_up(struct dw_pcie *dw)
> > > > > > > +{
> > > > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > > +	u32 val, mask;
> > > > > > > +
> > > > > > > +	val = readl(rcar->base + PCIEINTSTS0);
> > > > > > > +	mask = RDLH_LINK_UP | SMLH_LINK_UP;
> > > > > > > +
> > > > > > > +	return (val & mask) == mask;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > > > +{
> > > > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > > +	int i;
> > > > > > > +
> > > > > > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > > > +
> > > > > > > +	/*
> > > > > > > +	 * Require retraining here. Otherwise RDLH_LINK_UP of PCIEINTSTS0 which
> > > > > > > +	 * is this controller specific register may not be set.
> > > > > > > +	 */
> > > > > > > +	if (rcar->needs_retrain) {
> > > > > > > +		for (i = 0; i < RETRAIN_MAX_RETRIES; i++) {
> > > > > > > +			if (rcar_gen4_pcie_check_retrain_link(dw))
> > > > > > > +				return 0;
> > > > > > > +			msleep(100);
> > > > > > > +		}
> > > > > > > +
> > > > > > > +		return -ETIMEDOUT;	/* Failed */
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void rcar_gen4_pcie_stop_link(struct dw_pcie *dw)
> > > > > > > +{
> > > > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > > +
> > > > > > > +	rcar_gen4_pcie_ltssm_enable(rcar, false);
> > > > > > > +}
> > > > > > > +
> > > > > >
> > > > > > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > > > > > +				   int num_lanes)
> > > > > >
> > > > > > 1. Number of lanes is already defined in the rcar_gen4_pcie.dw.num_lanes field.
> > > > > > What about using it from there instead of passing it as an argument?
> > > > > > 2. DW PCIe core driver has a very handy enum defined:
> > > > > > dw_pcie_device_mode. It describes the controller modes (End-point,
> > > > > > Root port, etc). What about adding the mode field right to the
> > > > > > rcar_gen4_pcie structure and initializing it in someplace in probe() ?
> > > > > > 3. Based on the function semantic it's better to be named as something
> > > > > > like rcar_gen4_pcie_init_device() or even rcar_gen4_pcie_basic_init().
> > > > >
> > > > > Thank you for your comments! I'll modify the function.
> > > > >
> > > > > >
> > > > > > > +{
> > > > > > > +	u32 val;
> > > > > > > +
> > > > > >
> > > > > > > +	/* Note: Assume the rcar->rst which is Cold-reset is asserted here */
> > > > > >
> > > > > > What about directly asserting it here then? In accordance with the DW
> > > > > > PCIe DM manual the "device_type" input must be set before the DM
> > > > > > controller is powered up (basically un-reset). What if the controller
> > > > > > reset is already de-asserted, but you are going to changes its mode?
> > > > > > In that case the mode won't be changed and you'll end up with
> > > > > > unpredictable results.
> > > > >
> > > > > Thank you for your comment. We should add asserting it here as you mentioned.
> > > > >
> > > > > > > +	val = readl(rcar->base + PCIEMSR0);
> > > > > > > +	if (rc)
> > > > > > > +		val |= DEVICE_TYPE_RC;
> > > > > > > +	else
> > > > > > > +		val |= DEVICE_TYPE_EP;
> > > > > > > +
> > > > > > > +	if (num_lanes < 4)
> > > > > > > +		val |= BIFUR_MOD_SET_ON;
> > > > > > > +
> > > > > > > +	writel(val, rcar->base + PCIEMSR0);
> > > > > > > +
> > > > > > > +	return reset_control_deassert(rcar->rst);
> > > > > > > +}
> > > > > > > +
> > > > > > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *rcar)
> > > > > > > +{
> > > > > > > +	struct device *dev = rcar->dw.dev;
> > > > > > > +	int err;
> > > > > > > +
> > > > > > > +	pm_runtime_enable(dev);
> > > > > > > +	err = pm_runtime_resume_and_get(dev);
> > > > > > > +	if (err < 0) {
> > > > > > > +		dev_err(dev, "Failed to resume/get Runtime PM\n");
> > > > > > > +		pm_runtime_disable(dev);
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	return err;
> > > > > > > +}
> > > > > > > +
> > > > > > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *rcar)
> > > > > > > +{
> > > > > > > +	struct device *dev = rcar->dw.dev;
> > > > > > > +
> > > > > > > +	if (!reset_control_status(rcar->rst))
> > > > > > > +		reset_control_assert(rcar->rst);
> > > > > > > +	pm_runtime_put(dev);
> > > > > > > +	pm_runtime_disable(dev);
> > > > > > > +}
> > > > > > > +
> > > > > > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > > > > > +				 struct platform_device *pdev)
> > > > > > > +{
> > > > > > > +	struct device *dev = rcar->dw.dev;
> > > > > > > +
> > > > > > > +	/* Renesas-specific registers */
> > > > > > > +	rcar->base = devm_platform_ioremap_resource_byname(pdev, "app");
> > > > > > > +	if (IS_ERR(rcar->base))
> > > > > > > +		return PTR_ERR(rcar->base);
> > > > > > > +
> > > > > >
> > > > > > > +	rcar->rst = devm_reset_control_get(dev, NULL);
> > > > > > > +	if (IS_ERR(rcar->rst)) {
> > > > > > > +		dev_err(dev, "Failed to get Cold-reset\n");
> > > > > >
> > > > > > So AFAICS your platform is equipped with the DWC_pcie_clkrst.v module.
> > > > > > Thus all the resets are appropriately cleared by a single flag:
> > > > > > power_up_rst_n. What about using the named reset in this case with the
> > > > > > "pwr" name? Thus you'll be able to drop the manual
> > > > > > devm_reset_control_get() invocation and instead use the reset-resources
> > > > > > requested in the framework of the generic dw_pcie_get_resources()
> > > > > > method? Note you'll need to move the dw_pcie_cap_set(dw, REQ_RES);
> > > > > > statement to rcar_gen4_pcie_devm_alloc() then and drop the
> > > > > > rcar_gen4_pcie.rst field afterwords.
> > > > >
> > > > > Thank you for your suggestion! Using "pwr" can work on my environment.
> > > > >
> > > > > > By the way I don't see you requesting and enabling the reference
> > > > > > clock in your driver but the bindings imply the clock source. How
> > > > > > come?
> > > > >
> > > >
> > > > > For now, I used gpio-hog to enable the reference clock. But, it seem
> > > > > I should use "ref" clock for it. So, I'll fix it too.
> > > >
> > > > Not sure what gpio-hog you are talking about. Do you mean the pe_rst
> > > > signal or some another gpio? I failed to see of how pe_rst is
> > > > connected to the clock source. In anyway directly handling the clock
> > > > source would be more portable choice.
> > >
> > > Sorry for lacking information. I described a gpio node like below
> > > and then the gpio will be high automatically, and the reference clock
> > > will be output. But, this is completely independent from pci.
> > > ---
> > > &gpio2 {
> > >         pci-clkreq0-hog {
> > >                 gpio-hog;
> > >                 gpios = <15 GPIO_ACTIVE_LOW>;
> > >                 output-high;
> > >         };
> > > };
> > > ---
> > >
> > > Now I could implement the clock handling by using "gpio-gate-clock".
> > > So, I'll drop the gpio-hog for the reference clock.
> > >
> > > Best regards,
> > > Yoshihiro Shimoda
> > >
> > > > -Serge(y)
> > > >
> > > > >
> > > > > > > +		return PTR_ERR(rcar->rst);
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static const struct dw_pcie_ops dw_pcie_ops = {
> > > > > > > +	.start_link = rcar_gen4_pcie_start_link,
> > > > > > > +	.stop_link = rcar_gen4_pcie_stop_link,
> > > > > > > +	.link_up = rcar_gen4_pcie_link_up,
> > > > > > > +};
> > > > > > > +
> > > > > > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev)
> > > > > > > +{
> > > > > > > +	struct rcar_gen4_pcie *rcar;
> > > > > > > +
> > > > > > > +	rcar = devm_kzalloc(dev, sizeof(*rcar), GFP_KERNEL);
> > > > > > > +	if (!rcar)
> > > > > > > +		return NULL;
> > > > > > > +
> > > > > > > +	rcar->dw.dev = dev;
> > > > > > > +	rcar->dw.ops = &dw_pcie_ops;
> > > > > > > +	dw_pcie_cap_set(&rcar->dw, EDMA_UNROLL);
> > > > > > > +
> > > > > > > +	return rcar;
> > > > > > > +}
> > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.h b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > > > > new file mode 100644
> > > > > > > index 000000000000..fec3f18609f4
> > > > > > > --- /dev/null
> > > > > > > +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
> > > > > > > @@ -0,0 +1,46 @@
> > > > > > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > > > > > +/*
> > > > > > > + * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
> > > > > > > + * Copyright (C) 2022-2023 Renesas Electronics Corporation
> > > > > > > + */
> > > > > > > +
> > > > > > > +#ifndef _PCIE_RCAR_GEN4_H_
> > > > > > > +#define _PCIE_RCAR_GEN4_H_
> > > > > > > +
> > > > > > > +#include <linux/io.h>
> > > > > > > +#include <linux/pci.h>
> > > > > > > +#include <linux/reset.h>
> > > > > > > +
> > > > > > > +#include "pcie-designware.h"
> > > > > > > +
> > > > > > > +/* Renesas-specific */
> > > > > > > +#define PCIEMSR0		0x0000
> > > > > > > +#define  BIFUR_MOD_SET_ON	BIT(0)
> > > > > > > +#define  DEVICE_TYPE_EP		0
> > > > > > > +#define  DEVICE_TYPE_RC		BIT(4)
> > > > > > > +
> > > > > > > +#define PCIEINTSTS0		0x0084
> > > > > > > +#define PCIEINTSTS0EN		0x0310
> > > > > > > +#define  MSI_CTRL_INT		BIT(26)
> > > > > > > +#define  SMLH_LINK_UP		BIT(7)
> > > > > > > +#define  RDLH_LINK_UP		BIT(6)
> > > > > > > +#define PCIEDMAINTSTSEN		0x0314
> > > > > > > +#define  PCIEDMAINTSTSEN_INIT	GENMASK(15, 0)
> > > > > > > +
> > > > > >
> > > > > > > +struct rcar_gen4_pcie {
> > > > > >
> > > > > > As I mentioned above this structure can be extended with the enum
> > > > > > dw_pcie_device_mode field thus dropping the boolean argument from the
> > > > > > rcar_gen4_pcie_set_device_type() method.
> > > > >
> > > > > I got it. I'll fix this.
> > > > >
> > > > > > > +	struct dw_pcie		dw;
> > > > > >
> > > > > > As I already mentioned above the dw.num_lanes could be used instead of
> > > > > > passing it as the rcar_gen4_pcie_set_device_type() argument.
> > > > >
> > > > > I'll fix this too.
> > > > >
> > > > > Best regards,
> > > > > Yoshihiro Shimoda
> > > > >
> > > > > > -Serge(y)
> > > > > >
> > > > > > > +	void __iomem		*base;
> > > > > > > +	struct reset_control	*rst;
> > > > > > > +	bool			needs_retrain;
> > > > > > > +};
> > > > > > > +#define to_rcar_gen4_pcie(x)	dev_get_drvdata((x)->dev)
> > > > > > > +
> > > > > > > +int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
> > > > > > > +				   int num_lanes);
> > > > > > > +int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *pcie);
> > > > > > > +void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *pcie);
> > > > > > > +int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
> > > > > > > +				 struct platform_device *pdev);
> > > > > > > +struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev);
> > > > > > > +
> > > > > > > +#endif /* _PCIE_RCAR_GEN4_H_ */
> > > > > > > --
> > > > > > > 2.25.1
> > > > > > >
Yoshihiro Shimoda June 12, 2023, 1:19 p.m. UTC | #8
Hello Serge,

> From: Serge Semin, Sent: Friday, June 9, 2023 7:54 PM
<snip>
> > > > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > {
> > > >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > >         int i;
> > > >
> > > >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > >
> > > >         /*
> > > >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> > > >          * PCIEINTSTS0 which is this controller specific register may not
> > > >          * be set.
> > > >          */
> > >
> > > >         if (rcar->needs_speed_change) {
> > >
> > > Seeing this is specified for the root port only what about
> > > replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> > > DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.
> >
> > Thank you for the comment. I'll fix it.
> >
> > > BTW Just curious. Why is the loop below enabled for the Root Port
> > > only? What about the end-point controller? It's the same hardware
> > > after all..
> >
> > This is reused from v16 and then it used "link retraining" which is only for
> > the Root Port. As you mentioned, it seems endpoint controller is also needed
> > if we use direct speed change.
> >
> > > >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > >                         rcar_gen4_pcie_speed_change(dw);
> > > >                         msleep(100);
> > > >                         if (rcar_gen4_pcie_check_current_link(dw))
> > > >                                 return 0;
> > > >                 }
> > >
> > > Did you trace how many iterations this loop normally takes?
> >
> > i = 0 or 1 (if the max-link-speed is suitable for a connected device.)
> >
> > > Is it
> > > constant or varies for the same platform setup and a connected link
> > > partner? Does the number of iterations depend on the target link speed
> > > specified via the "max-link-speed" property?
> >
> 
> > This is not related to the "max-link-speed". It seems to related to
> > a link partner.
> > 		LinkCap	max-link-speed	loop
> > Device A		4	4		1
> > Device A		4	3		1
> > Device B		3	3		0
> 
> Great! If so I would have just left a single unconditional
> rcar_gen4_pcie_speed_change() call placed right after the
> rcar_gen4_pcie_ltssm_enable() method with no delays afterwards. These
> methods would have been invoked in the framework of
> dw_pcie_start_link() after which the dw_pcie_wait_for_link() method is
> called with several checks parted with the ~100ms delay. It will make
> sure that at least some link is up with the link state printed to the
> system log. If for some reason the performance degradation happens
> then it will be up to the system administrator to investigate what was
> wrong. Your driver did as much is it could to reach the best link gen.

IIUC, is your suggestion like the following code?
---
	rcar_gen4_pcie_ltssm_enable(rcar, true);
	if (!dw_pcie_wait_for_link(dw)) {
		rcar_gen4_pcie_speed_change(dw);
		return 0;
	}
---

Unfortunately, it doesn't work correctly...
The following code can work correctly. The value of i is still 1 on the device A.
What do you think that the following code is acceptable?
---
	rcar_gen4_pcie_ltssm_enable(rcar, true);
	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
		msleep(100);
		rcar_gen4_pcie_speed_change(dw);
		if (dw_pcie_link_up(dw)) {
			printk("%s:%d\n", __func__, i);
			return 0;
		}
	}
---

Best regards,
Yoshihiro Shimoda

> -Serge(y)
Serge Semin June 12, 2023, 7:51 p.m. UTC | #9
On Mon, Jun 12, 2023 at 01:19:02PM +0000, Yoshihiro Shimoda wrote:
> Hello Serge,
> 
> > From: Serge Semin, Sent: Friday, June 9, 2023 7:54 PM
> <snip>
> > > > > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > {
> > > > >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > >         int i;
> > > > >
> > > > >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > >
> > > > >         /*
> > > > >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> > > > >          * PCIEINTSTS0 which is this controller specific register may not
> > > > >          * be set.
> > > > >          */
> > > >
> > > > >         if (rcar->needs_speed_change) {
> > > >
> > > > Seeing this is specified for the root port only what about
> > > > replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> > > > DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.
> > >
> > > Thank you for the comment. I'll fix it.
> > >
> > > > BTW Just curious. Why is the loop below enabled for the Root Port
> > > > only? What about the end-point controller? It's the same hardware
> > > > after all..
> > >
> > > This is reused from v16 and then it used "link retraining" which is only for
> > > the Root Port. As you mentioned, it seems endpoint controller is also needed
> > > if we use direct speed change.
> > >
> > > > >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > >                         rcar_gen4_pcie_speed_change(dw);
> > > > >                         msleep(100);
> > > > >                         if (rcar_gen4_pcie_check_current_link(dw))
> > > > >                                 return 0;
> > > > >                 }
> > > >
> > > > Did you trace how many iterations this loop normally takes?
> > >
> > > i = 0 or 1 (if the max-link-speed is suitable for a connected device.)
> > >
> > > > Is it
> > > > constant or varies for the same platform setup and a connected link
> > > > partner? Does the number of iterations depend on the target link speed
> > > > specified via the "max-link-speed" property?
> > >
> > 
> > > This is not related to the "max-link-speed". It seems to related to
> > > a link partner.
> > > 		LinkCap	max-link-speed	loop
> > > Device A		4	4		1
> > > Device A		4	3		1
> > > Device B		3	3		0
> > 
> > Great! If so I would have just left a single unconditional
> > rcar_gen4_pcie_speed_change() call placed right after the
> > rcar_gen4_pcie_ltssm_enable() method with no delays afterwards. These
> > methods would have been invoked in the framework of
> > dw_pcie_start_link() after which the dw_pcie_wait_for_link() method is
> > called with several checks parted with the ~100ms delay. It will make
> > sure that at least some link is up with the link state printed to the
> > system log. If for some reason the performance degradation happens
> > then it will be up to the system administrator to investigate what was
> > wrong. Your driver did as much is it could to reach the best link gen.
> 
> IIUC, is your suggestion like the following code?
> ---
> 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> 	if (!dw_pcie_wait_for_link(dw)) {
> 		rcar_gen4_pcie_speed_change(dw);
> 		return 0;
> 	}
> ---
> 
> Unfortunately, it doesn't work correctly...
> The following code can work correctly. The value of i is still 1 on the device A.
> What do you think that the following code is acceptable?
> ---
> 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> 		msleep(100);
> 		rcar_gen4_pcie_speed_change(dw);
> 		if (dw_pcie_link_up(dw)) {
> 			printk("%s:%d\n", __func__, i);
> 			return 0;
> 		}
> 	}
> ---

My idea was to implement something like this:

+static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
+{
+	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
+
+	rcar_gen4_pcie_ltssm_enable(rcar, true);
+
+	rcar_gen4_pcie_speed_change(dw);
+
+	return 0;
+}

and retain the rcar_gen4_pcie_link_up() method as is.

* Note: originally your loop used to have the msleep() call performed
after the first rcar_gen4_pcie_speed_change() invocation. Thus the
delay can be dropped if there is only one iteration implemented (see
further to understand why).

You don't need to wait for the link to actually get up in the
start_link() callback because there is the link_up() callback, which
is called from the dw_pcie_wait_for_link() method during the generic
DWC PCIe setup procedure. See:

dw_pcie_host_init():
+-> ops->host_init()
+-> ...
+-> dw_pcie_setup_rc()
|   +-> ...
|   +-> dw_pcie_setup()
|   +-> ...
+-> if !dw_pcie_link_up()
|   |   +-> ops->link_up()
|   +-> dw_pcie_start_link()
|       +-> ops->start_link()
+-> dw_pcie_wait_for_link();   // See, wait-procedure is already performed
|   +-> loop 10 times          // for you in the core driver together
|       +-> dw_pcie_link_up()  // with the delays between the checks
|           +-> ops->link_up()
|       +-> msleep(~100)
+-> ...

-Serge(y)

> 
> Best regards,
> Yoshihiro Shimoda
> 
> > -Serge(y)
>
Yoshihiro Shimoda June 14, 2023, 2:30 a.m. UTC | #10
Hello Serge,

 From: Serge Semin, Sent: Tuesday, June 13, 2023 4:52 AM
> 
> On Mon, Jun 12, 2023 at 01:19:02PM +0000, Yoshihiro Shimoda wrote:
> > Hello Serge,
> >
> > > From: Serge Semin, Sent: Friday, June 9, 2023 7:54 PM
> > <snip>
> > > > > > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > > {
> > > > > >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > >         int i;
> > > > > >
> > > > > >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > >
> > > > > >         /*
> > > > > >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> > > > > >          * PCIEINTSTS0 which is this controller specific register may not
> > > > > >          * be set.
> > > > > >          */
> > > > >
> > > > > >         if (rcar->needs_speed_change) {
> > > > >
> > > > > Seeing this is specified for the root port only what about
> > > > > replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> > > > > DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.
> > > >
> > > > Thank you for the comment. I'll fix it.
> > > >
> > > > > BTW Just curious. Why is the loop below enabled for the Root Port
> > > > > only? What about the end-point controller? It's the same hardware
> > > > > after all..
> > > >
> > > > This is reused from v16 and then it used "link retraining" which is only for
> > > > the Root Port. As you mentioned, it seems endpoint controller is also needed
> > > > if we use direct speed change.
> > > >
> > > > > >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > > >                         rcar_gen4_pcie_speed_change(dw);
> > > > > >                         msleep(100);
> > > > > >                         if (rcar_gen4_pcie_check_current_link(dw))
> > > > > >                                 return 0;
> > > > > >                 }
> > > > >
> > > > > Did you trace how many iterations this loop normally takes?
> > > >
> > > > i = 0 or 1 (if the max-link-speed is suitable for a connected device.)
> > > >
> > > > > Is it
> > > > > constant or varies for the same platform setup and a connected link
> > > > > partner? Does the number of iterations depend on the target link speed
> > > > > specified via the "max-link-speed" property?
> > > >
> > >
> > > > This is not related to the "max-link-speed". It seems to related to
> > > > a link partner.
> > > > 		LinkCap	max-link-speed	loop
> > > > Device A		4	4		1
> > > > Device A		4	3		1
> > > > Device B		3	3		0
> > >
> > > Great! If so I would have just left a single unconditional
> > > rcar_gen4_pcie_speed_change() call placed right after the
> > > rcar_gen4_pcie_ltssm_enable() method with no delays afterwards. These
> > > methods would have been invoked in the framework of
> > > dw_pcie_start_link() after which the dw_pcie_wait_for_link() method is
> > > called with several checks parted with the ~100ms delay. It will make
> > > sure that at least some link is up with the link state printed to the
> > > system log. If for some reason the performance degradation happens
> > > then it will be up to the system administrator to investigate what was
> > > wrong. Your driver did as much is it could to reach the best link gen.
> >
> > IIUC, is your suggestion like the following code?
> > ---
> > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > 	if (!dw_pcie_wait_for_link(dw)) {
> > 		rcar_gen4_pcie_speed_change(dw);
> > 		return 0;
> > 	}
> > ---
> >
> > Unfortunately, it doesn't work correctly...
> > The following code can work correctly. The value of i is still 1 on the device A.
> > What do you think that the following code is acceptable?
> > ---
> > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > 		msleep(100);
> > 		rcar_gen4_pcie_speed_change(dw);
> > 		if (dw_pcie_link_up(dw)) {
> > 			printk("%s:%d\n", __func__, i);
> > 			return 0;
> > 		}
> > 	}
> > ---
> 
> My idea was to implement something like this:
> 
> +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> +{
> +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> +
> +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> +
> +	rcar_gen4_pcie_speed_change(dw);
> +
> +	return 0;
> +}
> 
> and retain the rcar_gen4_pcie_link_up() method as is.

Unfortunately, such a code doesn't work on my environment...

> * Note: originally your loop used to have the msleep() call performed
> after the first rcar_gen4_pcie_speed_change() invocation. Thus the
> delay can be dropped if there is only one iteration implemented (see
> further to understand why).

Calling rcar_gen4_pcie_speed_change() multiple times is required on
my environment. I thought msleep(100) was quite long so that I tried
other wait interval like below:

 msleep(1) : about 5 loops is needed for link. (about 5 msec.)
 usleep_range(100, 110) : about 400 loops is needed for link. (about 40 msec.)
 usleep_range(500, 600) : about 80 loops is needed for link. (about 40 msec.)

The delay timing doesn't seems important. Both cases below can work correctly.
--- case 1 ---
	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
		rcar_gen4_pcie_speed_change(dw);
		if (dw_pcie_link_up(dw)) {
			printk("%s:%d\n", __func__, i); // will be removed
			return 0;
		}
		msleep(1);
	}
---
--- case 2 ---
	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
		rcar_gen4_pcie_speed_change(dw);
		msleep(1);
		if (dw_pcie_link_up(dw)) {
			printk("%s:%d\n", __func__, i); // will be removed
			return 0;
		}
	}
---

So, I'll use case 1 for it.

> You don't need to wait for the link to actually get up in the
> start_link() callback because there is the link_up() callback, which
> is called from the dw_pcie_wait_for_link() method during the generic
> DWC PCIe setup procedure. See:

Since the procedure will call rcar_gen4_pcie_speed_change() from
->start_link() once, my environment cannot work correctly...

Best regards,
Yoshihiro Shimoda

> dw_pcie_host_init():
> +-> ops->host_init()
> +-> ...
> +-> dw_pcie_setup_rc()
> |   +-> ...
> |   +-> dw_pcie_setup()
> |   +-> ...
> +-> if !dw_pcie_link_up()
> |   |   +-> ops->link_up()
> |   +-> dw_pcie_start_link()
> |       +-> ops->start_link()
> +-> dw_pcie_wait_for_link();   // See, wait-procedure is already performed
> |   +-> loop 10 times          // for you in the core driver together
> |       +-> dw_pcie_link_up()  // with the delays between the checks
> |           +-> ops->link_up()
> |       +-> msleep(~100)
> +-> ...
> 
> -Serge(y)
> 
> >
> > Best regards,
> > Yoshihiro Shimoda
> >
> > > -Serge(y)
> >
Serge Semin June 14, 2023, 11:39 a.m. UTC | #11
On Wed, Jun 14, 2023 at 02:30:13AM +0000, Yoshihiro Shimoda wrote:
> Hello Serge,
> 
>  From: Serge Semin, Sent: Tuesday, June 13, 2023 4:52 AM
> > 
> > On Mon, Jun 12, 2023 at 01:19:02PM +0000, Yoshihiro Shimoda wrote:
> > > Hello Serge,
> > >
> > > > From: Serge Semin, Sent: Friday, June 9, 2023 7:54 PM
> > > <snip>
> > > > > > > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > > > {
> > > > > > >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > >         int i;
> > > > > > >
> > > > > > >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > > >
> > > > > > >         /*
> > > > > > >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> > > > > > >          * PCIEINTSTS0 which is this controller specific register may not
> > > > > > >          * be set.
> > > > > > >          */
> > > > > >
> > > > > > >         if (rcar->needs_speed_change) {
> > > > > >
> > > > > > Seeing this is specified for the root port only what about
> > > > > > replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> > > > > > DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.
> > > > >
> > > > > Thank you for the comment. I'll fix it.
> > > > >
> > > > > > BTW Just curious. Why is the loop below enabled for the Root Port
> > > > > > only? What about the end-point controller? It's the same hardware
> > > > > > after all..
> > > > >
> > > > > This is reused from v16 and then it used "link retraining" which is only for
> > > > > the Root Port. As you mentioned, it seems endpoint controller is also needed
> > > > > if we use direct speed change.
> > > > >
> > > > > > >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > > > >                         rcar_gen4_pcie_speed_change(dw);
> > > > > > >                         msleep(100);
> > > > > > >                         if (rcar_gen4_pcie_check_current_link(dw))
> > > > > > >                                 return 0;
> > > > > > >                 }
> > > > > >
> > > > > > Did you trace how many iterations this loop normally takes?
> > > > >
> > > > > i = 0 or 1 (if the max-link-speed is suitable for a connected device.)
> > > > >
> > > > > > Is it
> > > > > > constant or varies for the same platform setup and a connected link
> > > > > > partner? Does the number of iterations depend on the target link speed
> > > > > > specified via the "max-link-speed" property?
> > > > >
> > > >
> > > > > This is not related to the "max-link-speed". It seems to related to
> > > > > a link partner.
> > > > > 		LinkCap	max-link-speed	loop
> > > > > Device A		4	4		1
> > > > > Device A		4	3		1
> > > > > Device B		3	3		0
> > > >
> > > > Great! If so I would have just left a single unconditional
> > > > rcar_gen4_pcie_speed_change() call placed right after the
> > > > rcar_gen4_pcie_ltssm_enable() method with no delays afterwards. These
> > > > methods would have been invoked in the framework of
> > > > dw_pcie_start_link() after which the dw_pcie_wait_for_link() method is
> > > > called with several checks parted with the ~100ms delay. It will make
> > > > sure that at least some link is up with the link state printed to the
> > > > system log. If for some reason the performance degradation happens
> > > > then it will be up to the system administrator to investigate what was
> > > > wrong. Your driver did as much is it could to reach the best link gen.
> > >
> > > IIUC, is your suggestion like the following code?
> > > ---
> > > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > 	if (!dw_pcie_wait_for_link(dw)) {
> > > 		rcar_gen4_pcie_speed_change(dw);
> > > 		return 0;
> > > 	}
> > > ---
> > >
> > > Unfortunately, it doesn't work correctly...
> > > The following code can work correctly. The value of i is still 1 on the device A.
> > > What do you think that the following code is acceptable?
> > > ---
> > > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > 		msleep(100);
> > > 		rcar_gen4_pcie_speed_change(dw);
> > > 		if (dw_pcie_link_up(dw)) {
> > > 			printk("%s:%d\n", __func__, i);
> > > 			return 0;
> > > 		}
> > > 	}
> > > ---
> > 
> > My idea was to implement something like this:
> > 
> > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > +{
> > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > +
> > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > +
> > +	rcar_gen4_pcie_speed_change(dw);
> > +
> > +	return 0;
> > +}
> > 
> > and retain the rcar_gen4_pcie_link_up() method as is.
> 
> Unfortunately, such a code doesn't work on my environment...
> 
> > * Note: originally your loop used to have the msleep() call performed
> > after the first rcar_gen4_pcie_speed_change() invocation. Thus the
> > delay can be dropped if there is only one iteration implemented (see
> > further to understand why).
> 
> Calling rcar_gen4_pcie_speed_change() multiple times is required on
> my environment. I thought msleep(100) was quite long so that I tried
> other wait interval like below:
> 
>  msleep(1) : about 5 loops is needed for link. (about 5 msec.)
>  usleep_range(100, 110) : about 400 loops is needed for link. (about 40 msec.)
>  usleep_range(500, 600) : about 80 loops is needed for link. (about 40 msec.)
> 
> The delay timing doesn't seems important. Both cases below can work correctly.
> --- case 1 ---
> 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> 		rcar_gen4_pcie_speed_change(dw);
> 		if (dw_pcie_link_up(dw)) {
> 			printk("%s:%d\n", __func__, i); // will be removed
> 			return 0;
> 		}

> 		msleep(1);

Why? Just set it to 5 ms. In anyway please see the next message.

> 	}
> ---
> --- case 2 ---
> 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> 		rcar_gen4_pcie_speed_change(dw);
> 		msleep(1);
> 		if (dw_pcie_link_up(dw)) {
> 			printk("%s:%d\n", __func__, i); // will be removed
> 			return 0;
> 		}
> 	}
> ---
> 
> So, I'll use case 1 for it.

Ah. I think I get it now. Your spreadsheet:

                LinkCap max-link-speed  loop
Device A           4          4           1
Device A           4          3           1
Device B           3          3           0

actually meant (loop+1) iterations. So in case of Gen4 you'll need
three speed changes (one already enabled in the dw_pcie_setup_rc()
method and another two ones are performed in your loop). Similarly in
case of Gen3 you'll need only one iteration. I bet you won't need to
call rcar_gen4_pcie_speed_change() at all if gen2 needs to be trained.
Could you try it out?

Anyway based on what you discovered and on my experience working with
that controller, there should be as many
GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag changes as the target speed
value, i.e. no flag switch if Gen1 is required, one flag switch if
Gen2 is required and so on. Although I failed to find any explicit
statement about that in the HW-manual.

In addition to the above I've found out that
GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE field is actually self cleared when
the speed change occurs (see the register description in the HW
reference manual). We can use it to implement the
dw_pcie_link_up()-independent link training algorithm like this:

+#define RCAR_RETRAIN_MAX_CHECK		10
+#define RCAR_LINK_SPEED_MAX		4
+
+static bool rcar_gen4_pcie_speed_change(struct dw_pcie *dw)
+{
+	u32 val;
+	int i;
+
+	val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
+	val &= ~PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+	val |= PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+	for (i = 0; i < RCAR_SPEED_CHANGE_WAIT_RETRIES; i++) {
+		val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		if (!(val & PORT_LOGIC_SPEED_CHANGE))
+			return true;
+
+		msleep(1);
+	}
+
+	return false;
+}
+
+static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
+{
+	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
+	int i, changes;
+
+	rcar_gen4_pcie_ltssm_enable(rcar, true);
+
+	changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX);
+	for (i = 0; i < changes; ++i) {
+		if (!rcar_gen4_pcie_speed_change(dw))
+			break;
+	}
+
+	return 0;
+}

Note 1. The actual link state will be checked in the framework of the
dw_pcie_wait_for_link() function, by means of dw_pcie_link_up().

Note 2. RCAR_LINK_SPEED_MAX is deliberately set to 4 because DW PCIe
EP core driver doesn't set the PORT_LOGIC_SPEED_CHANGE flag. In case
of the DW PCIe Root Port at most 3 iterations should be enough.

Note 3. Please use the RCAR_ prefix for the vendor-specific macros.
It concerns the entire series.

Could you try out the code suggested above?

-Serge(y)

> 
> > You don't need to wait for the link to actually get up in the
> > start_link() callback because there is the link_up() callback, which
> > is called from the dw_pcie_wait_for_link() method during the generic
> > DWC PCIe setup procedure. See:
> 
> Since the procedure will call rcar_gen4_pcie_speed_change() from
> ->start_link() once, my environment cannot work correctly...
> 
> Best regards,
> Yoshihiro Shimoda
> 
> > dw_pcie_host_init():
> > +-> ops->host_init()
> > +-> ...
> > +-> dw_pcie_setup_rc()
> > |   +-> ...
> > |   +-> dw_pcie_setup()
> > |   +-> ...
> > +-> if !dw_pcie_link_up()
> > |   |   +-> ops->link_up()
> > |   +-> dw_pcie_start_link()
> > |       +-> ops->start_link()
> > +-> dw_pcie_wait_for_link();   // See, wait-procedure is already performed
> > |   +-> loop 10 times          // for you in the core driver together
> > |       +-> dw_pcie_link_up()  // with the delays between the checks
> > |           +-> ops->link_up()
> > |       +-> msleep(~100)
> > +-> ...
> > 
> > -Serge(y)
> > 
> > >
> > > Best regards,
> > > Yoshihiro Shimoda
> > >
> > > > -Serge(y)
> > >
Serge Semin June 14, 2023, 7:31 p.m. UTC | #12
On Wed, Jun 14, 2023 at 02:39:29PM +0300, Serge Semin wrote:
> On Wed, Jun 14, 2023 at 02:30:13AM +0000, Yoshihiro Shimoda wrote:
> > Hello Serge,
> > 
> >  From: Serge Semin, Sent: Tuesday, June 13, 2023 4:52 AM
> > > 
> > > On Mon, Jun 12, 2023 at 01:19:02PM +0000, Yoshihiro Shimoda wrote:
> > > > Hello Serge,
> > > >
> > > > > From: Serge Semin, Sent: Friday, June 9, 2023 7:54 PM
> > > > <snip>
> > > > > > > > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > > > > {
> > > > > > > >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > > >         int i;
> > > > > > > >
> > > > > > > >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > > > >
> > > > > > > >         /*
> > > > > > > >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> > > > > > > >          * PCIEINTSTS0 which is this controller specific register may not
> > > > > > > >          * be set.
> > > > > > > >          */
> > > > > > >
> > > > > > > >         if (rcar->needs_speed_change) {
> > > > > > >
> > > > > > > Seeing this is specified for the root port only what about
> > > > > > > replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> > > > > > > DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.
> > > > > >
> > > > > > Thank you for the comment. I'll fix it.
> > > > > >
> > > > > > > BTW Just curious. Why is the loop below enabled for the Root Port
> > > > > > > only? What about the end-point controller? It's the same hardware
> > > > > > > after all..
> > > > > >
> > > > > > This is reused from v16 and then it used "link retraining" which is only for
> > > > > > the Root Port. As you mentioned, it seems endpoint controller is also needed
> > > > > > if we use direct speed change.
> > > > > >
> > > > > > > >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > > > > >                         rcar_gen4_pcie_speed_change(dw);
> > > > > > > >                         msleep(100);
> > > > > > > >                         if (rcar_gen4_pcie_check_current_link(dw))
> > > > > > > >                                 return 0;
> > > > > > > >                 }
> > > > > > >
> > > > > > > Did you trace how many iterations this loop normally takes?
> > > > > >
> > > > > > i = 0 or 1 (if the max-link-speed is suitable for a connected device.)
> > > > > >
> > > > > > > Is it
> > > > > > > constant or varies for the same platform setup and a connected link
> > > > > > > partner? Does the number of iterations depend on the target link speed
> > > > > > > specified via the "max-link-speed" property?
> > > > > >
> > > > >
> > > > > > This is not related to the "max-link-speed". It seems to related to
> > > > > > a link partner.
> > > > > > 		LinkCap	max-link-speed	loop
> > > > > > Device A		4	4		1
> > > > > > Device A		4	3		1
> > > > > > Device B		3	3		0
> > > > >
> > > > > Great! If so I would have just left a single unconditional
> > > > > rcar_gen4_pcie_speed_change() call placed right after the
> > > > > rcar_gen4_pcie_ltssm_enable() method with no delays afterwards. These
> > > > > methods would have been invoked in the framework of
> > > > > dw_pcie_start_link() after which the dw_pcie_wait_for_link() method is
> > > > > called with several checks parted with the ~100ms delay. It will make
> > > > > sure that at least some link is up with the link state printed to the
> > > > > system log. If for some reason the performance degradation happens
> > > > > then it will be up to the system administrator to investigate what was
> > > > > wrong. Your driver did as much is it could to reach the best link gen.
> > > >
> > > > IIUC, is your suggestion like the following code?
> > > > ---
> > > > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > 	if (!dw_pcie_wait_for_link(dw)) {
> > > > 		rcar_gen4_pcie_speed_change(dw);
> > > > 		return 0;
> > > > 	}
> > > > ---
> > > >
> > > > Unfortunately, it doesn't work correctly...
> > > > The following code can work correctly. The value of i is still 1 on the device A.
> > > > What do you think that the following code is acceptable?
> > > > ---
> > > > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > 		msleep(100);
> > > > 		rcar_gen4_pcie_speed_change(dw);
> > > > 		if (dw_pcie_link_up(dw)) {
> > > > 			printk("%s:%d\n", __func__, i);
> > > > 			return 0;
> > > > 		}
> > > > 	}
> > > > ---
> > > 
> > > My idea was to implement something like this:
> > > 
> > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > +{
> > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > +
> > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > +
> > > +	rcar_gen4_pcie_speed_change(dw);
> > > +
> > > +	return 0;
> > > +}
> > > 
> > > and retain the rcar_gen4_pcie_link_up() method as is.
> > 
> > Unfortunately, such a code doesn't work on my environment...
> > 
> > > * Note: originally your loop used to have the msleep() call performed
> > > after the first rcar_gen4_pcie_speed_change() invocation. Thus the
> > > delay can be dropped if there is only one iteration implemented (see
> > > further to understand why).
> > 
> > Calling rcar_gen4_pcie_speed_change() multiple times is required on
> > my environment. I thought msleep(100) was quite long so that I tried
> > other wait interval like below:
> > 
> >  msleep(1) : about 5 loops is needed for link. (about 5 msec.)
> >  usleep_range(100, 110) : about 400 loops is needed for link. (about 40 msec.)
> >  usleep_range(500, 600) : about 80 loops is needed for link. (about 40 msec.)
> > 
> > The delay timing doesn't seems important. Both cases below can work correctly.
> > --- case 1 ---
> > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > 		rcar_gen4_pcie_speed_change(dw);
> > 		if (dw_pcie_link_up(dw)) {
> > 			printk("%s:%d\n", __func__, i); // will be removed
> > 			return 0;
> > 		}
> 
> > 		msleep(1);
> 
> Why? Just set it to 5 ms. In anyway please see the next message.
> 
> > 	}
> > ---
> > --- case 2 ---
> > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > 		rcar_gen4_pcie_speed_change(dw);
> > 		msleep(1);
> > 		if (dw_pcie_link_up(dw)) {
> > 			printk("%s:%d\n", __func__, i); // will be removed
> > 			return 0;
> > 		}
> > 	}
> > ---
> > 
> > So, I'll use case 1 for it.
> 
> Ah. I think I get it now. Your spreadsheet:
> 
>                 LinkCap max-link-speed  loop
> Device A           4          4           1
> Device A           4          3           1
> Device B           3          3           0
> 
> actually meant (loop+1) iterations. So in case of Gen4 you'll need
> three speed changes (one already enabled in the dw_pcie_setup_rc()
> method and another two ones are performed in your loop). Similarly in
> case of Gen3 you'll need only one iteration. I bet you won't need to
> call rcar_gen4_pcie_speed_change() at all if gen2 needs to be trained.
> Could you try it out?
> 
> Anyway based on what you discovered and on my experience working with
> that controller, there should be as many
> GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag changes as the target speed
> value, i.e. no flag switch if Gen1 is required, one flag switch if
> Gen2 is required and so on. Although I failed to find any explicit
> statement about that in the HW-manual.
> 
> In addition to the above I've found out that
> GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE field is actually self cleared when
> the speed change occurs (see the register description in the HW
> reference manual). We can use it to implement the
> dw_pcie_link_up()-independent link training algorithm like this:
> 
> +#define RCAR_RETRAIN_MAX_CHECK		10
> +#define RCAR_LINK_SPEED_MAX		4
> +
> +static bool rcar_gen4_pcie_speed_change(struct dw_pcie *dw)
> +{
> +	u32 val;
> +	int i;
> +
> +	val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> +	val &= ~PORT_LOGIC_SPEED_CHANGE;
> +	dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> +
> +	val |= PORT_LOGIC_SPEED_CHANGE;
> +	dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> +
> +	for (i = 0; i < RCAR_SPEED_CHANGE_WAIT_RETRIES; i++) {
> +		val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> +		if (!(val & PORT_LOGIC_SPEED_CHANGE))
> +			return true;
> +
> +		msleep(1);
> +	}
> +
> +	return false;
> +}
> +
> +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> +{
> +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> +	int i, changes;
> +
> +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> +

> +	changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX);

This should have been:
+changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX) - 1;
because Gen1 doesn't need any speed change action.

But this part can be further improved. Instead of the code above the
next snipped can be implemented:

+changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX) - 1;
+if (changes && rcar->mode == DW_PCIE_RC_TYPE)
+		changes -= 1;

It takes into account that the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE
flag is already set in the dw_pcie_setup_rc() method. So Gen2 will be
trained with no need in addition actions. If it's supported of course.

-Serge(y)

> +	for (i = 0; i < changes; ++i) {
> +		if (!rcar_gen4_pcie_speed_change(dw))
> +			break;
> +	}
> +
> +	return 0;
> +}
> 
> Note 1. The actual link state will be checked in the framework of the
> dw_pcie_wait_for_link() function, by means of dw_pcie_link_up().
> 
> Note 2. RCAR_LINK_SPEED_MAX is deliberately set to 4 because DW PCIe
> EP core driver doesn't set the PORT_LOGIC_SPEED_CHANGE flag. In case
> of the DW PCIe Root Port at most 3 iterations should be enough.
> 
> Note 3. Please use the RCAR_ prefix for the vendor-specific macros.
> It concerns the entire series.
> 
> Could you try out the code suggested above?
> 
> -Serge(y)
> 
> > 
> > > You don't need to wait for the link to actually get up in the
> > > start_link() callback because there is the link_up() callback, which
> > > is called from the dw_pcie_wait_for_link() method during the generic
> > > DWC PCIe setup procedure. See:
> > 
> > Since the procedure will call rcar_gen4_pcie_speed_change() from
> > ->start_link() once, my environment cannot work correctly...
> > 
> > Best regards,
> > Yoshihiro Shimoda
> > 
> > > dw_pcie_host_init():
> > > +-> ops->host_init()
> > > +-> ...
> > > +-> dw_pcie_setup_rc()
> > > |   +-> ...
> > > |   +-> dw_pcie_setup()
> > > |   +-> ...
> > > +-> if !dw_pcie_link_up()
> > > |   |   +-> ops->link_up()
> > > |   +-> dw_pcie_start_link()
> > > |       +-> ops->start_link()
> > > +-> dw_pcie_wait_for_link();   // See, wait-procedure is already performed
> > > |   +-> loop 10 times          // for you in the core driver together
> > > |       +-> dw_pcie_link_up()  // with the delays between the checks
> > > |           +-> ops->link_up()
> > > |       +-> msleep(~100)
> > > +-> ...
> > > 
> > > -Serge(y)
> > > 
> > > >
> > > > Best regards,
> > > > Yoshihiro Shimoda
> > > >
> > > > > -Serge(y)
> > > >
Yoshihiro Shimoda June 20, 2023, 12:02 p.m. UTC | #13
Hello Serge,

> From: Serge Semin, Sent: Thursday, June 15, 2023 4:32 AM
> 
> On Wed, Jun 14, 2023 at 02:39:29PM +0300, Serge Semin wrote:
> > On Wed, Jun 14, 2023 at 02:30:13AM +0000, Yoshihiro Shimoda wrote:
> > > Hello Serge,
> > >
> > >  From: Serge Semin, Sent: Tuesday, June 13, 2023 4:52 AM
> > > >
> > > > On Mon, Jun 12, 2023 at 01:19:02PM +0000, Yoshihiro Shimoda wrote:
> > > > > Hello Serge,
> > > > >
> > > > > > From: Serge Semin, Sent: Friday, June 9, 2023 7:54 PM
> > > > > <snip>
> > > > > > > > > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > > > > > {
> > > > > > > > >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > > > >         int i;
> > > > > > > > >
> > > > > > > > >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > > > > >
> > > > > > > > >         /*
> > > > > > > > >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> > > > > > > > >          * PCIEINTSTS0 which is this controller specific register may not
> > > > > > > > >          * be set.
> > > > > > > > >          */
> > > > > > > >
> > > > > > > > >         if (rcar->needs_speed_change) {
> > > > > > > >
> > > > > > > > Seeing this is specified for the root port only what about
> > > > > > > > replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> > > > > > > > DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.
> > > > > > >
> > > > > > > Thank you for the comment. I'll fix it.
> > > > > > >
> > > > > > > > BTW Just curious. Why is the loop below enabled for the Root Port
> > > > > > > > only? What about the end-point controller? It's the same hardware
> > > > > > > > after all..
> > > > > > >
> > > > > > > This is reused from v16 and then it used "link retraining" which is only for
> > > > > > > the Root Port. As you mentioned, it seems endpoint controller is also needed
> > > > > > > if we use direct speed change.
> > > > > > >
> > > > > > > > >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > > > > > >                         rcar_gen4_pcie_speed_change(dw);
> > > > > > > > >                         msleep(100);
> > > > > > > > >                         if (rcar_gen4_pcie_check_current_link(dw))
> > > > > > > > >                                 return 0;
> > > > > > > > >                 }
> > > > > > > >
> > > > > > > > Did you trace how many iterations this loop normally takes?
> > > > > > >
> > > > > > > i = 0 or 1 (if the max-link-speed is suitable for a connected device.)
> > > > > > >
> > > > > > > > Is it
> > > > > > > > constant or varies for the same platform setup and a connected link
> > > > > > > > partner? Does the number of iterations depend on the target link speed
> > > > > > > > specified via the "max-link-speed" property?
> > > > > > >
> > > > > >
> > > > > > > This is not related to the "max-link-speed". It seems to related to
> > > > > > > a link partner.
> > > > > > > 		LinkCap	max-link-speed	loop
> > > > > > > Device A		4	4		1
> > > > > > > Device A		4	3		1
> > > > > > > Device B		3	3		0
> > > > > >
> > > > > > Great! If so I would have just left a single unconditional
> > > > > > rcar_gen4_pcie_speed_change() call placed right after the
> > > > > > rcar_gen4_pcie_ltssm_enable() method with no delays afterwards. These
> > > > > > methods would have been invoked in the framework of
> > > > > > dw_pcie_start_link() after which the dw_pcie_wait_for_link() method is
> > > > > > called with several checks parted with the ~100ms delay. It will make
> > > > > > sure that at least some link is up with the link state printed to the
> > > > > > system log. If for some reason the performance degradation happens
> > > > > > then it will be up to the system administrator to investigate what was
> > > > > > wrong. Your driver did as much is it could to reach the best link gen.
> > > > >
> > > > > IIUC, is your suggestion like the following code?
> > > > > ---
> > > > > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > 	if (!dw_pcie_wait_for_link(dw)) {
> > > > > 		rcar_gen4_pcie_speed_change(dw);
> > > > > 		return 0;
> > > > > 	}
> > > > > ---
> > > > >
> > > > > Unfortunately, it doesn't work correctly...
> > > > > The following code can work correctly. The value of i is still 1 on the device A.
> > > > > What do you think that the following code is acceptable?
> > > > > ---
> > > > > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > > 		msleep(100);
> > > > > 		rcar_gen4_pcie_speed_change(dw);
> > > > > 		if (dw_pcie_link_up(dw)) {
> > > > > 			printk("%s:%d\n", __func__, i);
> > > > > 			return 0;
> > > > > 		}
> > > > > 	}
> > > > > ---
> > > >
> > > > My idea was to implement something like this:
> > > >
> > > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > +{
> > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > +
> > > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > +
> > > > +	rcar_gen4_pcie_speed_change(dw);
> > > > +
> > > > +	return 0;
> > > > +}
> > > >
> > > > and retain the rcar_gen4_pcie_link_up() method as is.
> > >
> > > Unfortunately, such a code doesn't work on my environment...
> > >
> > > > * Note: originally your loop used to have the msleep() call performed
> > > > after the first rcar_gen4_pcie_speed_change() invocation. Thus the
> > > > delay can be dropped if there is only one iteration implemented (see
> > > > further to understand why).
> > >
> > > Calling rcar_gen4_pcie_speed_change() multiple times is required on
> > > my environment. I thought msleep(100) was quite long so that I tried
> > > other wait interval like below:
> > >
> > >  msleep(1) : about 5 loops is needed for link. (about 5 msec.)
> > >  usleep_range(100, 110) : about 400 loops is needed for link. (about 40 msec.)
> > >  usleep_range(500, 600) : about 80 loops is needed for link. (about 40 msec.)
> > >
> > > The delay timing doesn't seems important. Both cases below can work correctly.
> > > --- case 1 ---
> > > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > 		rcar_gen4_pcie_speed_change(dw);
> > > 		if (dw_pcie_link_up(dw)) {
> > > 			printk("%s:%d\n", __func__, i); // will be removed
> > > 			return 0;
> > > 		}
> >
> > > 		msleep(1);
> >
> > Why? Just set it to 5 ms. In anyway please see the next message.
> >
> > > 	}
> > > ---
> > > --- case 2 ---
> > > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > 		rcar_gen4_pcie_speed_change(dw);
> > > 		msleep(1);
> > > 		if (dw_pcie_link_up(dw)) {
> > > 			printk("%s:%d\n", __func__, i); // will be removed
> > > 			return 0;
> > > 		}
> > > 	}
> > > ---
> > >
> > > So, I'll use case 1 for it.
> >
> > Ah. I think I get it now. Your spreadsheet:
> >
> >                 LinkCap max-link-speed  loop
> > Device A           4          4           1
> > Device A           4          3           1
> > Device B           3          3           0
> >
> > actually meant (loop+1) iterations. So in case of Gen4 you'll need
> > three speed changes (one already enabled in the dw_pcie_setup_rc()
> > method and another two ones are performed in your loop). Similarly in
> > case of Gen3 you'll need only one iteration. I bet you won't need to
> > call rcar_gen4_pcie_speed_change() at all if gen2 needs to be trained.
> > Could you try it out?
> >
> > Anyway based on what you discovered and on my experience working with
> > that controller, there should be as many
> > GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag changes as the target speed
> > value, i.e. no flag switch if Gen1 is required, one flag switch if
> > Gen2 is required and so on. Although I failed to find any explicit
> > statement about that in the HW-manual.
> >
> > In addition to the above I've found out that
> > GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE field is actually self cleared when
> > the speed change occurs (see the register description in the HW
> > reference manual). We can use it to implement the
> > dw_pcie_link_up()-independent link training algorithm like this:
> >
> > +#define RCAR_RETRAIN_MAX_CHECK		10
> > +#define RCAR_LINK_SPEED_MAX		4
> > +
> > +static bool rcar_gen4_pcie_speed_change(struct dw_pcie *dw)
> > +{
> > +	u32 val;
> > +	int i;
> > +
> > +	val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> > +	val &= ~PORT_LOGIC_SPEED_CHANGE;
> > +	dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > +
> > +	val |= PORT_LOGIC_SPEED_CHANGE;
> > +	dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > +
> > +	for (i = 0; i < RCAR_SPEED_CHANGE_WAIT_RETRIES; i++) {
> > +		val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> > +		if (!(val & PORT_LOGIC_SPEED_CHANGE))
> > +			return true;
> > +
> > +		msleep(1);
> > +	}
> > +
> > +	return false;
> > +}
> > +
> > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > +{
> > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > +	int i, changes;
> > +
> > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > +
> 
> > +	changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX);
> 
> This should have been:
> +changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX) - 1;
> because Gen1 doesn't need any speed change action.
> 
> But this part can be further improved. Instead of the code above the
> next snipped can be implemented:
> 
> +changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX) - 1;
> +if (changes && rcar->mode == DW_PCIE_RC_TYPE)
> +		changes -= 1;
> 
> It takes into account that the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE
> flag is already set in the dw_pcie_setup_rc() method. So Gen2 will be
> trained with no need in addition actions. If it's supported of course.

Thank you very much for your comments and suggestion! The suggestion code
works correctly on my environment.

Best regards,
Yoshihiro Shimoda

P.S.
So, I'm investigating endpoint mode issues which you commented now.

> -Serge(y)
> 
> > +	for (i = 0; i < changes; ++i) {
> > +		if (!rcar_gen4_pcie_speed_change(dw))
> > +			break;
> > +	}
> > +
> > +	return 0;
> > +}
> >
> > Note 1. The actual link state will be checked in the framework of the
> > dw_pcie_wait_for_link() function, by means of dw_pcie_link_up().
> >
> > Note 2. RCAR_LINK_SPEED_MAX is deliberately set to 4 because DW PCIe
> > EP core driver doesn't set the PORT_LOGIC_SPEED_CHANGE flag. In case
> > of the DW PCIe Root Port at most 3 iterations should be enough.
> >
> > Note 3. Please use the RCAR_ prefix for the vendor-specific macros.
> > It concerns the entire series.
> >
> > Could you try out the code suggested above?
> >
> > -Serge(y)
> >
> > >
> > > > You don't need to wait for the link to actually get up in the
> > > > start_link() callback because there is the link_up() callback, which
> > > > is called from the dw_pcie_wait_for_link() method during the generic
> > > > DWC PCIe setup procedure. See:
> > >
> > > Since the procedure will call rcar_gen4_pcie_speed_change() from
> > > ->start_link() once, my environment cannot work correctly...
> > >
> > > Best regards,
> > > Yoshihiro Shimoda
> > >
> > > > dw_pcie_host_init():
> > > > +-> ops->host_init()
> > > > +-> ...
> > > > +-> dw_pcie_setup_rc()
> > > > |   +-> ...
> > > > |   +-> dw_pcie_setup()
> > > > |   +-> ...
> > > > +-> if !dw_pcie_link_up()
> > > > |   |   +-> ops->link_up()
> > > > |   +-> dw_pcie_start_link()
> > > > |       +-> ops->start_link()
> > > > +-> dw_pcie_wait_for_link();   // See, wait-procedure is already performed
> > > > |   +-> loop 10 times          // for you in the core driver together
> > > > |       +-> dw_pcie_link_up()  // with the delays between the checks
> > > > |           +-> ops->link_up()
> > > > |       +-> msleep(~100)
> > > > +-> ...
> > > >
> > > > -Serge(y)
> > > >
> > > > >
> > > > > Best regards,
> > > > > Yoshihiro Shimoda
> > > > >
> > > > > > -Serge(y)
> > > > >
Serge Semin June 20, 2023, 12:36 p.m. UTC | #14
On Tue, Jun 20, 2023 at 12:02:08PM +0000, Yoshihiro Shimoda wrote:
> Hello Serge,
> 
> > From: Serge Semin, Sent: Thursday, June 15, 2023 4:32 AM
> > 
> > On Wed, Jun 14, 2023 at 02:39:29PM +0300, Serge Semin wrote:
> > > On Wed, Jun 14, 2023 at 02:30:13AM +0000, Yoshihiro Shimoda wrote:
> > > > Hello Serge,
> > > >
> > > >  From: Serge Semin, Sent: Tuesday, June 13, 2023 4:52 AM
> > > > >
> > > > > On Mon, Jun 12, 2023 at 01:19:02PM +0000, Yoshihiro Shimoda wrote:
> > > > > > Hello Serge,
> > > > > >
> > > > > > > From: Serge Semin, Sent: Friday, June 9, 2023 7:54 PM
> > > > > > <snip>
> > > > > > > > > > static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > > > > > > {
> > > > > > > > > >         struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > > > > > >         int i;
> > > > > > > > > >
> > > > > > > > > >         rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > > > > > >
> > > > > > > > > >         /*
> > > > > > > > > >          * Require direct speed change here. Otherwise RDLH_LINK_UP of
> > > > > > > > > >          * PCIEINTSTS0 which is this controller specific register may not
> > > > > > > > > >          * be set.
> > > > > > > > > >          */
> > > > > > > > >
> > > > > > > > > >         if (rcar->needs_speed_change) {
> > > > > > > > >
> > > > > > > > > Seeing this is specified for the root port only what about
> > > > > > > > > replacing the statement with just test whether the rcar_gen4_pcie.mode ==
> > > > > > > > > DW_PCIE_RC_TYPE? Thus you'll be ablt to drop the needs_speed_change field.
> > > > > > > >
> > > > > > > > Thank you for the comment. I'll fix it.
> > > > > > > >
> > > > > > > > > BTW Just curious. Why is the loop below enabled for the Root Port
> > > > > > > > > only? What about the end-point controller? It's the same hardware
> > > > > > > > > after all..
> > > > > > > >
> > > > > > > > This is reused from v16 and then it used "link retraining" which is only for
> > > > > > > > the Root Port. As you mentioned, it seems endpoint controller is also needed
> > > > > > > > if we use direct speed change.
> > > > > > > >
> > > > > > > > > >                 for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > > > > > > >                         rcar_gen4_pcie_speed_change(dw);
> > > > > > > > > >                         msleep(100);
> > > > > > > > > >                         if (rcar_gen4_pcie_check_current_link(dw))
> > > > > > > > > >                                 return 0;
> > > > > > > > > >                 }
> > > > > > > > >
> > > > > > > > > Did you trace how many iterations this loop normally takes?
> > > > > > > >
> > > > > > > > i = 0 or 1 (if the max-link-speed is suitable for a connected device.)
> > > > > > > >
> > > > > > > > > Is it
> > > > > > > > > constant or varies for the same platform setup and a connected link
> > > > > > > > > partner? Does the number of iterations depend on the target link speed
> > > > > > > > > specified via the "max-link-speed" property?
> > > > > > > >
> > > > > > >
> > > > > > > > This is not related to the "max-link-speed". It seems to related to
> > > > > > > > a link partner.
> > > > > > > > 		LinkCap	max-link-speed	loop
> > > > > > > > Device A		4	4		1
> > > > > > > > Device A		4	3		1
> > > > > > > > Device B		3	3		0
> > > > > > >
> > > > > > > Great! If so I would have just left a single unconditional
> > > > > > > rcar_gen4_pcie_speed_change() call placed right after the
> > > > > > > rcar_gen4_pcie_ltssm_enable() method with no delays afterwards. These
> > > > > > > methods would have been invoked in the framework of
> > > > > > > dw_pcie_start_link() after which the dw_pcie_wait_for_link() method is
> > > > > > > called with several checks parted with the ~100ms delay. It will make
> > > > > > > sure that at least some link is up with the link state printed to the
> > > > > > > system log. If for some reason the performance degradation happens
> > > > > > > then it will be up to the system administrator to investigate what was
> > > > > > > wrong. Your driver did as much is it could to reach the best link gen.
> > > > > >
> > > > > > IIUC, is your suggestion like the following code?
> > > > > > ---
> > > > > > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > > 	if (!dw_pcie_wait_for_link(dw)) {
> > > > > > 		rcar_gen4_pcie_speed_change(dw);
> > > > > > 		return 0;
> > > > > > 	}
> > > > > > ---
> > > > > >
> > > > > > Unfortunately, it doesn't work correctly...
> > > > > > The following code can work correctly. The value of i is still 1 on the device A.
> > > > > > What do you think that the following code is acceptable?
> > > > > > ---
> > > > > > 	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > > > 		msleep(100);
> > > > > > 		rcar_gen4_pcie_speed_change(dw);
> > > > > > 		if (dw_pcie_link_up(dw)) {
> > > > > > 			printk("%s:%d\n", __func__, i);
> > > > > > 			return 0;
> > > > > > 		}
> > > > > > 	}
> > > > > > ---
> > > > >
> > > > > My idea was to implement something like this:
> > > > >
> > > > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > > > +{
> > > > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > > > +
> > > > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > > > +
> > > > > +	rcar_gen4_pcie_speed_change(dw);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > >
> > > > > and retain the rcar_gen4_pcie_link_up() method as is.
> > > >
> > > > Unfortunately, such a code doesn't work on my environment...
> > > >
> > > > > * Note: originally your loop used to have the msleep() call performed
> > > > > after the first rcar_gen4_pcie_speed_change() invocation. Thus the
> > > > > delay can be dropped if there is only one iteration implemented (see
> > > > > further to understand why).
> > > >
> > > > Calling rcar_gen4_pcie_speed_change() multiple times is required on
> > > > my environment. I thought msleep(100) was quite long so that I tried
> > > > other wait interval like below:
> > > >
> > > >  msleep(1) : about 5 loops is needed for link. (about 5 msec.)
> > > >  usleep_range(100, 110) : about 400 loops is needed for link. (about 40 msec.)
> > > >  usleep_range(500, 600) : about 80 loops is needed for link. (about 40 msec.)
> > > >
> > > > The delay timing doesn't seems important. Both cases below can work correctly.
> > > > --- case 1 ---
> > > > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > 		rcar_gen4_pcie_speed_change(dw);
> > > > 		if (dw_pcie_link_up(dw)) {
> > > > 			printk("%s:%d\n", __func__, i); // will be removed
> > > > 			return 0;
> > > > 		}
> > >
> > > > 		msleep(1);
> > >
> > > Why? Just set it to 5 ms. In anyway please see the next message.
> > >
> > > > 	}
> > > > ---
> > > > --- case 2 ---
> > > > 	for (i = 0; i < SPEED_CHANGE_MAX_RETRIES; i++) {
> > > > 		rcar_gen4_pcie_speed_change(dw);
> > > > 		msleep(1);
> > > > 		if (dw_pcie_link_up(dw)) {
> > > > 			printk("%s:%d\n", __func__, i); // will be removed
> > > > 			return 0;
> > > > 		}
> > > > 	}
> > > > ---
> > > >
> > > > So, I'll use case 1 for it.
> > >
> > > Ah. I think I get it now. Your spreadsheet:
> > >
> > >                 LinkCap max-link-speed  loop
> > > Device A           4          4           1
> > > Device A           4          3           1
> > > Device B           3          3           0
> > >
> > > actually meant (loop+1) iterations. So in case of Gen4 you'll need
> > > three speed changes (one already enabled in the dw_pcie_setup_rc()
> > > method and another two ones are performed in your loop). Similarly in
> > > case of Gen3 you'll need only one iteration. I bet you won't need to
> > > call rcar_gen4_pcie_speed_change() at all if gen2 needs to be trained.
> > > Could you try it out?
> > >
> > > Anyway based on what you discovered and on my experience working with
> > > that controller, there should be as many
> > > GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE flag changes as the target speed
> > > value, i.e. no flag switch if Gen1 is required, one flag switch if
> > > Gen2 is required and so on. Although I failed to find any explicit
> > > statement about that in the HW-manual.
> > >
> > > In addition to the above I've found out that
> > > GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE field is actually self cleared when
> > > the speed change occurs (see the register description in the HW
> > > reference manual). We can use it to implement the
> > > dw_pcie_link_up()-independent link training algorithm like this:
> > >
> > > +#define RCAR_RETRAIN_MAX_CHECK		10
> > > +#define RCAR_LINK_SPEED_MAX		4
> > > +
> > > +static bool rcar_gen4_pcie_speed_change(struct dw_pcie *dw)
> > > +{
> > > +	u32 val;
> > > +	int i;
> > > +
> > > +	val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> > > +	val &= ~PORT_LOGIC_SPEED_CHANGE;
> > > +	dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > > +
> > > +	val |= PORT_LOGIC_SPEED_CHANGE;
> > > +	dw_pcie_writel_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > > +
> > > +	for (i = 0; i < RCAR_SPEED_CHANGE_WAIT_RETRIES; i++) {
> > > +		val = dw_pcie_readl_dbi(dw, PCIE_LINK_WIDTH_SPEED_CONTROL);
> > > +		if (!(val & PORT_LOGIC_SPEED_CHANGE))
> > > +			return true;
> > > +
> > > +		msleep(1);
> > > +	}
> > > +
> > > +	return false;
> > > +}
> > > +
> > > +static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
> > > +{
> > > +	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
> > > +	int i, changes;
> > > +
> > > +	rcar_gen4_pcie_ltssm_enable(rcar, true);
> > > +
> > 
> > > +	changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX);
> > 
> > This should have been:
> > +changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX) - 1;
> > because Gen1 doesn't need any speed change action.
> > 
> > But this part can be further improved. Instead of the code above the
> > next snipped can be implemented:
> > 
> > +changes = min_not_zero(dw->link_gen, RCAR_LINK_SPEED_MAX) - 1;
> > +if (changes && rcar->mode == DW_PCIE_RC_TYPE)
> > +		changes -= 1;
> > 
> > It takes into account that the GEN2_CTRL_OFF.DIRECT_SPEED_CHANGE
> > flag is already set in the dw_pcie_setup_rc() method. So Gen2 will be
> > trained with no need in addition actions. If it's supported of course.
> 

> Thank you very much for your comments and suggestion! The suggestion code
> works correctly on my environment.

Awesome! Glad we've finally settled this.

-Serge(y)

> 
> Best regards,
> Yoshihiro Shimoda
> 
> P.S.
> So, I'm investigating endpoint mode issues which you commented now.
> 
> > -Serge(y)
> > 
> > > +	for (i = 0; i < changes; ++i) {
> > > +		if (!rcar_gen4_pcie_speed_change(dw))
> > > +			break;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > >
> > > Note 1. The actual link state will be checked in the framework of the
> > > dw_pcie_wait_for_link() function, by means of dw_pcie_link_up().
> > >
> > > Note 2. RCAR_LINK_SPEED_MAX is deliberately set to 4 because DW PCIe
> > > EP core driver doesn't set the PORT_LOGIC_SPEED_CHANGE flag. In case
> > > of the DW PCIe Root Port at most 3 iterations should be enough.
> > >
> > > Note 3. Please use the RCAR_ prefix for the vendor-specific macros.
> > > It concerns the entire series.
> > >
> > > Could you try out the code suggested above?
> > >
> > > -Serge(y)
> > >
> > > >
> > > > > You don't need to wait for the link to actually get up in the
> > > > > start_link() callback because there is the link_up() callback, which
> > > > > is called from the dw_pcie_wait_for_link() method during the generic
> > > > > DWC PCIe setup procedure. See:
> > > >
> > > > Since the procedure will call rcar_gen4_pcie_speed_change() from
> > > > ->start_link() once, my environment cannot work correctly...
> > > >
> > > > Best regards,
> > > > Yoshihiro Shimoda
> > > >
> > > > > dw_pcie_host_init():
> > > > > +-> ops->host_init()
> > > > > +-> ...
> > > > > +-> dw_pcie_setup_rc()
> > > > > |   +-> ...
> > > > > |   +-> dw_pcie_setup()
> > > > > |   +-> ...
> > > > > +-> if !dw_pcie_link_up()
> > > > > |   |   +-> ops->link_up()
> > > > > |   +-> dw_pcie_start_link()
> > > > > |       +-> ops->start_link()
> > > > > +-> dw_pcie_wait_for_link();   // See, wait-procedure is already performed
> > > > > |   +-> loop 10 times          // for you in the core driver together
> > > > > |       +-> dw_pcie_link_up()  // with the delays between the checks
> > > > > |           +-> ops->link_up()
> > > > > |       +-> msleep(~100)
> > > > > +-> ...
> > > > >
> > > > > -Serge(y)
> > > > >
> > > > > >
> > > > > > Best regards,
> > > > > > Yoshihiro Shimoda
> > > > > >
> > > > > > > -Serge(y)
> > > > > >
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index ab96da43e0c2..64d4d37bc891 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -415,4 +415,13 @@  config PCIE_VISCONTI_HOST
 	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
 	  This driver supports TMPV7708 SoC.
 
+config PCIE_RCAR_GEN4
+	tristate "Renesas R-Car Gen4 PCIe Host controller"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want PCIe host controller support on R-Car Gen4 SoCs.
+	  This uses the DesignWare core.
+
 endmenu
diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
index bf5c311875a1..486cf706b53d 100644
--- a/drivers/pci/controller/dwc/Makefile
+++ b/drivers/pci/controller/dwc/Makefile
@@ -26,6 +26,8 @@  obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
 obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
+pcie-rcar-gen4-host-drv-objs := pcie-rcar-gen4.o pcie-rcar-gen4-host.o
+obj-$(CONFIG_PCIE_RCAR_GEN4) += pcie-rcar-gen4-host-drv.o
 
 # The following drivers are for devices that use the generic ACPI
 # pci_root.c driver but don't support standard ECAM config access.
diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
new file mode 100644
index 000000000000..df7d80f1874f
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-rcar-gen4-host.c
@@ -0,0 +1,141 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PCIe host controller driver for Renesas R-Car Gen4 Series SoCs
+ * Copyright (C) 2022-2023 Renesas Electronics Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "pcie-rcar-gen4.h"
+#include "pcie-designware.h"
+
+static int rcar_gen4_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *dw = to_dw_pcie_from_pp(pp);
+	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
+	int ret;
+	u32 val;
+
+	gpiod_set_value_cansleep(dw->pe_rst, 1);
+
+	ret = rcar_gen4_pcie_set_device_type(rcar, true, dw->num_lanes);
+	if (ret < 0)
+		return ret;
+
+	dw_pcie_dbi_ro_wr_en(dw);
+
+	/*
+	 * According to the section 3.5.7.2 "RC Mode" in DWC PCIe Dual Mode
+	 * Rev.5.20a, we should disable two BARs to avoid unnecessary memory
+	 * assignment during device enumeration.
+	 */
+	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_0, 0x0);
+	dw_pcie_writel_dbi2(dw, PCI_BASE_ADDRESS_1, 0x0);
+
+	dw_pcie_dbi_ro_wr_dis(dw);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		/* Enable MSI interrupt signal */
+		val = readl(rcar->base + PCIEINTSTS0EN);
+		val |= MSI_CTRL_INT;
+		writel(val, rcar->base + PCIEINTSTS0EN);
+	}
+
+	msleep(100);	/* pe_rst requires 100msec delay */
+
+	gpiod_set_value_cansleep(dw->pe_rst, 0);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops rcar_gen4_pcie_host_ops = {
+	.host_init = rcar_gen4_pcie_host_init,
+};
+
+static int rcar_gen4_add_dw_pcie_rp(struct rcar_gen4_pcie *rcar,
+				   struct platform_device *pdev)
+{
+	struct dw_pcie *dw = &rcar->dw;
+	struct dw_pcie_rp *pp = &dw->pp;
+
+	pp->num_vectors = MAX_MSI_IRQS;
+	pp->ops = &rcar_gen4_pcie_host_ops;
+	dw_pcie_cap_set(dw, REQ_RES);
+
+	return dw_pcie_host_init(pp);
+}
+
+static void rcar_gen4_remove_dw_pcie_rp(struct rcar_gen4_pcie *rcar)
+{
+	dw_pcie_host_deinit(&rcar->dw.pp);
+	gpiod_set_value_cansleep(rcar->dw.pe_rst, 1);
+}
+
+static int rcar_gen4_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rcar_gen4_pcie *rcar;
+	int err;
+
+	rcar = rcar_gen4_pcie_devm_alloc(dev);
+	if (!rcar)
+		return -ENOMEM;
+
+	err = rcar_gen4_pcie_get_resources(rcar, pdev);
+	if (err < 0) {
+		dev_err(dev, "Failed to request resource: %d\n", err);
+		return err;
+	}
+
+	platform_set_drvdata(pdev, rcar);
+
+	err = rcar_gen4_pcie_prepare(rcar);
+	if (err < 0)
+		return err;
+
+	rcar->needs_retrain = true;
+	err = rcar_gen4_add_dw_pcie_rp(rcar, pdev);
+	if (err < 0)
+		goto err_add;
+
+	return 0;
+
+err_add:
+	rcar_gen4_pcie_unprepare(rcar);
+
+	return err;
+}
+
+static int rcar_gen4_pcie_remove(struct platform_device *pdev)
+{
+	struct rcar_gen4_pcie *rcar = platform_get_drvdata(pdev);
+
+	rcar_gen4_remove_dw_pcie_rp(rcar);
+	rcar_gen4_pcie_unprepare(rcar);
+
+	return 0;
+}
+
+static const struct of_device_id rcar_gen4_pcie_of_match[] = {
+	{ .compatible = "renesas,rcar-gen4-pcie", },
+	{},
+};
+
+static struct platform_driver rcar_gen4_pcie_driver = {
+	.driver = {
+		.name = "pcie-rcar-gen4",
+		.of_match_table = rcar_gen4_pcie_of_match,
+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+	},
+	.probe = rcar_gen4_pcie_probe,
+	.remove = rcar_gen4_pcie_remove,
+};
+module_platform_driver(rcar_gen4_pcie_driver);
+
+MODULE_DESCRIPTION("Renesas R-Car Gen4 PCIe host controller driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
new file mode 100644
index 000000000000..35923fda8ed5
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c
@@ -0,0 +1,190 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
+ * Copyright (C) 2022-2023 Renesas Electronics Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/of_device.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include "pcie-rcar-gen4.h"
+#include "pcie-designware.h"
+
+/* Renesas-specific */
+#define PCIERSTCTRL1		0x0014
+#define  APP_HOLD_PHY_RST	BIT(16)
+#define  APP_LTSSM_ENABLE	BIT(0)
+
+#define RETRAIN_MAX_CHECK	10
+#define RETRAIN_MAX_RETRIES	10
+
+static void rcar_gen4_pcie_ltssm_enable(struct rcar_gen4_pcie *rcar,
+					bool enable)
+{
+	u32 val;
+
+	val = readl(rcar->base + PCIERSTCTRL1);
+	if (enable) {
+		val |= APP_LTSSM_ENABLE;
+		val &= ~APP_HOLD_PHY_RST;
+	} else {
+		val &= ~APP_LTSSM_ENABLE;
+		val |= APP_HOLD_PHY_RST;
+	}
+	writel(val, rcar->base + PCIERSTCTRL1);
+}
+
+static bool rcar_gen4_pcie_check_retrain_link(struct dw_pcie *dw)
+{
+	u8 offset = dw_pcie_find_capability(dw, PCI_CAP_ID_EXP);
+	u32 lnkcap = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCAP);
+	u32 lnkctl = dw_pcie_readl_dbi(dw, offset + PCI_EXP_LNKCTL);
+	u16 lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
+	int i;
+
+	if ((lnksta & PCI_EXP_LNKSTA_CLS) == (lnkcap & PCI_EXP_LNKCAP_SLS))
+		return true;
+
+	lnkctl |= PCI_EXP_LNKCTL_RL;
+	dw_pcie_writel_dbi(dw, offset + PCI_EXP_LNKCTL, lnkctl);
+
+	for (i = 0; i < RETRAIN_MAX_CHECK; i++) {
+		lnksta = dw_pcie_readw_dbi(dw, offset + PCI_EXP_LNKSTA);
+		if (lnksta & PCI_EXP_LNKSTA_LT)
+			return true;
+		usleep_range(1000, 1100);
+	}
+
+	return false;
+}
+
+static int rcar_gen4_pcie_link_up(struct dw_pcie *dw)
+{
+	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
+	u32 val, mask;
+
+	val = readl(rcar->base + PCIEINTSTS0);
+	mask = RDLH_LINK_UP | SMLH_LINK_UP;
+
+	return (val & mask) == mask;
+}
+
+static int rcar_gen4_pcie_start_link(struct dw_pcie *dw)
+{
+	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
+	int i;
+
+	rcar_gen4_pcie_ltssm_enable(rcar, true);
+
+	/*
+	 * Require retraining here. Otherwise RDLH_LINK_UP of PCIEINTSTS0 which
+	 * is this controller specific register may not be set.
+	 */
+	if (rcar->needs_retrain) {
+		for (i = 0; i < RETRAIN_MAX_RETRIES; i++) {
+			if (rcar_gen4_pcie_check_retrain_link(dw))
+				return 0;
+			msleep(100);
+		}
+
+		return -ETIMEDOUT;	/* Failed */
+	}
+
+	return 0;
+}
+
+static void rcar_gen4_pcie_stop_link(struct dw_pcie *dw)
+{
+	struct rcar_gen4_pcie *rcar = to_rcar_gen4_pcie(dw);
+
+	rcar_gen4_pcie_ltssm_enable(rcar, false);
+}
+
+int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
+				   int num_lanes)
+{
+	u32 val;
+
+	/* Note: Assume the rcar->rst which is Cold-reset is asserted here */
+	val = readl(rcar->base + PCIEMSR0);
+	if (rc)
+		val |= DEVICE_TYPE_RC;
+	else
+		val |= DEVICE_TYPE_EP;
+
+	if (num_lanes < 4)
+		val |= BIFUR_MOD_SET_ON;
+
+	writel(val, rcar->base + PCIEMSR0);
+
+	return reset_control_deassert(rcar->rst);
+}
+
+int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *rcar)
+{
+	struct device *dev = rcar->dw.dev;
+	int err;
+
+	pm_runtime_enable(dev);
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+		dev_err(dev, "Failed to resume/get Runtime PM\n");
+		pm_runtime_disable(dev);
+	}
+
+	return err;
+}
+
+void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *rcar)
+{
+	struct device *dev = rcar->dw.dev;
+
+	if (!reset_control_status(rcar->rst))
+		reset_control_assert(rcar->rst);
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+}
+
+int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
+				 struct platform_device *pdev)
+{
+	struct device *dev = rcar->dw.dev;
+
+	/* Renesas-specific registers */
+	rcar->base = devm_platform_ioremap_resource_byname(pdev, "app");
+	if (IS_ERR(rcar->base))
+		return PTR_ERR(rcar->base);
+
+	rcar->rst = devm_reset_control_get(dev, NULL);
+	if (IS_ERR(rcar->rst)) {
+		dev_err(dev, "Failed to get Cold-reset\n");
+		return PTR_ERR(rcar->rst);
+	}
+
+	return 0;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.start_link = rcar_gen4_pcie_start_link,
+	.stop_link = rcar_gen4_pcie_stop_link,
+	.link_up = rcar_gen4_pcie_link_up,
+};
+
+struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev)
+{
+	struct rcar_gen4_pcie *rcar;
+
+	rcar = devm_kzalloc(dev, sizeof(*rcar), GFP_KERNEL);
+	if (!rcar)
+		return NULL;
+
+	rcar->dw.dev = dev;
+	rcar->dw.ops = &dw_pcie_ops;
+	dw_pcie_cap_set(&rcar->dw, EDMA_UNROLL);
+
+	return rcar;
+}
diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.h b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
new file mode 100644
index 000000000000..fec3f18609f4
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.h
@@ -0,0 +1,46 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * PCIe host/endpoint controller driver for Renesas R-Car Gen4 Series SoCs
+ * Copyright (C) 2022-2023 Renesas Electronics Corporation
+ */
+
+#ifndef _PCIE_RCAR_GEN4_H_
+#define _PCIE_RCAR_GEN4_H_
+
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/reset.h>
+
+#include "pcie-designware.h"
+
+/* Renesas-specific */
+#define PCIEMSR0		0x0000
+#define  BIFUR_MOD_SET_ON	BIT(0)
+#define  DEVICE_TYPE_EP		0
+#define  DEVICE_TYPE_RC		BIT(4)
+
+#define PCIEINTSTS0		0x0084
+#define PCIEINTSTS0EN		0x0310
+#define  MSI_CTRL_INT		BIT(26)
+#define  SMLH_LINK_UP		BIT(7)
+#define  RDLH_LINK_UP		BIT(6)
+#define PCIEDMAINTSTSEN		0x0314
+#define  PCIEDMAINTSTSEN_INIT	GENMASK(15, 0)
+
+struct rcar_gen4_pcie {
+	struct dw_pcie		dw;
+	void __iomem		*base;
+	struct reset_control	*rst;
+	bool			needs_retrain;
+};
+#define to_rcar_gen4_pcie(x)	dev_get_drvdata((x)->dev)
+
+int rcar_gen4_pcie_set_device_type(struct rcar_gen4_pcie *rcar, bool rc,
+				   int num_lanes);
+int rcar_gen4_pcie_prepare(struct rcar_gen4_pcie *pcie);
+void rcar_gen4_pcie_unprepare(struct rcar_gen4_pcie *pcie);
+int rcar_gen4_pcie_get_resources(struct rcar_gen4_pcie *rcar,
+				 struct platform_device *pdev);
+struct rcar_gen4_pcie *rcar_gen4_pcie_devm_alloc(struct device *dev);
+
+#endif /* _PCIE_RCAR_GEN4_H_ */