
[09/10] PCI: tegra: Add Tegra194 PCIe support

Message ID 1553613207-3988-10-git-send-email-vidyas@nvidia.com (mailing list archive)
State New, archived
Series Add Tegra194 PCIe support

Commit Message

Vidya Sagar March 26, 2019, 3:13 p.m. UTC
Add support for Synopsys DesignWare core IP based PCIe host controller
present in Tegra194 SoC.

Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
---
 drivers/pci/controller/dwc/Kconfig         |   10 +
 drivers/pci/controller/dwc/Makefile        |    1 +
 drivers/pci/controller/dwc/pcie-tegra194.c | 1862 ++++++++++++++++++++++++++++
 3 files changed, 1873 insertions(+)
 create mode 100644 drivers/pci/controller/dwc/pcie-tegra194.c

Comments

Jon Hunter March 27, 2019, 10:07 a.m. UTC | #1
On 26/03/2019 15:13, Vidya Sagar wrote:
> Add support for Synopsys DesignWare core IP based PCIe host controller
> present in Tegra194 SoC.
> 
> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
> ---
>  drivers/pci/controller/dwc/Kconfig         |   10 +
>  drivers/pci/controller/dwc/Makefile        |    1 +
>  drivers/pci/controller/dwc/pcie-tegra194.c | 1862 ++++++++++++++++++++++++++++
>  3 files changed, 1873 insertions(+)
>  create mode 100644 drivers/pci/controller/dwc/pcie-tegra194.c
> 
> diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> index 6ea74b1c0d94..d80f2d77892a 100644
> --- a/drivers/pci/controller/dwc/Kconfig
> +++ b/drivers/pci/controller/dwc/Kconfig
> @@ -213,4 +213,14 @@ config PCIE_UNIPHIER
>  	  Say Y here if you want PCIe controller support on UniPhier SoCs.
>  	  This driver supports LD20 and PXs3 SoCs.
>  
> +config PCIE_TEGRA194
> +	bool "NVIDIA Tegra (T194) PCIe controller"
> +	depends on TEGRA_BPMP && (ARCH_TEGRA || COMPILE_TEST)
> +	depends on PCI_MSI_IRQ_DOMAIN
> +	select PCIE_DW_HOST
> +	select PHY_TEGRA194_PCIE_P2U
> +	help
> +	  Say Y here if you want support for DesignWare core based PCIe host
> +	  controller found in NVIDIA Tegra T194 SoC.
> +

Don't we want tristate here? You have a removal function.

Cheers
Jon
Bjorn Helgaas March 29, 2019, 8:52 p.m. UTC | #2
Hi Vidya,

Wow, there's a lot of nice work here!  Thanks for that!

On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
> Add support for Synopsys DesignWare core IP based PCIe host controller
> present in Tegra194 SoC.

General comments:

  - There are a few multi-line comments that don't match the
    prevailing style:

        /*
	 * Text...
	 */

  - Comments and dev_info()/dev_err() messages are inconsistent about
    starting with upper-case or lower-case letters.

  - Use "MSI", "IRQ", "PCIe", "CPU", etc in comments and messages.

  - There are a few functions that use "&pdev->dev" many times; can
    you add a "struct device *dev = &pdev->dev" to reduce the
    redundancy?

> +#include "../../pcie/portdrv.h"

What's this for?  I didn't see any obvious uses of things from
portdrv.h, but I didn't actually try to build without it.

> +struct tegra_pcie_dw {
> +	struct device		*dev;
> +	struct resource		*appl_res;
> +	struct resource		*dbi_res;
> +	struct resource		*atu_dma_res;
> +	void __iomem		*appl_base;
> +	struct clk		*core_clk;
> +	struct reset_control	*core_apb_rst;
> +	struct reset_control	*core_rst;
> +	struct dw_pcie		pci;
> +	enum dw_pcie_device_mode mode;
> +
> +	bool disable_clock_request;
> +	bool power_down_en;
> +	u8 init_link_width;
> +	bool link_state;
> +	u32 msi_ctrl_int;
> +	u32 num_lanes;
> +	u32 max_speed;
> +	u32 init_speed;
> +	bool cdm_check;
> +	u32 cid;
> +	int pex_wake;
> +	bool update_fc_fixup;
> +	int n_gpios;
> +	int *gpios;
> +#if defined(CONFIG_PCIEASPM)
> +	u32 cfg_link_cap_l1sub;
> +	u32 event_cntr_ctrl;
> +	u32 event_cntr_data;
> +	u32 aspm_cmrt;
> +	u32 aspm_pwr_on_t;
> +	u32 aspm_l0s_enter_lat;
> +	u32 disabled_aspm_states;
> +#endif

The above could be indented the same as the rest of the struct?

> +	struct regulator	*pex_ctl_reg;
> +
> +	int			phy_count;
> +	struct phy		**phy;
> +
> +	struct dentry		*debugfs;
> +};

> +static void apply_bad_link_workaround(struct pcie_port *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
> +	u16 val;
> +
> +	/*
> +	 * NOTE:- Since this scenario is uncommon and link as
> +	 * such is not stable anyway, not waiting to confirm
> +	 * if link is really transiting to Gen-2 speed

s/transiting/transitioning/

I think there are other uses of "transit" when you mean "transition".

> +static int tegra_pcie_dw_rd_own_conf(struct pcie_port *pp, int where, int size,
> +				     u32 *val)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +
> +	/*
> +	 * This is an endpoint mode specific register happen to appear even
> +	 * when controller is operating in root port mode and system hangs
> +	 * when it is accessed with link being in ASPM-L1 state.
> +	 * So skip accessing it altogether
> +	 */
> +	if (where == PORT_LOGIC_MSIX_DOORBELL) {
> +		*val = 0x00000000;
> +		return PCIBIOS_SUCCESSFUL;
> +	} else {
> +		return dw_pcie_read(pci->dbi_base + where, size, val);
> +	}
> +}
> +
> +static int tegra_pcie_dw_wr_own_conf(struct pcie_port *pp, int where, int size,
> +				     u32 val)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +
> +	/* This is EP specific register and system hangs when it is
> +	 * accessed with link being in ASPM-L1 state.
> +	 * So skip accessing it altogether
> +	 */
> +	if (where == PORT_LOGIC_MSIX_DOORBELL)
> +		return PCIBIOS_SUCCESSFUL;
> +	else
> +		return dw_pcie_write(pci->dbi_base + where, size, val);

These two functions are almost identical and they could look more
similar.  This one has the wrong multi-line comment style, uses "EP"
instead of "endpoint", etc.  Use this style for the "if" since the
first case is really an error case:

  if (where == PORT_LOGIC_MSIX_DOORBELL) {
    ...
    return ...;
  }

  return dw_pcie_...();

> +static int tegra_pcie_dw_host_init(struct pcie_port *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
> +	int count = 200;
> +	u32 val, tmp, offset;
> +	u16 val_w;
> +
> +#if defined(CONFIG_PCIEASPM)
> +	pcie->cfg_link_cap_l1sub =
> +		dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS) +
> +		PCI_L1SS_CAP;
> +#endif
> +	val = dw_pcie_readl_dbi(pci, PCI_IO_BASE);
> +	val &= ~(IO_BASE_IO_DECODE | IO_BASE_IO_DECODE_BIT8);
> +	dw_pcie_writel_dbi(pci, PCI_IO_BASE, val);
> +
> +	val = dw_pcie_readl_dbi(pci, PCI_PREF_MEMORY_BASE);
> +	val |= CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE;
> +	val |= CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE;
> +	dw_pcie_writel_dbi(pci, PCI_PREF_MEMORY_BASE, val);
> +
> +	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
> +
> +	/* Configure FTS */
> +	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
> +	val &= ~(N_FTS_MASK << N_FTS_SHIFT);
> +	val |= N_FTS_VAL << N_FTS_SHIFT;
> +	dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
> +
> +	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_GEN2_CTRL);
> +	val &= ~FTS_MASK;
> +	val |= FTS_VAL;
> +	dw_pcie_writel_dbi(pci, PORT_LOGIC_GEN2_CTRL, val);
> +
> +	/* Enable as 0xFFFF0001 response for CRS */
> +	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT);
> +	val &= ~(AMBA_ERROR_RESPONSE_CRS_MASK << AMBA_ERROR_RESPONSE_CRS_SHIFT);
> +	val |= (AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001 <<
> +		AMBA_ERROR_RESPONSE_CRS_SHIFT);
> +	dw_pcie_writel_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT, val);
> +
> +	/* Set MPS to 256 in DEV_CTL */
> +	val = dw_pcie_readl_dbi(pci, CFG_DEV_STATUS_CONTROL);
> +	val &= ~PCI_EXP_DEVCTL_PAYLOAD;
> +	val |= (1 << CFG_DEV_STATUS_CONTROL_MPS_SHIFT);
> +	dw_pcie_writel_dbi(pci, CFG_DEV_STATUS_CONTROL, val);
> +
> +	/* Configure Max Speed from DT */
> +	val = dw_pcie_readl_dbi(pci, CFG_LINK_CAP);
> +	val &= ~PCI_EXP_LNKCAP_SLS;
> +	val |= pcie->max_speed;
> +	dw_pcie_writel_dbi(pci, CFG_LINK_CAP, val);
> +
> +	val = dw_pcie_readw_dbi(pci, CFG_LINK_CONTROL_2);
> +	val &= ~PCI_EXP_LNKCTL2_TLS;
> +	val |= pcie->init_speed;
> +	dw_pcie_writew_dbi(pci, CFG_LINK_CONTROL_2, val);
> +
> +	/* Configure Max lane width from DT */
> +	val = dw_pcie_readl_dbi(pci, CFG_LINK_CAP);
> +	val &= ~PCI_EXP_LNKCAP_MLW;
> +	val |= (pcie->num_lanes << PCI_EXP_LNKSTA_NLW_SHIFT);
> +	dw_pcie_writel_dbi(pci, CFG_LINK_CAP, val);
> +
> +	config_gen3_gen4_eq_presets(pcie);
> +
> +#if defined(CONFIG_PCIEASPM)
> +	/* Enable ASPM counters */
> +	val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
> +	val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
> +	dw_pcie_writel_dbi(pci, pcie->event_cntr_ctrl, val);
> +
> +	/* Program T_cmrt and T_pwr_on values */
> +	val = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
> +	val &= ~(PCI_L1SS_CAP_CM_RESTORE_TIME | PCI_L1SS_CAP_P_PWR_ON_VALUE);
> +	val |= (pcie->aspm_cmrt << PCI_L1SS_CAP_CM_RTM_SHIFT);
> +	val |= (pcie->aspm_pwr_on_t << PCI_L1SS_CAP_PWRN_VAL_SHIFT);
> +	dw_pcie_writel_dbi(pci, pcie->cfg_link_cap_l1sub, val);
> +
> +	/* Program L0s and L1 entrance latencies */
> +	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
> +	val &= ~L0S_ENTRANCE_LAT_MASK;
> +	val |= (pcie->aspm_l0s_enter_lat << L0S_ENTRANCE_LAT_SHIFT);
> +	val |= ENTER_ASPM;
> +	dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
> +
> +	/* Program what ASPM states sould get advertised */

s/sould/should/

> +	if (pcie->disabled_aspm_states & 0x1)
> +		disable_aspm_l0s(pcie); /* Disable L0s */
> +	if (pcie->disabled_aspm_states & 0x2) {
> +		disable_aspm_l10(pcie); /* Disable L1 */
> +		disable_aspm_l11(pcie); /* Disable L1.1 */
> +		disable_aspm_l12(pcie); /* Disable L1.2 */
> +	}
> +	if (pcie->disabled_aspm_states & 0x4)
> +		disable_aspm_l11(pcie); /* Disable L1.1 */
> +	if (pcie->disabled_aspm_states & 0x8)
> +		disable_aspm_l12(pcie); /* Disable L1.2 */
> +#endif
> +	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
> +	val &= ~GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL;
> +	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
> +
> +	if (pcie->update_fc_fixup) {
> +		val = dw_pcie_readl_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF);
> +		val |= 0x1 << CFG_TIMER_CTRL_ACK_NAK_SHIFT;
> +		dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val);
> +	}
> +
> +	/* CDM check enable */
> +	if (pcie->cdm_check) {
> +		val = dw_pcie_readl_dbi(pci,
> +					PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS);
> +		val |= PORT_LOGIC_PL_CHK_REG_CHK_REG_CONTINUOUS;
> +		val |= PORT_LOGIC_PL_CHK_REG_CHK_REG_START;
> +		dw_pcie_writel_dbi(pci, PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS,
> +				   val);
> +	}
> +
> +	dw_pcie_setup_rc(pp);
> +
> +	clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ);
> +
> +	/* assert RST */
> +	val = readl(pcie->appl_base + APPL_PINMUX);
> +	val &= ~APPL_PINMUX_PEX_RST;
> +	writel(val, pcie->appl_base + APPL_PINMUX);
> +
> +	usleep_range(100, 200);
> +
> +	/* enable LTSSM */
> +	val = readl(pcie->appl_base + APPL_CTRL);
> +	val |= APPL_CTRL_LTSSM_EN;
> +	writel(val, pcie->appl_base + APPL_CTRL);
> +
> +	/* de-assert RST */
> +	val = readl(pcie->appl_base + APPL_PINMUX);
> +	val |= APPL_PINMUX_PEX_RST;
> +	writel(val, pcie->appl_base + APPL_PINMUX);
> +
> +	msleep(100);
> +
> +	val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
> +	while (!(val_w & PCI_EXP_LNKSTA_DLLLA)) {
> +		if (!count) {
> +			val = readl(pcie->appl_base + APPL_DEBUG);
> +			val &= APPL_DEBUG_LTSSM_STATE_MASK;
> +			val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
> +			tmp = readl(pcie->appl_base + APPL_LINK_STATUS);
> +			tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
> +			if (val == 0x11 && !tmp) {
> +				dev_info(pci->dev, "link is down in DLL");
> +				dev_info(pci->dev,
> +					 "trying again with DLFE disabled\n");
> +				/* disable LTSSM */
> +				val = readl(pcie->appl_base + APPL_CTRL);
> +				val &= ~APPL_CTRL_LTSSM_EN;
> +				writel(val, pcie->appl_base + APPL_CTRL);
> +
> +				reset_control_assert(pcie->core_rst);
> +				reset_control_deassert(pcie->core_rst);
> +
> +				offset =
> +				dw_pcie_find_ext_capability(pci,
> +							    PCI_EXT_CAP_ID_DLF)
> +				+ PCI_DLF_CAP;

This capability offset doesn't change, does it?  Could it be computed
outside the loop?

> +				val = dw_pcie_readl_dbi(pci, offset);
> +				val &= ~DL_FEATURE_EXCHANGE_EN;
> +				dw_pcie_writel_dbi(pci, offset, val);
> +
> +				tegra_pcie_dw_host_init(&pcie->pci.pp);

This looks like some sort of "wait for link up" retry loop, but a
recursive call seems a little unusual.  My 5 second analysis is that
the loop could run this 200 times, and you sure don't want the
possibility of a 200-deep call chain.  Is there way to split out the
host init from the link-up polling?

> +				return 0;
> +			}
> +			dev_info(pci->dev, "link is down\n");
> +			return 0;
> +		}
> +		dev_dbg(pci->dev, "polling for link up\n");
> +		usleep_range(1000, 2000);
> +		val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
> +		count--;
> +	}
> +	dev_info(pci->dev, "link is up\n");
> +
> +	tegra_pcie_enable_interrupts(pp);
> +
> +	return 0;
> +}

> +static void tegra_pcie_dw_scan_bus(struct pcie_port *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
> +	u32 speed;
> +
> +	if (!tegra_pcie_dw_link_up(pci))
> +		return;
> +
> +	speed = (dw_pcie_readw_dbi(pci, CFG_LINK_STATUS) & PCI_EXP_LNKSTA_CLS);
> +	clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);

I don't understand what's happening here.  This is named
tegra_pcie_dw_scan_bus(), but it doesn't actually scan anything.
Maybe it's just a bad name for the dw_pcie_host_ops hook
(ks_pcie_v3_65_scan_bus() is the only other .scan_bus()
implementation, and it doesn't scan anything either).

dw_pcie_host_init() calls pci_scan_root_bus_bridge(), which actually
*does* scan the bus, but it does it before calling
pp->ops->scan_bus().  I'd say by the time we get to
pci_scan_root_bus_bridge(), the device-specific init should be all
done and we should be using only generic PCI core interfaces.

Maybe this stuff could/should be done in the ->host_init() hook?  The
code between ->host_init() and ->scan_bus() is all generic code with
no device-specific stuff, so I don't know why we need both hooks.

> +static int tegra_pcie_enable_phy(struct tegra_pcie_dw *pcie)
> +{
> +	int phy_count = pcie->phy_count;
> +	int ret;
> +	int i;
> +
> +	for (i = 0; i < phy_count; i++) {
> +		ret = phy_init(pcie->phy[i]);
> +		if (ret < 0)
> +			goto err_phy_init;
> +
> +		ret = phy_power_on(pcie->phy[i]);
> +		if (ret < 0) {
> +			phy_exit(pcie->phy[i]);
> +			goto err_phy_power_on;
> +		}
> +	}
> +
> +	return 0;
> +
> +	while (i >= 0) {
> +		phy_power_off(pcie->phy[i]);
> +err_phy_power_on:
> +		phy_exit(pcie->phy[i]);
> +err_phy_init:
> +		i--;
> +	}

Wow, jumping into the middle of that loop is clever ;)  Can't decide
what I think of it, but it's certainly clever!
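
For reference, the more conventional unwind would be something along
these lines (just a sketch, not necessarily a request to change it):

  err:
          while (--i >= 0) {
                  phy_power_off(pcie->phy[i]);
                  phy_exit(pcie->phy[i]);
          }

          return ret;

with both failure paths in the loop doing "goto err" (the phy_power_on()
failure case still doing its own phy_exit() first, as it does today).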

> +	return ret;
> +}
> +
> +static int tegra_pcie_dw_parse_dt(struct tegra_pcie_dw *pcie)
> +{
> +	struct device_node *np = pcie->dev->of_node;
> +	int ret;
> +
> +#if defined(CONFIG_PCIEASPM)
> +	ret = of_property_read_u32(np, "nvidia,event-cntr-ctrl",
> +				   &pcie->event_cntr_ctrl);
> +	if (ret < 0) {
> +		dev_err(pcie->dev, "fail to read event-cntr-ctrl: %d\n", ret);
> +		return ret;
> +	}

The fact that you return error here if DT lacks the
"nvidia,event-cntr-ctrl" property, but only if CONFIG_PCIEASPM=y,
means that you have a revlock between the DT and the kernel: if you
update the kernel to enable CONFIG_PCIEASPM, you may also have to
update your DT.

Maybe that's OK, but I think it'd be nicer if you always required the
presence of these properties, even if you only actually *use* them
when CONFIG_PCIEASPM=y.
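
I.e., something like this (sketch) unconditionally in
tegra_pcie_dw_parse_dt():

  ret = of_property_read_u32(np, "nvidia,event-cntr-ctrl",
                             &pcie->event_cntr_ctrl);
  if (ret < 0) {
          dev_err(pcie->dev, "fail to read event-cntr-ctrl: %d\n", ret);
          return ret;
  }

and then only *use* the values where they matter:

  if (IS_ENABLED(CONFIG_PCIEASPM)) {
          /* program the event counter / L1SS registers */
  }

That keeps the DT requirements identical regardless of the kernel config
(it would also mean dropping the #if around those struct members).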

> +static int tegra_pcie_dw_probe(struct platform_device *pdev)
> +{
> +	struct tegra_pcie_dw *pcie;
> +	struct pcie_port *pp;
> +	struct dw_pcie *pci;
> +	struct phy **phy;
> +	struct resource	*dbi_res;
> +	struct resource	*atu_dma_res;
> +	const struct of_device_id *match;
> +	const struct tegra_pcie_of_data *data;
> +	char *name;
> +	int ret, i;
> +
> +	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
> +	if (!pcie)
> +		return -ENOMEM;
> +
> +	pci = &pcie->pci;
> +	pci->dev = &pdev->dev;
> +	pci->ops = &tegra_dw_pcie_ops;
> +	pp = &pci->pp;
> +	pcie->dev = &pdev->dev;
> +
> +	match = of_match_device(of_match_ptr(tegra_pcie_dw_of_match),
> +				&pdev->dev);
> +	if (!match)
> +		return -EINVAL;

Logically could be the first thing in the function since it doesn't
depend on anything.

> +	data = (struct tegra_pcie_of_data *)match->data;
> +	pcie->mode = (enum dw_pcie_device_mode)data->mode;
> +
> +	ret = tegra_pcie_dw_parse_dt(pcie);
> +	if (ret < 0) {
> +		dev_err(pcie->dev, "device tree parsing failed: %d\n", ret);
> +		return ret;
> +	}
> +
> +	if (gpio_is_valid(pcie->pex_wake)) {
> +		ret = devm_gpio_request(pcie->dev, pcie->pex_wake,
> +					"pcie_wake");
> +		if (ret < 0) {
> +			if (ret == -EBUSY) {
> +				dev_err(pcie->dev,
> +					"pex_wake already in use\n");
> +				pcie->pex_wake = -EINVAL;

This looks strange.  "pex_wake == -EINVAL" doesn't look right, and
you're about to pass it to gpio_direction_input(), which looks wrong.

> +			} else {
> +				dev_err(pcie->dev,
> +					"pcie_wake gpio_request failed %d\n",
> +					ret);
> +				return ret;
> +			}
> +		}
> +
> +		ret = gpio_direction_input(pcie->pex_wake);
> +		if (ret < 0) {
> +			dev_err(pcie->dev,
> +				"setting pcie_wake input direction failed %d\n",
> +				ret);
> +			return ret;
> +		}
> +		device_init_wakeup(pcie->dev, true);
> +	}
> +
> +	pcie->pex_ctl_reg = devm_regulator_get(&pdev->dev, "vddio-pex-ctl");
> +	if (IS_ERR(pcie->pex_ctl_reg)) {
> +		dev_err(&pdev->dev, "fail to get regulator: %ld\n",
> +			PTR_ERR(pcie->pex_ctl_reg));
> +		return PTR_ERR(pcie->pex_ctl_reg);
> +	}
> +
> +	pcie->core_clk = devm_clk_get(&pdev->dev, "core_clk");
> +	if (IS_ERR(pcie->core_clk)) {
> +		dev_err(&pdev->dev, "Failed to get core clock\n");
> +		return PTR_ERR(pcie->core_clk);
> +	}
> +
> +	pcie->appl_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
> +						      "appl");
> +	if (!pcie->appl_res) {
> +		dev_err(&pdev->dev, "missing appl space\n");
> +		return PTR_ERR(pcie->appl_res);
> +	}
> +	pcie->appl_base = devm_ioremap_resource(&pdev->dev, pcie->appl_res);
> +	if (IS_ERR(pcie->appl_base)) {
> +		dev_err(&pdev->dev, "mapping appl space failed\n");
> +		return PTR_ERR(pcie->appl_base);
> +	}
> +
> +	pcie->core_apb_rst = devm_reset_control_get(pcie->dev, "core_apb_rst");
> +	if (IS_ERR(pcie->core_apb_rst)) {
> +		dev_err(pcie->dev, "PCIE : core_apb_rst reset is missing\n");

This error message looks different from the others ("PCIE :" prefix).

> +		return PTR_ERR(pcie->core_apb_rst);
> +	}
> +
> +	phy = devm_kcalloc(pcie->dev, pcie->phy_count, sizeof(*phy),
> +			   GFP_KERNEL);
> +	if (!phy)
> +		return PTR_ERR(phy);
> +
> +	for (i = 0; i < pcie->phy_count; i++) {
> +		name = kasprintf(GFP_KERNEL, "pcie-p2u-%u", i);
> +		if (!name) {
> +			dev_err(pcie->dev, "failed to create p2u string\n");
> +			return -ENOMEM;
> +		}
> +		phy[i] = devm_phy_get(pcie->dev, name);
> +		kfree(name);
> +		if (IS_ERR(phy[i])) {
> +			ret = PTR_ERR(phy[i]);
> +			dev_err(pcie->dev, "phy_get error: %d\n", ret);
> +			return ret;
> +		}
> +	}
> +
> +	pcie->phy = phy;
> +
> +	dbi_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
> +	if (!dbi_res) {
> +		dev_err(&pdev->dev, "missing config space\n");
> +		return PTR_ERR(dbi_res);
> +	}
> +	pcie->dbi_res = dbi_res;
> +
> +	pci->dbi_base = devm_ioremap_resource(&pdev->dev, dbi_res);
> +	if (IS_ERR(pci->dbi_base)) {
> +		dev_err(&pdev->dev, "mapping dbi space failed\n");
> +		return PTR_ERR(pci->dbi_base);
> +	}
> +
> +	/* Tegra HW locates DBI2 at a fixed offset from DBI */
> +	pci->dbi_base2 = pci->dbi_base + 0x1000;
> +
> +	atu_dma_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
> +						   "atu_dma");
> +	if (!atu_dma_res) {
> +		dev_err(&pdev->dev, "missing atu_dma space\n");
> +		return PTR_ERR(atu_dma_res);
> +	}
> +	pcie->atu_dma_res = atu_dma_res;
> +	pci->atu_base = devm_ioremap_resource(&pdev->dev, atu_dma_res);
> +	if (IS_ERR(pci->atu_base)) {
> +		dev_err(&pdev->dev, "mapping atu space failed\n");
> +		return PTR_ERR(pci->atu_base);
> +	}
> +
> +	pcie->core_rst = devm_reset_control_get(pcie->dev, "core_rst");
> +	if (IS_ERR(pcie->core_rst)) {
> +		dev_err(pcie->dev, "PCIE : core_rst reset is missing\n");

Different message format again.

> +		return PTR_ERR(pcie->core_rst);
> +	}
> +
> +	pp->irq = platform_get_irq_byname(pdev, "intr");
> +	if (!pp->irq) {
> +		dev_err(pcie->dev, "failed to get intr interrupt\n");
> +		return -ENODEV;
> +	}
> +
> +	ret = devm_request_irq(&pdev->dev, pp->irq, tegra_pcie_irq_handler,
> +			       IRQF_SHARED, "tegra-pcie-intr", pcie);
> +	if (ret) {
> +		dev_err(pcie->dev, "failed to request \"intr\" irq\n");

It'd be nice to include the actual IRQ, i.e., "IRQ %d", pp->irq.

> +		return ret;
> +	}
> +
> +	platform_set_drvdata(pdev, pcie);
> +
> +	if (pcie->mode == DW_PCIE_RC_TYPE) {
> +		ret = tegra_pcie_config_rp(pcie);
> +		if (ret == -ENOMEDIUM)
> +			ret = 0;
> +	}
> +
> +	return ret;
> +}

> +static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
> +{
> +	struct pci_dev *pdev = NULL;

Unnecessary initialization.

> +	struct pci_bus *child;
> +	struct pcie_port *pp = &pcie->pci.pp;
> +
> +	list_for_each_entry(child, &pp->bus->children, node) {
> +		/* Bring downstream devices to D0 if they are not already in */
> +		if (child->parent == pp->bus) {
> +			pdev = pci_get_slot(child, PCI_DEVFN(0, 0));
> +			pci_dev_put(pdev);
> +			if (!pdev)
> +				break;

I don't really like this dance with iterating over the bus children,
comparing parent to pp->bus, pci_get_slot(), pci_dev_put(), etc.

I guess the idea is to bring only the directly-downstream devices to
D0, not to do it for things deeper in the hierarchy?

Is this some Tegra-specific wrinkle?  I don't think other drivers do
this.

I see that an earlier patch added "bus" to struct pcie_port.  I think
it would be better to somehow connect to the pci_host_bridge struct.
Several other drivers already do this; see uses of
pci_host_bridge_from_priv().

That would give you the bus, as well as flags like no_ext_tags,
native_aer, etc, which this driver, being a host bridge driver that's
responsible for this part of the firmware/OS interface, may
conceivably need.

Rather than pci_get_slot(), couldn't you iterate over bus->devices and
just skip the non-PCI_DEVFN(0, 0) devices?
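
Something like this, roughly (sketch, untested):

  struct pcie_port *pp = &pcie->pci.pp;
  struct pci_bus *child;
  struct pci_dev *dev;

  list_for_each_entry(child, &pp->bus->children, node) {
          /* Only the buses directly below the root ports */
          if (child->parent != pp->bus)
                  continue;

          list_for_each_entry(dev, &child->devices, bus_list) {
                  if (dev->devfn != PCI_DEVFN(0, 0))
                          continue;

                  if (pci_set_power_state(dev, PCI_D0))
                          dev_err(pcie->dev, "D0 transition failed\n");
          }
  }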

> +
> +			if (pci_set_power_state(pdev, PCI_D0))
> +				dev_err(pcie->dev, "D0 transition failed\n");
> +		}
> +	}
> +}

> +static int tegra_pcie_dw_remove(struct platform_device *pdev)
> +{
> +	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
> +
> +	if (pcie->mode == DW_PCIE_RC_TYPE) {

Return early if it's not RC and unindent the rest of the function.

> +		if (!pcie->link_state && pcie->power_down_en)
> +			return 0;
> +
> +		debugfs_remove_recursive(pcie->debugfs);
> +		pm_runtime_put_sync(pcie->dev);
> +		pm_runtime_disable(pcie->dev);
> +	}
> +
> +	return 0;
> +}

> +static int tegra_pcie_dw_runtime_suspend(struct device *dev)
> +{
> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
> +
> +	tegra_pcie_downstream_dev_to_D0(pcie);
> +
> +	pci_stop_root_bus(pcie->pci.pp.bus);
> +	pci_remove_root_bus(pcie->pci.pp.bus);

Why are you calling these?  No other drivers do this except in their
.remove() methods.  Is there something special about Tegra, or is this
something the other drivers *should* be doing?

> +	tegra_pcie_dw_pme_turnoff(pcie);
> +
> +	reset_control_assert(pcie->core_rst);
> +	tegra_pcie_disable_phy(pcie);
> +	reset_control_assert(pcie->core_apb_rst);
> +	clk_disable_unprepare(pcie->core_clk);
> +	regulator_disable(pcie->pex_ctl_reg);
> +	config_plat_gpio(pcie, 0);
> +
> +	if (pcie->cid != CTRL_5)
> +		tegra_pcie_bpmp_set_ctrl_state(pcie, false);
> +
> +	return 0;
> +}
> +
> +static int tegra_pcie_dw_runtime_resume(struct device *dev)
> +{
> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
> +	struct dw_pcie *pci = &pcie->pci;
> +	struct pcie_port *pp = &pci->pp;
> +	int ret = 0;
> +
> +	ret = tegra_pcie_config_controller(pcie, false);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* program to use MPS of 256 whereever possible */

s/whereever/wherever/

> +	pcie_bus_config = PCIE_BUS_SAFE;
> +
> +	pp->root_bus_nr = -1;
> +	pp->ops = &tegra_pcie_dw_host_ops;
> +
> +	/* Disable MSI interrupts for PME messages */

Superfluous comment; it repeats the function name.

> +static int tegra_pcie_dw_suspend_noirq(struct device *dev)
> +{
> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
> +	int ret = 0;
> +
> +	if (!pcie->link_state)
> +		return 0;
> +
> +	/* save MSI interrutp vector*/

s/interrutp/interrupt/
s/vector/vector /

> +static int tegra_pcie_dw_resume_noirq(struct device *dev)
> +{
> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
> +	int ret;
> +
> +	if (!pcie->link_state)
> +		return 0;
> +
> +	if (gpio_is_valid(pcie->pex_wake) && device_may_wakeup(dev)) {
> +		ret = disable_irq_wake(gpio_to_irq(pcie->pex_wake));
> +		if (ret < 0)
> +			dev_err(dev, "disable wake irq failed: %d\n", ret);
> +	}
> +
> +	ret = tegra_pcie_config_controller(pcie, true);
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = tegra_pcie_dw_host_init(&pcie->pci.pp);
> +	if (ret < 0) {
> +		dev_err(dev, "failed to init host: %d\n", ret);
> +		goto fail_host_init;
> +	}
> +
> +	/* restore MSI interrutp vector*/

s/interrutp/interrupt/
s/vector/vector /

> +static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
> +{
> +	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
> +
> +	if (pcie->mode == DW_PCIE_RC_TYPE) {

  if (pcie->mode != DW_PCIE_RC_TYPE)
    return;

Then you can unindent the whole function.

> +		if (!pcie->link_state && pcie->power_down_en)
> +			return;
> +
> +		debugfs_remove_recursive(pcie->debugfs);
> +		tegra_pcie_downstream_dev_to_D0(pcie);
> +
> +		/* Disable interrupts */

Superfluous comment.

> +		disable_irq(pcie->pci.pp.irq);
Vidya Sagar April 2, 2019, 7:17 a.m. UTC | #3
On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
> Hi Vidya,
> 
> Wow, there's a lot of nice work here!  Thanks for that!
> 
> On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
>> Add support for Synopsys DesignWare core IP based PCIe host controller
>> present in Tegra194 SoC.
> 
> General comments:
> 
>    - There are a few multi-line comments that don't match the
>      prevailing style:
> 
>          /*
> 	 * Text...
> 	 */
> 
>    - Comments and dev_info()/dev_err() messages are inconsistent about
>      starting with upper-case or lower-case letters.
> 
>    - Use "MSI", "IRQ", "PCIe", "CPU", etc in comments and messages.
> 
>    - There are a few functions that use "&pdev->dev" many times; can
>      you add a "struct device *dev = &pdev->dev" to reduce the
>      redundancy?
Done.

> 
>> +#include "../../pcie/portdrv.h"
> 
> What's this for?  I didn't see any obvious uses of things from
> portdrv.h, but I didn't actually try to build without it.
This is for the pcie_pme_disable_msi() API. Since it is defined in
portdrv.h, I'm including that header here.

> 
>> +struct tegra_pcie_dw {
>> +	struct device		*dev;
>> +	struct resource		*appl_res;
>> +	struct resource		*dbi_res;
>> +	struct resource		*atu_dma_res;
>> +	void __iomem		*appl_base;
>> +	struct clk		*core_clk;
>> +	struct reset_control	*core_apb_rst;
>> +	struct reset_control	*core_rst;
>> +	struct dw_pcie		pci;
>> +	enum dw_pcie_device_mode mode;
>> +
>> +	bool disable_clock_request;
>> +	bool power_down_en;
>> +	u8 init_link_width;
>> +	bool link_state;
>> +	u32 msi_ctrl_int;
>> +	u32 num_lanes;
>> +	u32 max_speed;
>> +	u32 init_speed;
>> +	bool cdm_check;
>> +	u32 cid;
>> +	int pex_wake;
>> +	bool update_fc_fixup;
>> +	int n_gpios;
>> +	int *gpios;
>> +#if defined(CONFIG_PCIEASPM)
>> +	u32 cfg_link_cap_l1sub;
>> +	u32 event_cntr_ctrl;
>> +	u32 event_cntr_data;
>> +	u32 aspm_cmrt;
>> +	u32 aspm_pwr_on_t;
>> +	u32 aspm_l0s_enter_lat;
>> +	u32 disabled_aspm_states;
>> +#endif
> 
> The above could be indented the same as the rest of the struct?
Done.

> 
>> +	struct regulator	*pex_ctl_reg;
>> +
>> +	int			phy_count;
>> +	struct phy		**phy;
>> +
>> +	struct dentry		*debugfs;
>> +};
> 
>> +static void apply_bad_link_workaround(struct pcie_port *pp)
>> +{
>> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>> +	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
>> +	u16 val;
>> +
>> +	/*
>> +	 * NOTE:- Since this scenario is uncommon and link as
>> +	 * such is not stable anyway, not waiting to confirm
>> +	 * if link is really transiting to Gen-2 speed
> 
> s/transiting/transitioning/
> 
> I think there are other uses of "transit" when you mean "transition".
Done.

> 
>> +static int tegra_pcie_dw_rd_own_conf(struct pcie_port *pp, int where, int size,
>> +				     u32 *val)
>> +{
>> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>> +
>> +	/*
>> +	 * This is an endpoint mode specific register happen to appear even
>> +	 * when controller is operating in root port mode and system hangs
>> +	 * when it is accessed with link being in ASPM-L1 state.
>> +	 * So skip accessing it altogether
>> +	 */
>> +	if (where == PORT_LOGIC_MSIX_DOORBELL) {
>> +		*val = 0x00000000;
>> +		return PCIBIOS_SUCCESSFUL;
>> +	} else {
>> +		return dw_pcie_read(pci->dbi_base + where, size, val);
>> +	}
>> +}
>> +
>> +static int tegra_pcie_dw_wr_own_conf(struct pcie_port *pp, int where, int size,
>> +				     u32 val)
>> +{
>> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>> +
>> +	/* This is EP specific register and system hangs when it is
>> +	 * accessed with link being in ASPM-L1 state.
>> +	 * So skip accessing it altogether
>> +	 */
>> +	if (where == PORT_LOGIC_MSIX_DOORBELL)
>> +		return PCIBIOS_SUCCESSFUL;
>> +	else
>> +		return dw_pcie_write(pci->dbi_base + where, size, val);
> 
> These two functions are almost identical and they could look more
> similar.  This one has the wrong multi-line comment style, uses "EP"
> instead of "endpoint", etc.  Use this style for the "if" since the
> first case is really an error case:
> 
>    if (where == PORT_LOGIC_MSIX_DOORBELL) {
>      ...
>      return ...;
>    }
> 
>    return dw_pcie_...();
Done.

> 
>> +static int tegra_pcie_dw_host_init(struct pcie_port *pp)
>> +{
>> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>> +	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
>> +	int count = 200;
>> +	u32 val, tmp, offset;
>> +	u16 val_w;
>> +
>> +#if defined(CONFIG_PCIEASPM)
>> +	pcie->cfg_link_cap_l1sub =
>> +		dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS) +
>> +		PCI_L1SS_CAP;
>> +#endif
>> +	val = dw_pcie_readl_dbi(pci, PCI_IO_BASE);
>> +	val &= ~(IO_BASE_IO_DECODE | IO_BASE_IO_DECODE_BIT8);
>> +	dw_pcie_writel_dbi(pci, PCI_IO_BASE, val);
>> +
>> +	val = dw_pcie_readl_dbi(pci, PCI_PREF_MEMORY_BASE);
>> +	val |= CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE;
>> +	val |= CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE;
>> +	dw_pcie_writel_dbi(pci, PCI_PREF_MEMORY_BASE, val);
>> +
>> +	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
>> +
>> +	/* Configure FTS */
>> +	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
>> +	val &= ~(N_FTS_MASK << N_FTS_SHIFT);
>> +	val |= N_FTS_VAL << N_FTS_SHIFT;
>> +	dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
>> +
>> +	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_GEN2_CTRL);
>> +	val &= ~FTS_MASK;
>> +	val |= FTS_VAL;
>> +	dw_pcie_writel_dbi(pci, PORT_LOGIC_GEN2_CTRL, val);
>> +
>> +	/* Enable as 0xFFFF0001 response for CRS */
>> +	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT);
>> +	val &= ~(AMBA_ERROR_RESPONSE_CRS_MASK << AMBA_ERROR_RESPONSE_CRS_SHIFT);
>> +	val |= (AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001 <<
>> +		AMBA_ERROR_RESPONSE_CRS_SHIFT);
>> +	dw_pcie_writel_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT, val);
>> +
>> +	/* Set MPS to 256 in DEV_CTL */
>> +	val = dw_pcie_readl_dbi(pci, CFG_DEV_STATUS_CONTROL);
>> +	val &= ~PCI_EXP_DEVCTL_PAYLOAD;
>> +	val |= (1 << CFG_DEV_STATUS_CONTROL_MPS_SHIFT);
>> +	dw_pcie_writel_dbi(pci, CFG_DEV_STATUS_CONTROL, val);
>> +
>> +	/* Configure Max Speed from DT */
>> +	val = dw_pcie_readl_dbi(pci, CFG_LINK_CAP);
>> +	val &= ~PCI_EXP_LNKCAP_SLS;
>> +	val |= pcie->max_speed;
>> +	dw_pcie_writel_dbi(pci, CFG_LINK_CAP, val);
>> +
>> +	val = dw_pcie_readw_dbi(pci, CFG_LINK_CONTROL_2);
>> +	val &= ~PCI_EXP_LNKCTL2_TLS;
>> +	val |= pcie->init_speed;
>> +	dw_pcie_writew_dbi(pci, CFG_LINK_CONTROL_2, val);
>> +
>> +	/* Configure Max lane width from DT */
>> +	val = dw_pcie_readl_dbi(pci, CFG_LINK_CAP);
>> +	val &= ~PCI_EXP_LNKCAP_MLW;
>> +	val |= (pcie->num_lanes << PCI_EXP_LNKSTA_NLW_SHIFT);
>> +	dw_pcie_writel_dbi(pci, CFG_LINK_CAP, val);
>> +
>> +	config_gen3_gen4_eq_presets(pcie);
>> +
>> +#if defined(CONFIG_PCIEASPM)
>> +	/* Enable ASPM counters */
>> +	val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
>> +	val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
>> +	dw_pcie_writel_dbi(pci, pcie->event_cntr_ctrl, val);
>> +
>> +	/* Program T_cmrt and T_pwr_on values */
>> +	val = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
>> +	val &= ~(PCI_L1SS_CAP_CM_RESTORE_TIME | PCI_L1SS_CAP_P_PWR_ON_VALUE);
>> +	val |= (pcie->aspm_cmrt << PCI_L1SS_CAP_CM_RTM_SHIFT);
>> +	val |= (pcie->aspm_pwr_on_t << PCI_L1SS_CAP_PWRN_VAL_SHIFT);
>> +	dw_pcie_writel_dbi(pci, pcie->cfg_link_cap_l1sub, val);
>> +
>> +	/* Program L0s and L1 entrance latencies */
>> +	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
>> +	val &= ~L0S_ENTRANCE_LAT_MASK;
>> +	val |= (pcie->aspm_l0s_enter_lat << L0S_ENTRANCE_LAT_SHIFT);
>> +	val |= ENTER_ASPM;
>> +	dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
>> +
>> +	/* Program what ASPM states sould get advertised */
> 
> s/sould/should/
Done.

> 
>> +	if (pcie->disabled_aspm_states & 0x1)
>> +		disable_aspm_l0s(pcie); /* Disable L0s */
>> +	if (pcie->disabled_aspm_states & 0x2) {
>> +		disable_aspm_l10(pcie); /* Disable L1 */
>> +		disable_aspm_l11(pcie); /* Disable L1.1 */
>> +		disable_aspm_l12(pcie); /* Disable L1.2 */
>> +	}
>> +	if (pcie->disabled_aspm_states & 0x4)
>> +		disable_aspm_l11(pcie); /* Disable L1.1 */
>> +	if (pcie->disabled_aspm_states & 0x8)
>> +		disable_aspm_l12(pcie); /* Disable L1.2 */
>> +#endif
>> +	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
>> +	val &= ~GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL;
>> +	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
>> +
>> +	if (pcie->update_fc_fixup) {
>> +		val = dw_pcie_readl_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF);
>> +		val |= 0x1 << CFG_TIMER_CTRL_ACK_NAK_SHIFT;
>> +		dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val);
>> +	}
>> +
>> +	/* CDM check enable */
>> +	if (pcie->cdm_check) {
>> +		val = dw_pcie_readl_dbi(pci,
>> +					PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS);
>> +		val |= PORT_LOGIC_PL_CHK_REG_CHK_REG_CONTINUOUS;
>> +		val |= PORT_LOGIC_PL_CHK_REG_CHK_REG_START;
>> +		dw_pcie_writel_dbi(pci, PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS,
>> +				   val);
>> +	}
>> +
>> +	dw_pcie_setup_rc(pp);
>> +
>> +	clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ);
>> +
>> +	/* assert RST */
>> +	val = readl(pcie->appl_base + APPL_PINMUX);
>> +	val &= ~APPL_PINMUX_PEX_RST;
>> +	writel(val, pcie->appl_base + APPL_PINMUX);
>> +
>> +	usleep_range(100, 200);
>> +
>> +	/* enable LTSSM */
>> +	val = readl(pcie->appl_base + APPL_CTRL);
>> +	val |= APPL_CTRL_LTSSM_EN;
>> +	writel(val, pcie->appl_base + APPL_CTRL);
>> +
>> +	/* de-assert RST */
>> +	val = readl(pcie->appl_base + APPL_PINMUX);
>> +	val |= APPL_PINMUX_PEX_RST;
>> +	writel(val, pcie->appl_base + APPL_PINMUX);
>> +
>> +	msleep(100);
>> +
>> +	val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
>> +	while (!(val_w & PCI_EXP_LNKSTA_DLLLA)) {
>> +		if (!count) {
>> +			val = readl(pcie->appl_base + APPL_DEBUG);
>> +			val &= APPL_DEBUG_LTSSM_STATE_MASK;
>> +			val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
>> +			tmp = readl(pcie->appl_base + APPL_LINK_STATUS);
>> +			tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
>> +			if (val == 0x11 && !tmp) {
>> +				dev_info(pci->dev, "link is down in DLL");
>> +				dev_info(pci->dev,
>> +					 "trying again with DLFE disabled\n");
>> +				/* disable LTSSM */
>> +				val = readl(pcie->appl_base + APPL_CTRL);
>> +				val &= ~APPL_CTRL_LTSSM_EN;
>> +				writel(val, pcie->appl_base + APPL_CTRL);
>> +
>> +				reset_control_assert(pcie->core_rst);
>> +				reset_control_deassert(pcie->core_rst);
>> +
>> +				offset =
>> +				dw_pcie_find_ext_capability(pci,
>> +							    PCI_EXT_CAP_ID_DLF)
>> +				+ PCI_DLF_CAP;
> 
> This capability offset doesn't change, does it?  Could it be computed
> outside the loop?
This is the only place where the DLF offset is needed, and the scenario is
very rare: so far only a legacy ASMedia USB 3.0 card requires DLF to be
disabled to get the PCIe link up. So I calculate the offset here rather than
keeping it in a separate variable.

> 
>> +				val = dw_pcie_readl_dbi(pci, offset);
>> +				val &= ~DL_FEATURE_EXCHANGE_EN;
>> +				dw_pcie_writel_dbi(pci, offset, val);
>> +
>> +				tegra_pcie_dw_host_init(&pcie->pci.pp);
> 
> This looks like some sort of "wait for link up" retry loop, but a
> recursive call seems a little unusual.  My 5 second analysis is that
> the loop could run this 200 times, and you sure don't want the
> possibility of a 200-deep call chain.  Is there way to split out the
> host init from the link-up polling?
Again, this recursion only comes into play for the legacy ASMedia USB 3.0
card, and it is at most a 1-deep call chain because the recursion happens
only once, based on that condition. Out of the large number of cards we have
tested so far, the ASMedia card is the only one that needs this.

> 
>> +				return 0;
>> +			}
>> +			dev_info(pci->dev, "link is down\n");
>> +			return 0;
>> +		}
>> +		dev_dbg(pci->dev, "polling for link up\n");
>> +		usleep_range(1000, 2000);
>> +		val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
>> +		count--;
>> +	}
>> +	dev_info(pci->dev, "link is up\n");
>> +
>> +	tegra_pcie_enable_interrupts(pp);
>> +
>> +	return 0;
>> +}
> 
>> +static void tegra_pcie_dw_scan_bus(struct pcie_port *pp)
>> +{
>> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>> +	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
>> +	u32 speed;
>> +
>> +	if (!tegra_pcie_dw_link_up(pci))
>> +		return;
>> +
>> +	speed = (dw_pcie_readw_dbi(pci, CFG_LINK_STATUS) & PCI_EXP_LNKSTA_CLS);
>> +	clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);
> 
> I don't understand what's happening here.  This is named
> tegra_pcie_dw_scan_bus(), but it doesn't actually scan anything.
> Maybe it's just a bad name for the dw_pcie_host_ops hook
> (ks_pcie_v3_65_scan_bus() is the only other .scan_bus()
> implementation, and it doesn't scan anything either).
> 
> dw_pcie_host_init() calls pci_scan_root_bus_bridge(), which actually
> *does* scan the bus, but it does it before calling
> pp->ops->scan_bus().  I'd say by the time we get to
> pci_scan_root_bus_bridge(), the device-specific init should be all
> done and we should be using only generic PCI core interfaces.
> 
> Maybe this stuff could/should be done in the ->host_init() hook?  The
> code between ->host_init() and ->scan_bus() is all generic code with
> no device-specific stuff, so I don't know why we need both hooks.
Agree. At least what I'm doing here as part of scan_bus() can be moved to
host_init() itself. I'm not sure about the original intention of scan_bus(),
but my understanding is that it is meant for anything that needs to happen
after the PCIe subsystem has enumerated all the devices. I initially had
other code here that accessed downstream devices, hence I implemented
scan_bus(); now that all of that is gone, I can move this code to
host_init(), roughly as sketched below.
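
I.e., at the end of host_init(), after the link comes up (sketch, with a
u32 speed local and pcie_gen_freq[] as it is in this patch):

  dev_info(pci->dev, "link is up\n");

  speed = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS) & PCI_EXP_LNKSTA_CLS;
  clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);

  tegra_pcie_enable_interrupts(pp);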

> 
>> +static int tegra_pcie_enable_phy(struct tegra_pcie_dw *pcie)
>> +{
>> +	int phy_count = pcie->phy_count;
>> +	int ret;
>> +	int i;
>> +
>> +	for (i = 0; i < phy_count; i++) {
>> +		ret = phy_init(pcie->phy[i]);
>> +		if (ret < 0)
>> +			goto err_phy_init;
>> +
>> +		ret = phy_power_on(pcie->phy[i]);
>> +		if (ret < 0) {
>> +			phy_exit(pcie->phy[i]);
>> +			goto err_phy_power_on;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +
>> +	while (i >= 0) {
>> +		phy_power_off(pcie->phy[i]);
>> +err_phy_power_on:
>> +		phy_exit(pcie->phy[i]);
>> +err_phy_init:
>> +		i--;
>> +	}
> 
> Wow, jumping into the middle of that loop is clever ;)  Can't decide
> what I think of it, but it's certainly clever!
> 
>> +	return ret;
>> +}
>> +
>> +static int tegra_pcie_dw_parse_dt(struct tegra_pcie_dw *pcie)
>> +{
>> +	struct device_node *np = pcie->dev->of_node;
>> +	int ret;
>> +
>> +#if defined(CONFIG_PCIEASPM)
>> +	ret = of_property_read_u32(np, "nvidia,event-cntr-ctrl",
>> +				   &pcie->event_cntr_ctrl);
>> +	if (ret < 0) {
>> +		dev_err(pcie->dev, "fail to read event-cntr-ctrl: %d\n", ret);
>> +		return ret;
>> +	}
> 
> The fact that you return error here if DT lacks the
> "nvidia,event-cntr-ctrl" property, but only if CONFIG_PCIEASPM=y,
> means that you have a revlock between the DT and the kernel: if you
> update the kernel to enable CONFIG_PCIEASPM, you may also have to
> update your DT.
> 
> Maybe that's OK, but I think it'd be nicer if you always required the
> presence of these properties, even if you only actually *use* them
> when CONFIG_PCIEASPM=y.
Done

> 
>> +static int tegra_pcie_dw_probe(struct platform_device *pdev)
>> +{
>> +	struct tegra_pcie_dw *pcie;
>> +	struct pcie_port *pp;
>> +	struct dw_pcie *pci;
>> +	struct phy **phy;
>> +	struct resource	*dbi_res;
>> +	struct resource	*atu_dma_res;
>> +	const struct of_device_id *match;
>> +	const struct tegra_pcie_of_data *data;
>> +	char *name;
>> +	int ret, i;
>> +
>> +	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
>> +	if (!pcie)
>> +		return -ENOMEM;
>> +
>> +	pci = &pcie->pci;
>> +	pci->dev = &pdev->dev;
>> +	pci->ops = &tegra_dw_pcie_ops;
>> +	pp = &pci->pp;
>> +	pcie->dev = &pdev->dev;
>> +
>> +	match = of_match_device(of_match_ptr(tegra_pcie_dw_of_match),
>> +				&pdev->dev);
>> +	if (!match)
>> +		return -EINVAL;
> 
> Logically could be the first thing in the function since it doesn't
> depend on anything.
Done

> 
>> +	data = (struct tegra_pcie_of_data *)match->data;
>> +	pcie->mode = (enum dw_pcie_device_mode)data->mode;
>> +
>> +	ret = tegra_pcie_dw_parse_dt(pcie);
>> +	if (ret < 0) {
>> +		dev_err(pcie->dev, "device tree parsing failed: %d\n", ret);
>> +		return ret;
>> +	}
>> +
>> +	if (gpio_is_valid(pcie->pex_wake)) {
>> +		ret = devm_gpio_request(pcie->dev, pcie->pex_wake,
>> +					"pcie_wake");
>> +		if (ret < 0) {
>> +			if (ret == -EBUSY) {
>> +				dev_err(pcie->dev,
>> +					"pex_wake already in use\n");
>> +				pcie->pex_wake = -EINVAL;
> 
> This looks strange.  "pex_wake == -EINVAL" doesn't look right, and
> you're about to pass it to gpio_direction_input(), which looks wrong.
Done.

> 
>> +			} else {
>> +				dev_err(pcie->dev,
>> +					"pcie_wake gpio_request failed %d\n",
>> +					ret);
>> +				return ret;
>> +			}
>> +		}
>> +
>> +		ret = gpio_direction_input(pcie->pex_wake);
>> +		if (ret < 0) {
>> +			dev_err(pcie->dev,
>> +				"setting pcie_wake input direction failed %d\n",
>> +				ret);
>> +			return ret;
>> +		}
>> +		device_init_wakeup(pcie->dev, true);
>> +	}
>> +
>> +	pcie->pex_ctl_reg = devm_regulator_get(&pdev->dev, "vddio-pex-ctl");
>> +	if (IS_ERR(pcie->pex_ctl_reg)) {
>> +		dev_err(&pdev->dev, "fail to get regulator: %ld\n",
>> +			PTR_ERR(pcie->pex_ctl_reg));
>> +		return PTR_ERR(pcie->pex_ctl_reg);
>> +	}
>> +
>> +	pcie->core_clk = devm_clk_get(&pdev->dev, "core_clk");
>> +	if (IS_ERR(pcie->core_clk)) {
>> +		dev_err(&pdev->dev, "Failed to get core clock\n");
>> +		return PTR_ERR(pcie->core_clk);
>> +	}
>> +
>> +	pcie->appl_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
>> +						      "appl");
>> +	if (!pcie->appl_res) {
>> +		dev_err(&pdev->dev, "missing appl space\n");
>> +		return PTR_ERR(pcie->appl_res);
>> +	}
>> +	pcie->appl_base = devm_ioremap_resource(&pdev->dev, pcie->appl_res);
>> +	if (IS_ERR(pcie->appl_base)) {
>> +		dev_err(&pdev->dev, "mapping appl space failed\n");
>> +		return PTR_ERR(pcie->appl_base);
>> +	}
>> +
>> +	pcie->core_apb_rst = devm_reset_control_get(pcie->dev, "core_apb_rst");
>> +	if (IS_ERR(pcie->core_apb_rst)) {
>> +		dev_err(pcie->dev, "PCIE : core_apb_rst reset is missing\n");
> 
> This error message looks different from the others ("PCIE :" prefix).
Done.

> 
>> +		return PTR_ERR(pcie->core_apb_rst);
>> +	}
>> +
>> +	phy = devm_kcalloc(pcie->dev, pcie->phy_count, sizeof(*phy),
>> +			   GFP_KERNEL);
>> +	if (!phy)
>> +		return PTR_ERR(phy);
>> +
>> +	for (i = 0; i < pcie->phy_count; i++) {
>> +		name = kasprintf(GFP_KERNEL, "pcie-p2u-%u", i);
>> +		if (!name) {
>> +			dev_err(pcie->dev, "failed to create p2u string\n");
>> +			return -ENOMEM;
>> +		}
>> +		phy[i] = devm_phy_get(pcie->dev, name);
>> +		kfree(name);
>> +		if (IS_ERR(phy[i])) {
>> +			ret = PTR_ERR(phy[i]);
>> +			dev_err(pcie->dev, "phy_get error: %d\n", ret);
>> +			return ret;
>> +		}
>> +	}
>> +
>> +	pcie->phy = phy;
>> +
>> +	dbi_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
>> +	if (!dbi_res) {
>> +		dev_err(&pdev->dev, "missing config space\n");
>> +		return PTR_ERR(dbi_res);
>> +	}
>> +	pcie->dbi_res = dbi_res;
>> +
>> +	pci->dbi_base = devm_ioremap_resource(&pdev->dev, dbi_res);
>> +	if (IS_ERR(pci->dbi_base)) {
>> +		dev_err(&pdev->dev, "mapping dbi space failed\n");
>> +		return PTR_ERR(pci->dbi_base);
>> +	}
>> +
>> +	/* Tegra HW locates DBI2 at a fixed offset from DBI */
>> +	pci->dbi_base2 = pci->dbi_base + 0x1000;
>> +
>> +	atu_dma_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
>> +						   "atu_dma");
>> +	if (!atu_dma_res) {
>> +		dev_err(&pdev->dev, "missing atu_dma space\n");
>> +		return PTR_ERR(atu_dma_res);
>> +	}
>> +	pcie->atu_dma_res = atu_dma_res;
>> +	pci->atu_base = devm_ioremap_resource(&pdev->dev, atu_dma_res);
>> +	if (IS_ERR(pci->atu_base)) {
>> +		dev_err(&pdev->dev, "mapping atu space failed\n");
>> +		return PTR_ERR(pci->atu_base);
>> +	}
>> +
>> +	pcie->core_rst = devm_reset_control_get(pcie->dev, "core_rst");
>> +	if (IS_ERR(pcie->core_rst)) {
>> +		dev_err(pcie->dev, "PCIE : core_rst reset is missing\n");
> 
> Different message format again.
Done.

> 
>> +		return PTR_ERR(pcie->core_rst);
>> +	}
>> +
>> +	pp->irq = platform_get_irq_byname(pdev, "intr");
>> +	if (!pp->irq) {
>> +		dev_err(pcie->dev, "failed to get intr interrupt\n");
>> +		return -ENODEV;
>> +	}
>> +
>> +	ret = devm_request_irq(&pdev->dev, pp->irq, tegra_pcie_irq_handler,
>> +			       IRQF_SHARED, "tegra-pcie-intr", pcie);
>> +	if (ret) {
>> +		dev_err(pcie->dev, "failed to request \"intr\" irq\n");
> 
> It'd be nice to include the actual IRQ, i.e., "IRQ %d", pp->irq.
Done.

> 
>> +		return ret;
>> +	}
>> +
>> +	platform_set_drvdata(pdev, pcie);
>> +
>> +	if (pcie->mode == DW_PCIE_RC_TYPE) {
>> +		ret = tegra_pcie_config_rp(pcie);
>> +		if (ret == -ENOMEDIUM)
>> +			ret = 0;
>> +	}
>> +
>> +	return ret;
>> +}
> 
>> +static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
>> +{
>> +	struct pci_dev *pdev = NULL;
> 
> Unnecessary initialization.
Done.

> 
>> +	struct pci_bus *child;
>> +	struct pcie_port *pp = &pcie->pci.pp;
>> +
>> +	list_for_each_entry(child, &pp->bus->children, node) {
>> +		/* Bring downstream devices to D0 if they are not already in */
>> +		if (child->parent == pp->bus) {
>> +			pdev = pci_get_slot(child, PCI_DEVFN(0, 0));
>> +			pci_dev_put(pdev);
>> +			if (!pdev)
>> +				break;
> 
> I don't really like this dance with iterating over the bus children,
> comparing parent to pp->bus, pci_get_slot(), pci_dev_put(), etc.
> 
> I guess the idea is to bring only the directly-downstream devices to
> D0, not to do it for things deeper in the hierarchy?
Yes.

> 
> Is this some Tegra-specific wrinkle?  I don't think other drivers do
> this.
With the Tegra PCIe controller, if a downstream device is in a non-D0 state,
the link doesn't go into the L2 state. We observed this behavior with some
devices, and the solution is to bring them to D0 and then send the
PME_Turn_Off message to put the link into L2. Since the spec also supports
this mechanism (Rev. 4.0 Ver. 1.0, page 428), we chose to implement it.

> 
> I see that an earlier patch added "bus" to struct pcie_port.  I think
> it would be better to somehow connect to the pci_host_bridge struct.
> Several other drivers already do this; see uses of
> pci_host_bridge_from_priv().
All non-DesignWare-based implementations save their private data structure
in the 'private' pointer of struct pci_host_bridge and use
pci_host_bridge_from_priv() to get it back. But DesignWare-based
implementations save pcie_port in 'sysdata' and nothing in the 'private'
pointer. So I'm not sure pci_host_bridge_from_priv() can be used in this
case. Please let me know if you think otherwise.

> 
> That would give you the bus, as well as flags like no_ext_tags,
> native_aer, etc, which this driver, being a host bridge driver that's
> responsible for this part of the firmware/OS interface, may
> conceivably need.
> 
> Rather than pci_get_slot(), couldn't you iterate over bus->devices and
> just skip the non-PCI_DEVFN(0, 0) devices?
> 
>> +
>> +			if (pci_set_power_state(pdev, PCI_D0))
>> +				dev_err(pcie->dev, "D0 transition failed\n");
>> +		}
>> +	}
>> +}
Yeah. This seems to be a better method. I'll implement this.

> 
>> +static int tegra_pcie_dw_remove(struct platform_device *pdev)
>> +{
>> +	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
>> +
>> +	if (pcie->mode == DW_PCIE_RC_TYPE) {
> 
> Return early if it's not RC and unindent the rest of the function.
Done.

> 
>> +		if (!pcie->link_state && pcie->power_down_en)
>> +			return 0;
>> +
>> +		debugfs_remove_recursive(pcie->debugfs);
>> +		pm_runtime_put_sync(pcie->dev);
>> +		pm_runtime_disable(pcie->dev);
>> +	}
>> +
>> +	return 0;
>> +}
> 
>> +static int tegra_pcie_dw_runtime_suspend(struct device *dev)
>> +{
>> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
>> +
>> +	tegra_pcie_downstream_dev_to_D0(pcie);
>> +
>> +	pci_stop_root_bus(pcie->pci.pp.bus);
>> +	pci_remove_root_bus(pcie->pci.pp.bus);
> 
> Why are you calling these?  No other drivers do this except in their
> .remove() methods.  Is there something special about Tegra, or is this
> something the other drivers *should* be doing?
Since this function is called from .remove(), I'm removing the hierarchy to
safely bring down all the devices. I'll have to revisit this part, as
Jisheng Zhang's patches
https://patchwork.kernel.org/project/linux-pci/list/?series=98559
are now approved and I need to verify it after cherry-picking Jisheng's
changes.

> 
>> +	tegra_pcie_dw_pme_turnoff(pcie);
>> +
>> +	reset_control_assert(pcie->core_rst);
>> +	tegra_pcie_disable_phy(pcie);
>> +	reset_control_assert(pcie->core_apb_rst);
>> +	clk_disable_unprepare(pcie->core_clk);
>> +	regulator_disable(pcie->pex_ctl_reg);
>> +	config_plat_gpio(pcie, 0);
>> +
>> +	if (pcie->cid != CTRL_5)
>> +		tegra_pcie_bpmp_set_ctrl_state(pcie, false);
>> +
>> +	return 0;
>> +}
>> +
>> +static int tegra_pcie_dw_runtime_resume(struct device *dev)
>> +{
>> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
>> +	struct dw_pcie *pci = &pcie->pci;
>> +	struct pcie_port *pp = &pci->pp;
>> +	int ret = 0;
>> +
>> +	ret = tegra_pcie_config_controller(pcie, false);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	/* program to use MPS of 256 whereever possible */
> 
> s/whereever/wherever/
Done.

> 
>> +	pcie_bus_config = PCIE_BUS_SAFE;
>> +
>> +	pp->root_bus_nr = -1;
>> +	pp->ops = &tegra_pcie_dw_host_ops;
>> +
>> +	/* Disable MSI interrupts for PME messages */
> 
> Superfluous comment; it repeats the function name.
Removed it.

> 
>> +static int tegra_pcie_dw_suspend_noirq(struct device *dev)
>> +{
>> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
>> +	int ret = 0;
>> +
>> +	if (!pcie->link_state)
>> +		return 0;
>> +
>> +	/* save MSI interrutp vector*/
> 
> s/interrutp/interrupt/
> s/vector/vector /
Done.

> 
>> +static int tegra_pcie_dw_resume_noirq(struct device *dev)
>> +{
>> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
>> +	int ret;
>> +
>> +	if (!pcie->link_state)
>> +		return 0;
>> +
>> +	if (gpio_is_valid(pcie->pex_wake) && device_may_wakeup(dev)) {
>> +		ret = disable_irq_wake(gpio_to_irq(pcie->pex_wake));
>> +		if (ret < 0)
>> +			dev_err(dev, "disable wake irq failed: %d\n", ret);
>> +	}
>> +
>> +	ret = tegra_pcie_config_controller(pcie, true);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	ret = tegra_pcie_dw_host_init(&pcie->pci.pp);
>> +	if (ret < 0) {
>> +		dev_err(dev, "failed to init host: %d\n", ret);
>> +		goto fail_host_init;
>> +	}
>> +
>> +	/* restore MSI interrutp vector*/
> 
> s/interrutp/interrupt/
> s/vector/vector /
Done.

> 
>> +static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
>> +{
>> +	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
>> +
>> +	if (pcie->mode == DW_PCIE_RC_TYPE) {
> 
>    if (pcie->mode != DW_PCIE_RC_TYPE)
>      return;
> 
> Then you can unindent the whole function.
Done.

> 
>> +		if (!pcie->link_state && pcie->power_down_en)
>> +			return;
>> +
>> +		debugfs_remove_recursive(pcie->debugfs);
>> +		tegra_pcie_downstream_dev_to_D0(pcie);
>> +
>> +		/* Disable interrupts */
> 
> Superfluous comment.
Removed it.

> 
>> +		disable_irq(pcie->pci.pp.irq);
Thierry Reding April 2, 2019, 2:14 p.m. UTC | #4
On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
> On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
[...]
> > > +static int tegra_pcie_dw_host_init(struct pcie_port *pp)
> > > +{
[...]
> > > +	val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
> > > +	while (!(val_w & PCI_EXP_LNKSTA_DLLLA)) {
> > > +		if (!count) {
> > > +			val = readl(pcie->appl_base + APPL_DEBUG);
> > > +			val &= APPL_DEBUG_LTSSM_STATE_MASK;
> > > +			val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
> > > +			tmp = readl(pcie->appl_base + APPL_LINK_STATUS);
> > > +			tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
> > > +			if (val == 0x11 && !tmp) {
> > > +				dev_info(pci->dev, "link is down in DLL");
> > > +				dev_info(pci->dev,
> > > +					 "trying again with DLFE disabled\n");
> > > +				/* disable LTSSM */
> > > +				val = readl(pcie->appl_base + APPL_CTRL);
> > > +				val &= ~APPL_CTRL_LTSSM_EN;
> > > +				writel(val, pcie->appl_base + APPL_CTRL);
> > > +
> > > +				reset_control_assert(pcie->core_rst);
> > > +				reset_control_deassert(pcie->core_rst);
> > > +
> > > +				offset =
> > > +				dw_pcie_find_ext_capability(pci,
> > > +							    PCI_EXT_CAP_ID_DLF)
> > > +				+ PCI_DLF_CAP;
> > 
> > This capability offset doesn't change, does it?  Could it be computed
> > outside the loop?
> This is the only place where the DLF offset is needed, and the scenario is
> very rare: so far only a legacy ASMedia USB 3.0 card requires DLF to be
> disabled to get the PCIe link up. So I calculate the offset here rather
> than keeping it in a separate variable.
> 
> > 
> > > +				val = dw_pcie_readl_dbi(pci, offset);
> > > +				val &= ~DL_FEATURE_EXCHANGE_EN;
> > > +				dw_pcie_writel_dbi(pci, offset, val);
> > > +
> > > +				tegra_pcie_dw_host_init(&pcie->pci.pp);
> > 
> > This looks like some sort of "wait for link up" retry loop, but a
> > recursive call seems a little unusual.  My 5 second analysis is that
> > the loop could run this 200 times, and you sure don't want the
> > possibility of a 200-deep call chain.  Is there way to split out the
> > host init from the link-up polling?
> Again, this recursion only comes into play for the legacy ASMedia USB 3.0
> card, and it is at most a 1-deep call chain because the recursion happens
> only once, based on that condition. Out of the large number of cards we
> have tested so far, the ASMedia card is the only one that needs this.

A more idiomatic way would be to add a "retry:" label somewhere and goto
that after disabling DLFE. That way you achieve the same effect, but you
can avoid the recursion, even if it is harmless in practice.
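
I.e., roughly (sketch; the elided parts are the existing code):

  bool retried = false;

  retry:
          /* ... existing DBI programming, PERST# toggle, LTSSM enable ... */

          count = 200;
          val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
          while (!(val_w & PCI_EXP_LNKSTA_DLLLA)) {
                  if (!count) {
                          /* ... read APPL_DEBUG / APPL_LINK_STATUS ... */
                          if (val == 0x11 && !tmp && !retried) {
                                  /*
                                   * Disable LTSSM, toggle core_rst and
                                   * clear DL_FEATURE_EXCHANGE_EN, then
                                   * try to bring the link up once more.
                                   */
                                  retried = true;
                                  goto retry;
                          }
                          dev_info(pci->dev, "link is down\n");
                          return 0;
                  }
                  usleep_range(1000, 2000);
                  val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
                  count--;
          }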

> > > +static int tegra_pcie_dw_probe(struct platform_device *pdev)
> > > +{
> > > +	struct tegra_pcie_dw *pcie;
> > > +	struct pcie_port *pp;
> > > +	struct dw_pcie *pci;
> > > +	struct phy **phy;
> > > +	struct resource	*dbi_res;
> > > +	struct resource	*atu_dma_res;
> > > +	const struct of_device_id *match;
> > > +	const struct tegra_pcie_of_data *data;
> > > +	char *name;
> > > +	int ret, i;
> > > +
> > > +	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
> > > +	if (!pcie)
> > > +		return -ENOMEM;
> > > +
> > > +	pci = &pcie->pci;
> > > +	pci->dev = &pdev->dev;
> > > +	pci->ops = &tegra_dw_pcie_ops;
> > > +	pp = &pci->pp;
> > > +	pcie->dev = &pdev->dev;
> > > +
> > > +	match = of_match_device(of_match_ptr(tegra_pcie_dw_of_match),
> > > +				&pdev->dev);
> > > +	if (!match)
> > > +		return -EINVAL;
> > 
> > Logically could be the first thing in the function since it doesn't
> > depend on anything.
> Done
> 
> > 
> > > +	data = (struct tegra_pcie_of_data *)match->data;

of_device_get_match_data() can help remove some of the above
boilerplate. Also, there's no reason to check for a failure with these
functions. The driver is OF-only and can only ever be probed if the
device exists, in which case match (or data for that matter) will never
be NULL.

> > I see that an earlier patch added "bus" to struct pcie_port.  I think
> > it would be better to somehow connect to the pci_host_bridge struct.
> > Several other drivers already do this; see uses of
> > pci_host_bridge_from_priv().
> All non-DesignWare based implementations save their private data structure
> in 'private' pointer of struct pci_host_bridge and use pci_host_bridge_from_priv()
> to get it back. But, DesignWare based implementations save pcie_port in 'sysdata'
> and nothing in 'private' pointer. So,  I'm not sure if pci_host_bridge_from_priv()
> can be used in this case. Please do let me know if you think otherwise.

If nothing is currently stored in the private pointer, why not do like
the other drivers and store the struct pci_host_bridge pointer there?

Thierry
Bjorn Helgaas April 2, 2019, 6:31 p.m. UTC | #5
On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
> On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
> > On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
> > > Add support for Synopsys DesignWare core IP based PCIe host controller
> > > present in Tegra194 SoC.

> > > +#include "../../pcie/portdrv.h"
> > 
> > What's this for?  I didn't see any obvious uses of things from
> > portdrv.h, but I didn't actually try to build without it.
> This is for pcie_pme_disable_msi() API. Since this is defined in portdrv.h
> file, I'm including it here.

Hmm, OK, I missed that.  If you need pcie_pme_disable_msi(), you
definitely need portdrv.h.  But you're still a singleton in terms of
including it, so it leads to follow-up questions:

  - Why does this chip require pcie_pme_disable_msi()?  The only other
    use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
    ("PCI PM: Make it possible to force using INTx for PCIe PME
    signaling").

  - Is this a workaround for a Tegra194 defect?  If so, it should be
    separated out and identified as such.  Otherwise it will get
    copied to other places where it doesn't belong.

  - You only call it from the .runtime_resume() hook.  That doesn't
    make sense to me: if you never suspend, you never disable MSI for
    PME signaling.

> > > +	val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
> > > +	while (!(val_w & PCI_EXP_LNKSTA_DLLLA)) {
> > > +		if (!count) {
> > > +			val = readl(pcie->appl_base + APPL_DEBUG);
> > > +			val &= APPL_DEBUG_LTSSM_STATE_MASK;
> > > +			val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
> > > +			tmp = readl(pcie->appl_base + APPL_LINK_STATUS);
> > > +			tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
> > > +			if (val == 0x11 && !tmp) {
> > > +				dev_info(pci->dev, "link is down in DLL");
> > > +				dev_info(pci->dev,
> > > +					 "trying again with DLFE disabled\n");
> > > +				/* disable LTSSM */
> > > +				val = readl(pcie->appl_base + APPL_CTRL);
> > > +				val &= ~APPL_CTRL_LTSSM_EN;
> > > +				writel(val, pcie->appl_base + APPL_CTRL);
> > > +
> > > +				reset_control_assert(pcie->core_rst);
> > > +				reset_control_deassert(pcie->core_rst);
> > > +
> > > +				offset =
> > > +				dw_pcie_find_ext_capability(pci,
> > > +							    PCI_EXT_CAP_ID_DLF)
> > > +				+ PCI_DLF_CAP;
> > 
> > This capability offset doesn't change, does it?  Could it be computed
> > outside the loop?
> This is the only place where DLF offset is needed and gets calculated and this
> scenario is very rare as so far only a legacy ASMedia USB3.0 card requires DLF
> to be disabled to get PCIe link up. So, I thought of calculating the offset
> here itself instead of using a separate variable.

Hmm.  Sounds like this is essentially a quirk to work around some
hardware issue in Tegra194.

Is there some way you can pull this out into a separate function so it
doesn't clutter the normal path and it's more obvious that it's a
workaround?

> > > +				val = dw_pcie_readl_dbi(pci, offset);
> > > +				val &= ~DL_FEATURE_EXCHANGE_EN;
> > > +				dw_pcie_writel_dbi(pci, offset, val);
> > > +
> > > +				tegra_pcie_dw_host_init(&pcie->pci.pp);
> > 
> > This looks like some sort of "wait for link up" retry loop, but a
> > recursive call seems a little unusual.  My 5 second analysis is that
> > the loop could run this 200 times, and you sure don't want the
> > possibility of a 200-deep call chain.  Is there way to split out the
> > host init from the link-up polling?
> Again, this recursive calling comes into picture only for a legacy ASMedia
> USB3.0 card and it is going to be a 1-deep call chain as the recursion takes
> place only once depending on the condition. Apart from the legacy ASMedia card,
> there is no other card at this point in time out of a huge number of cards that we have
> tested.

We need to be able to analyze the code without spending time working
out what arcane PCIe spec details might limit this to fewer than 200
iterations, or relying on assumptions about how many cards have been
tested.

If you can restructure this so the "wait for link up" loop looks like
all the other drivers, and the DLF issue is separated out and just
causes a retry of the "wait for link up", I think that will help a
lot.

> > > +static void tegra_pcie_dw_scan_bus(struct pcie_port *pp)
> > > +{
> > > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > > +	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
> > > +	u32 speed;
> > > +
> > > +	if (!tegra_pcie_dw_link_up(pci))
> > > +		return;
> > > +
> > > +	speed = (dw_pcie_readw_dbi(pci, CFG_LINK_STATUS) & PCI_EXP_LNKSTA_CLS);
> > > +	clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);
> > 
> > I don't understand what's happening here.  This is named
> > tegra_pcie_dw_scan_bus(), but it doesn't actually scan anything.
> > Maybe it's just a bad name for the dw_pcie_host_ops hook
> > (ks_pcie_v3_65_scan_bus() is the only other .scan_bus()
> > implementation, and it doesn't scan anything either).
> > 
> > dw_pcie_host_init() calls pci_scan_root_bus_bridge(), which actually
> > *does* scan the bus, but it does it before calling
> > pp->ops->scan_bus().  I'd say by the time we get to
> > pci_scan_root_bus_bridge(), the device-specific init should be all
> > done and we should be using only generic PCI core interfaces.
> > 
> > Maybe this stuff could/should be done in the ->host_init() hook?  The
> > code between ->host_init() and ->scan_bus() is all generic code with
> > no device-specific stuff, so I don't know why we need both hooks.
> Agree. At least whatever I'm doing here as part of scan_bus can be moved to
> host_init() itself. I'm not sure about the original intention of the scan_bus
> but my understanding is that, after PCIe sub-system enumerates all devices, if
> something needs to be done, then, probably scan_bus() can be implemented for that.
> I had some other code initially which was accessing downstream devices, hence I
> implemented scan_bus(), but, now, given all that is gone, I can move this code to
> host_init() itself.

That'd be perfect.  I suspect ks_pcie_v3_65_scan_bus() is left over
from before the generic PCI core scan interface, and it could probably
be moved to host_init() as well.

> > > +static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
> > > +{
> > > +	struct pci_dev *pdev = NULL;
> > 
> > Unnecessary initialization.
> Done.
> 
> > > +	struct pci_bus *child;
> > > +	struct pcie_port *pp = &pcie->pci.pp;
> > > +
> > > +	list_for_each_entry(child, &pp->bus->children, node) {
> > > +		/* Bring downstream devices to D0 if they are not already in */
> > > +		if (child->parent == pp->bus) {
> > > +			pdev = pci_get_slot(child, PCI_DEVFN(0, 0));
> > > +			pci_dev_put(pdev);
> > > +			if (!pdev)
> > > +				break;
> > 
> > I don't really like this dance with iterating over the bus children,
> > comparing parent to pp->bus, pci_get_slot(), pci_dev_put(), etc.
> > 
> > I guess the idea is to bring only the directly-downstream devices to
> > D0, not to do it for things deeper in the hierarchy?
> Yes.
> 
> > Is this some Tegra-specific wrinkle?  I don't think other drivers do
> > this.
> With Tegra PCIe controller, if the downstream device is in non-D0 states,
> link doesn't go into L2 state. We observed this behavior with some of the
> devices and the solution would be to bring them to D0 state and then attempt
> sending PME_TurnOff message to put the link to L2 state.
> Since spec also supports this mechanism (Rev.4.0 Ver.1.0 Page #428), we chose
> to implement this.

Sounds like a Tegra oddity that should be documented as such so it
doesn't get copied elsewhere.

> > I see that an earlier patch added "bus" to struct pcie_port.  I think
> > it would be better to somehow connect to the pci_host_bridge struct.
> > Several other drivers already do this; see uses of
> > pci_host_bridge_from_priv().
> All non-DesignWare based implementations save their private data structure
> in 'private' pointer of struct pci_host_bridge and use pci_host_bridge_from_priv()
> to get it back. But, DesignWare based implementations save pcie_port in 'sysdata'
> and nothing in 'private' pointer. So,  I'm not sure if pci_host_bridge_from_priv()
> can be used in this case. Please do let me know if you think otherwise.

DesignWare-based drivers should have a way to retrieve the
pci_host_bridge pointer.  It doesn't have to be *exactly* the
same as non-DesignWare drivers, but it should be similar.

> > That would give you the bus, as well as flags like no_ext_tags,
> > native_aer, etc, which this driver, being a host bridge driver that's
> > responsible for this part of the firmware/OS interface, may
> > conceivably need.

> > > +static int tegra_pcie_dw_runtime_suspend(struct device *dev)
> > > +{
> > > +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
> > > +
> > > +	tegra_pcie_downstream_dev_to_D0(pcie);
> > > +
> > > +	pci_stop_root_bus(pcie->pci.pp.bus);
> > > +	pci_remove_root_bus(pcie->pci.pp.bus);
> > 
> > Why are you calling these?  No other drivers do this except in their
> > .remove() methods.  Is there something special about Tegra, or is this
> > something the other drivers *should* be doing?
> Since this API is called by remove, I'm removing the hierarchy to safely
> bring down all the devices. I'll have to re-visit this part as
> Jisheng Zhang's patches https://patchwork.kernel.org/project/linux-pci/list/?series=98559
> are now approved and I need to verify this part after cherry-picking
> Jisheng's changes.

Tegra194 should do this the same way as other drivers, independent of
Jisheng's changes.

Bjorn
Vidya Sagar April 3, 2019, 9:15 a.m. UTC | #6
On 4/2/2019 7:44 PM, Thierry Reding wrote:
> On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
>> On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
> [...]
>>>> +static int tegra_pcie_dw_host_init(struct pcie_port *pp)
>>>> +{
> [...]
>>>> +	val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
>>>> +	while (!(val_w & PCI_EXP_LNKSTA_DLLLA)) {
>>>> +		if (!count) {
>>>> +			val = readl(pcie->appl_base + APPL_DEBUG);
>>>> +			val &= APPL_DEBUG_LTSSM_STATE_MASK;
>>>> +			val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
>>>> +			tmp = readl(pcie->appl_base + APPL_LINK_STATUS);
>>>> +			tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
>>>> +			if (val == 0x11 && !tmp) {
>>>> +				dev_info(pci->dev, "link is down in DLL");
>>>> +				dev_info(pci->dev,
>>>> +					 "trying again with DLFE disabled\n");
>>>> +				/* disable LTSSM */
>>>> +				val = readl(pcie->appl_base + APPL_CTRL);
>>>> +				val &= ~APPL_CTRL_LTSSM_EN;
>>>> +				writel(val, pcie->appl_base + APPL_CTRL);
>>>> +
>>>> +				reset_control_assert(pcie->core_rst);
>>>> +				reset_control_deassert(pcie->core_rst);
>>>> +
>>>> +				offset =
>>>> +				dw_pcie_find_ext_capability(pci,
>>>> +							    PCI_EXT_CAP_ID_DLF)
>>>> +				+ PCI_DLF_CAP;
>>>
>>> This capability offset doesn't change, does it?  Could it be computed
>>> outside the loop?
>> This is the only place where DLF offset is needed and gets calculated and this
>> scenario is very rare as so far only a legacy ASMedia USB3.0 card requires DLF
>> to be disabled to get PCIe link up. So, I thought of calculating the offset
>> here itself instead of using a separate variable.
>>
>>>
>>>> +				val = dw_pcie_readl_dbi(pci, offset);
>>>> +				val &= ~DL_FEATURE_EXCHANGE_EN;
>>>> +				dw_pcie_writel_dbi(pci, offset, val);
>>>> +
>>>> +				tegra_pcie_dw_host_init(&pcie->pci.pp);
>>>
>>> This looks like some sort of "wait for link up" retry loop, but a
>>> recursive call seems a little unusual.  My 5 second analysis is that
>>> the loop could run this 200 times, and you sure don't want the
>>> possibility of a 200-deep call chain.  Is there way to split out the
>>> host init from the link-up polling?
>> Again, this recursive calling comes into picture only for a legacy ASMedia
>> USB3.0 card and it is going to be a 1-deep call chain as the recursion takes
>> place only once depending on the condition. Apart from the legacy ASMedia card,
>> there is no other card at this point in time out of a huge number of cards that we have
>> tested.
> 
> A more idiomatic way would be to add a "retry:" label somewhere and goto
> that after disabling DLFE. That way you achieve the same effect, but you
> can avoid the recursion, even if it is harmless in practice.
Initially I thought of using a goto to keep it simple, but I assumed it would be
discouraged and hence used recursion. But yes, I agree that a goto keeps it
simple, and I'll switch to one now.
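Something along these lines (just a rough sketch, not the final patch;
'retry_link', the 'retried' guard and the 'link_stuck_in_dll' check are only
placeholders for the checks host_init() already performs):

	bool retried = false;	/* illustrative one-shot guard */

retry_link:
	/* ... deassert core_rst, enable LTSSM, poll PCI_EXP_LNKSTA_DLLLA ... */

	if (link_stuck_in_dll && !retried) {
		/* LTSSM stuck at 0x11 with RDLH link down: disable LTSSM
		 * and reset the core before retrying */
		val = readl(pcie->appl_base + APPL_CTRL);
		val &= ~APPL_CTRL_LTSSM_EN;
		writel(val, pcie->appl_base + APPL_CTRL);

		reset_control_assert(pcie->core_rst);
		reset_control_deassert(pcie->core_rst);

		/* drop Data Link Feature exchange and try once more */
		offset = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_DLF);
		val = dw_pcie_readl_dbi(pci, offset + PCI_DLF_CAP);
		val &= ~DL_FEATURE_EXCHANGE_EN;
		dw_pcie_writel_dbi(pci, offset + PCI_DLF_CAP, val);

		retried = true;
		goto retry_link;
	}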

> 
>>>> +static int tegra_pcie_dw_probe(struct platform_device *pdev)
>>>> +{
>>>> +	struct tegra_pcie_dw *pcie;
>>>> +	struct pcie_port *pp;
>>>> +	struct dw_pcie *pci;
>>>> +	struct phy **phy;
>>>> +	struct resource	*dbi_res;
>>>> +	struct resource	*atu_dma_res;
>>>> +	const struct of_device_id *match;
>>>> +	const struct tegra_pcie_of_data *data;
>>>> +	char *name;
>>>> +	int ret, i;
>>>> +
>>>> +	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
>>>> +	if (!pcie)
>>>> +		return -ENOMEM;
>>>> +
>>>> +	pci = &pcie->pci;
>>>> +	pci->dev = &pdev->dev;
>>>> +	pci->ops = &tegra_dw_pcie_ops;
>>>> +	pp = &pci->pp;
>>>> +	pcie->dev = &pdev->dev;
>>>> +
>>>> +	match = of_match_device(of_match_ptr(tegra_pcie_dw_of_match),
>>>> +				&pdev->dev);
>>>> +	if (!match)
>>>> +		return -EINVAL;
>>>
>>> Logically could be the first thing in the function since it doesn't
>>> depend on anything.
>> Done
>>
>>>
>>>> +	data = (struct tegra_pcie_of_data *)match->data;
> 
> of_device_get_match_data() can help remove some of the above
> boilerplate. Also, there's no reason to check for a failure with these
> functions. The driver is OF-only and can only ever be probed if the
> device exists, in which case match (or data for that matter) will never
> be NULL.
Done.

> 
>>> I see that an earlier patch added "bus" to struct pcie_port.  I think
>>> it would be better to somehow connect to the pci_host_bridge struct.
>>> Several other drivers already do this; see uses of
>>> pci_host_bridge_from_priv().
>> All non-DesignWare based implementations save their private data structure
>> in 'private' pointer of struct pci_host_bridge and use pci_host_bridge_from_priv()
>> to get it back. But, DesignWare based implementations save pcie_port in 'sysdata'
>> and nothing in 'private' pointer. So,  I'm not sure if pci_host_bridge_from_priv()
>> can be used in this case. Please do let me know if you think otherwise.
> 
> If nothing is currently stored in the private pointer, why not do like
> the other drivers and store the struct pci_host_bridge pointer there?
Non-DesignWare drivers get their private data allocated as part of
pci_alloc_host_bridge() by passing the size of their private structure, and they
use pci_host_bridge_from_priv() to get a pointer back to that structure (which
is embedded within struct pci_host_bridge). In the DesignWare core, however, the
memory for struct pcie_port is obtained well before pci_alloc_host_bridge() is
called; in fact, size '0' is passed as the argument to the alloc API. This is
the reason why the struct pcie_port pointer is saved in 'sysdata'.
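For reference, a minimal sketch of the two patterns (the helpers are the
existing kernel ones; variable names are just illustrative):

	/* non-DWC drivers: the private struct is embedded in pci_host_bridge */
	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
	pcie = pci_host_bridge_priv(bridge);
	/* ... later, container_of() gets us back to the bridge: */
	bridge = pci_host_bridge_from_priv(pcie);

	/* DWC core today: pcie_port is allocated separately, so the bridge
	 * gets zero private size and only sysdata is set */
	bridge = pci_alloc_host_bridge(0);
	bridge->sysdata = pp;	/* no container_of() path back to the bridge */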

> 
> Thierry
>
Vidya Sagar April 3, 2019, 9:43 a.m. UTC | #7
On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
> On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
>> On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
>>> On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
>>>> Add support for Synopsys DesignWare core IP based PCIe host controller
>>>> present in Tegra194 SoC.
> 
>>>> +#include "../../pcie/portdrv.h"
>>>
>>> What's this for?  I didn't see any obvious uses of things from
>>> portdrv.h, but I didn't actually try to build without it.
>> This is for pcie_pme_disable_msi() API. Since this is defined in portdrv.h
>> file, I'm including it here.
> 
> Hmm, OK, I missed that.  If you need pcie_pme_disable_msi(), you
> definitely need portdrv.h.  But you're still a singleton in terms of
> including it, so it leads to follow-up questions:
> 
>    - Why does this chip require pcie_pme_disable_msi()?  The only other
>      use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
>      ("PCI PM: Make it possible to force using INTx for PCIe PME
>      signaling").
Because Tegra194 doesn't support raising PME interrupts through MSI line.
> 
>    - Is this a workaround for a Tegra194 defect?  If so, it should be
>      separated out and identified as such.  Otherwise it will get
>      copied to other places where it doesn't belong.
The guideline from the hardware team is that MSI for PME should be disabled.
I'll add an explicit comment in the code stating that this is specific to Tegra194.
> 
>    - You only call it from the .runtime_resume() hook.  That doesn't
>      make sense to me: if you never suspend, you never disable MSI for
>      PME signaling.
.runtime_resume() is not only called during resume but also in the normal path:
since we are using the runtime PM framework, pm_runtime_get_sync() is eventually
called in probe, which executes the .runtime_resume() hook.
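In other words, roughly (a sketch of the probe path, not the exact code; the
error label is hypothetical):

	/* in tegra_pcie_dw_probe(), after resources are set up: */
	pm_runtime_enable(&pdev->dev);
	ret = pm_runtime_get_sync(&pdev->dev);	/* ends up in .runtime_resume(),
						 * which calls host_init() */
	if (ret < 0)
		goto fail_pm_get_sync;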

> 
>>>> +	val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
>>>> +	while (!(val_w & PCI_EXP_LNKSTA_DLLLA)) {
>>>> +		if (!count) {
>>>> +			val = readl(pcie->appl_base + APPL_DEBUG);
>>>> +			val &= APPL_DEBUG_LTSSM_STATE_MASK;
>>>> +			val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
>>>> +			tmp = readl(pcie->appl_base + APPL_LINK_STATUS);
>>>> +			tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
>>>> +			if (val == 0x11 && !tmp) {
>>>> +				dev_info(pci->dev, "link is down in DLL");
>>>> +				dev_info(pci->dev,
>>>> +					 "trying again with DLFE disabled\n");
>>>> +				/* disable LTSSM */
>>>> +				val = readl(pcie->appl_base + APPL_CTRL);
>>>> +				val &= ~APPL_CTRL_LTSSM_EN;
>>>> +				writel(val, pcie->appl_base + APPL_CTRL);
>>>> +
>>>> +				reset_control_assert(pcie->core_rst);
>>>> +				reset_control_deassert(pcie->core_rst);
>>>> +
>>>> +				offset =
>>>> +				dw_pcie_find_ext_capability(pci,
>>>> +							    PCI_EXT_CAP_ID_DLF)
>>>> +				+ PCI_DLF_CAP;
>>>
>>> This capability offset doesn't change, does it?  Could it be computed
>>> outside the loop?
>> This is the only place where DLF offset is needed and gets calculated and this
>> scenario is very rare as so far only a legacy ASMedia USB3.0 card requires DLF
>> to be disabled to get PCIe link up. So, I thought of calculating the offset
>> here itself instead of using a separate variable.
> 
> Hmm.  Sounds like this is essentially a quirk to work around some
> hardware issue in Tegra194.
> 
> Is there some way you can pull this out into a separate function so it
> doesn't clutter the normal path and it's more obvious that it's a
> workaround?
> 
>>>> +				val = dw_pcie_readl_dbi(pci, offset);
>>>> +				val &= ~DL_FEATURE_EXCHANGE_EN;
>>>> +				dw_pcie_writel_dbi(pci, offset, val);
>>>> +
>>>> +				tegra_pcie_dw_host_init(&pcie->pci.pp);
>>>
>>> This looks like some sort of "wait for link up" retry loop, but a
>>> recursive call seems a little unusual.  My 5 second analysis is that
>>> the loop could run this 200 times, and you sure don't want the
>>> possibility of a 200-deep call chain.  Is there way to split out the
>>> host init from the link-up polling?
>> Again, this recursive calling comes into picture only for a legacy ASMedia
>> USB3.0 card and it is going to be a 1-deep call chain as the recursion takes
>> place only once depending on the condition. Apart from the legacy ASMedia card,
>> there is no other card at this point in time out of a huge number of cards that we have
>> tested.
> 
> We need to be able to analyze the code without spending time working
> out what arcane PCIe spec details might limit this to fewer than 200
> iterations, or relying on assumptions about how many cards have been
> tested.
> 
> If you can restructure this so the "wait for link up" loop looks like
> all the other drivers, and the DLF issue is separated out and just
> causes a retry of the "wait for link up", I think that will help a
> lot.
As per Thierry Reding's suggestion, I'll be using a goto statement to avoid
recursion and that should simplify things here.

> 
>>>> +static void tegra_pcie_dw_scan_bus(struct pcie_port *pp)
>>>> +{
>>>> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>>>> +	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
>>>> +	u32 speed;
>>>> +
>>>> +	if (!tegra_pcie_dw_link_up(pci))
>>>> +		return;
>>>> +
>>>> +	speed = (dw_pcie_readw_dbi(pci, CFG_LINK_STATUS) & PCI_EXP_LNKSTA_CLS);
>>>> +	clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);
>>>
>>> I don't understand what's happening here.  This is named
>>> tegra_pcie_dw_scan_bus(), but it doesn't actually scan anything.
>>> Maybe it's just a bad name for the dw_pcie_host_ops hook
>>> (ks_pcie_v3_65_scan_bus() is the only other .scan_bus()
>>> implementation, and it doesn't scan anything either).
>>>
>>> dw_pcie_host_init() calls pci_scan_root_bus_bridge(), which actually
>>> *does* scan the bus, but it does it before calling
>>> pp->ops->scan_bus().  I'd say by the time we get to
>>> pci_scan_root_bus_bridge(), the device-specific init should be all
>>> done and we should be using only generic PCI core interfaces.
>>>
>>> Maybe this stuff could/should be done in the ->host_init() hook?  The
>>> code between ->host_init() and ->scan_bus() is all generic code with
>>> no device-specific stuff, so I don't know why we need both hooks.
>> Agree. At least whatever I'm doing here as part of scan_bus can be moved to
>> host_init() itself. I'm not sure about the original intention of the scan_bus
>> but my understanding is that, after PCIe sub-system enumerates all devices, if
>> something needs to be done, then, probably scan_bus() can be implemented for that.
>> I had some other code initially which was accessing downstream devices, hence I
>> implemented scan_bus(), but, now, given all that is gone, I can move this code to
>> host_init() itself.
> 
> That'd be perfect.  I suspect ks_pcie_v3_65_scan_bus() is left over
> from before the generic PCI core scan interface, and it could probably
> be moved to host_init() as well.
I think so.

> 
>>>> +static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
>>>> +{
>>>> +	struct pci_dev *pdev = NULL;
>>>
>>> Unnecessary initialization.
>> Done.
>>
>>>> +	struct pci_bus *child;
>>>> +	struct pcie_port *pp = &pcie->pci.pp;
>>>> +
>>>> +	list_for_each_entry(child, &pp->bus->children, node) {
>>>> +		/* Bring downstream devices to D0 if they are not already in */
>>>> +		if (child->parent == pp->bus) {
>>>> +			pdev = pci_get_slot(child, PCI_DEVFN(0, 0));
>>>> +			pci_dev_put(pdev);
>>>> +			if (!pdev)
>>>> +				break;
>>>
>>> I don't really like this dance with iterating over the bus children,
>>> comparing parent to pp->bus, pci_get_slot(), pci_dev_put(), etc.
>>>
>>> I guess the idea is to bring only the directly-downstream devices to
>>> D0, not to do it for things deeper in the hierarchy?
>> Yes.
>>
>>> Is this some Tegra-specific wrinkle?  I don't think other drivers do
>>> this.
>> With Tegra PCIe controller, if the downstream device is in non-D0 states,
>> link doesn't go into L2 state. We observed this behavior with some of the
>> devices and the solution would be to bring them to D0 state and then attempt
>> sending PME_TurnOff message to put the link to L2 state.
>> Since spec also supports this mechanism (Rev.4.0 Ver.1.0 Page #428), we chose
>> to implement this.
> 
> Sounds like a Tegra oddity that should be documented as such so it
> doesn't get copied elsewhere.
I'll add a comment explicitly to state the same.

> 
>>> I see that an earlier patch added "bus" to struct pcie_port.  I think
>>> it would be better to somehow connect to the pci_host_bridge struct.
>>> Several other drivers already do this; see uses of
>>> pci_host_bridge_from_priv().
>> All non-DesignWare based implementations save their private data structure
>> in 'private' pointer of struct pci_host_bridge and use pci_host_bridge_from_priv()
>> to get it back. But, DesignWare based implementations save pcie_port in 'sysdata'
>> and nothing in 'private' pointer. So,  I'm not sure if pci_host_bridge_from_priv()
>> can be used in this case. Please do let me know if you think otherwise.
> 
> DesignWare-based drivers should have a way to retrieve the
> pci_host_bridge pointer.  It doesn't have to be *exactly* the
> same as non-DesignWare drivers, but it should be similar.
I gave my reasoning, in another thread as a reply to Thierry Reding's comments,
as to why it is not possible with the current code to get the pci_host_bridge
structure pointer from the struct pcie_port pointer. Since Jisheng's changes to
support the remove functionality are now accepted, I think using the bus pointer
saved in struct pcie_port shouldn't be an issue anymore. Please do let me know
if that is not acceptable.

> 
>>> That would give you the bus, as well as flags like no_ext_tags,
>>> native_aer, etc, which this driver, being a host bridge driver that's
>>> responsible for this part of the firmware/OS interface, may
>>> conceivably need.
> 
>>>> +static int tegra_pcie_dw_runtime_suspend(struct device *dev)
>>>> +{
>>>> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
>>>> +
>>>> +	tegra_pcie_downstream_dev_to_D0(pcie);
>>>> +
>>>> +	pci_stop_root_bus(pcie->pci.pp.bus);
>>>> +	pci_remove_root_bus(pcie->pci.pp.bus);
>>>
>>> Why are you calling these?  No other drivers do this except in their
>>> .remove() methods.  Is there something special about Tegra, or is this
>>> something the other drivers *should* be doing?
>> Since this API is called by remove, I'm removing the hierarchy to safely
>> bring down all the devices. I'll have to re-visit this part as
>> Jisheng Zhang's patches https://patchwork.kernel.org/project/linux-pci/list/?series=98559
>> are now approved and I need to verify this part after cherry-picking
>> Jisheng's changes.
> 
> Tegra194 should do this the same way as other drivers, independent of
> Jisheng's changes.
When other DesignWare implementations add remove functionality, they should also
be calling these APIs (Jisheng also mentioned the same in his commit message).

> 
> Bjorn
>
Bjorn Helgaas April 3, 2019, 5:36 p.m. UTC | #8
On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
> On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
> > On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
> > > On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
> > > > On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
> > > > > Add support for Synopsys DesignWare core IP based PCIe host controller
> > > > > present in Tegra194 SoC.
> > 
> >    - Why does this chip require pcie_pme_disable_msi()?  The only other
> >      use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
> >      ("PCI PM: Make it possible to force using INTx for PCIe PME
> >      signaling").
>
> Because Tegra194 doesn't support raising PME interrupts through MSI line.

What does the spec say about this?  Is hardware supposed to support
MSI for PME?  Given that MSI Wind U-100 and Tegra194 are the only two
cases we know about where PME via MSI isn't supported, it seems like
there must be either a requirement for that or some mechanism for the
OS to figure this out, e.g., a capability bit.

> > > > I see that an earlier patch added "bus" to struct pcie_port.
> > > > I think it would be better to somehow connect to the
> > > > pci_host_bridge struct.  Several other drivers already do
> > > > this; see uses of pci_host_bridge_from_priv().
> > >
> > > All non-DesignWare based implementations save their private data
> > > structure in 'private' pointer of struct pci_host_bridge and use
> > > pci_host_bridge_from_priv() to get it back. But, DesignWare
> > > based implementations save pcie_port in 'sysdata' and nothing in
> > > 'private' pointer. So,  I'm not sure if
> > > pci_host_bridge_from_priv() can be used in this case. Please do
> > > let me know if you think otherwise.
> > 
> > DesignWare-based drivers should have a way to retrieve the
> > pci_host_bridge pointer.  It doesn't have to be *exactly* the same
> > as non-DesignWare drivers, but it should be similar.
>
> I gave my reasoning as to why with the current code, it is not
> possible to get the pci_host_bridge structure pointer from struct
> pcie_port pointer in another thread as a reply to Thierry Reding's
> comments. Since Jisheng's changes to support remove functionality are
> accepted, I think using bus pointer saved in struct pcie_port
> pointer shouldn't be any issue now. Please do let me know if that is
> something not acceptable.
>
> > > > That would give you the bus, as well as flags like
> > > > no_ext_tags, native_aer, etc, which this driver, being a host
> > > > bridge driver that's responsible for this part of the
> > > > firmware/OS interface, may conceivably need.

I think saving the pp->root_bus pointer as Jisheng's patch does is a
sub-optimal solution.  If we figure out how to save the
pci_host_bridge pointer, we automatically get the root bus pointer as
well.

It may require some restructuring to save the pci_host_bridge pointer,
but I doubt it's really *impossible*.

> > > > > +static int tegra_pcie_dw_runtime_suspend(struct device *dev)
> > > > > +{
> > > > > +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
> > > > > +
> > > > > +	tegra_pcie_downstream_dev_to_D0(pcie);
> > > > > +
> > > > > +	pci_stop_root_bus(pcie->pci.pp.bus);
> > > > > +	pci_remove_root_bus(pcie->pci.pp.bus);
> > > > 
> > > > Why are you calling these?  No other drivers do this except in
> > > > their .remove() methods.  Is there something special about
> > > > Tegra, or is this something the other drivers *should* be
> > > > doing?
> > >
> > > Since this API is called by remove, I'm removing the hierarchy
> > > to safely bring down all the devices. I'll have to re-visit this
> > > part as Jisheng Zhang's patches
> > > https://patchwork.kernel.org/project/linux-pci/list/?series=98559
> > > are now approved and I need to verify this part after
> > > cherry-picking Jisheng's changes.
> > 
> > Tegra194 should do this the same way as other drivers, independent
> > of Jisheng's changes.
>
> When other Designware implementations add remove functionality, even
> they should be calling these APIs (Jisheng also mentioned the same
> in his commit message)

My point is that these APIs should be called from driver .remove()
methods, not from .runtime_suspend() methods.

Bjorn
Vidya Sagar April 4, 2019, 7:53 p.m. UTC | #9
On 4/3/2019 11:06 PM, Bjorn Helgaas wrote:
> On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
>> On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
>>> On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
>>>> On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
>>>>> On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
>>>>>> Add support for Synopsys DesignWare core IP based PCIe host controller
>>>>>> present in Tegra194 SoC.
>>>
>>>     - Why does this chip require pcie_pme_disable_msi()?  The only other
>>>       use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
>>>       ("PCI PM: Make it possible to force using INTx for PCIe PME
>>>       signaling").
>>
>> Because Tegra194 doesn't support raising PME interrupts through MSI line.
> 
> What does the spec say about this?  Is hardware supposed to support
> MSI for PME?  Given that MSI Wind U-100 and Tegra194 are the only two
> cases we know about where PME via MSI isn't supported, it seems like
> there must be either a requirement for that or some mechanism for the
> OS to figure this out, e.g., a capability bit.
AFAIU, the spec doesn't say anything about whether the PME interrupt should be
delivered through MSI or by other means. As far as Tegra194 is concerned, there
are only two interrupt lines that go from the PCIe IP to the GIC, one being the
legacy interrupt (which represents legacy interrupts coming over the PCIe bus
from downstream devices and also events happening internal to the root port)
and the other being the MSI interrupt (which represents MSI interrupts coming
over the PCIe bus from downstream devices). The hardware folks had a choice to
route PME interrupts either through the legacy interrupt line or through the
MSI interrupt line, and the legacy interrupt line was chosen as a design
choice. That being the case at the hardware level, I used
pcie_pme_disable_msi() to inform the PCIe subsystem that PME interrupts are not
expected over MSI.

> 
>>>>> I see that an earlier patch added "bus" to struct pcie_port.
>>>>> I think it would be better to somehow connect to the
>>>>> pci_host_bridge struct.  Several other drivers already do
>>>>> this; see uses of pci_host_bridge_from_priv().
>>>>
>>>> All non-DesignWare based implementations save their private data
>>>> structure in 'private' pointer of struct pci_host_bridge and use
>>>> pci_host_bridge_from_priv() to get it back. But, DesignWare
>>>> based implementations save pcie_port in 'sysdata' and nothing in
>>>> 'private' pointer. So,  I'm not sure if
>>>> pci_host_bridge_from_priv() can be used in this case. Please do
>>>> let me know if you think otherwise.
>>>
>>> DesignWare-based drivers should have a way to retrieve the
>>> pci_host_bridge pointer.  It doesn't have to be *exactly* the same
>>> as non-DesignWare drivers, but it should be similar.
>>
>> I gave my reasoning as to why with the current code, it is not
>> possible to get the pci_host_bridge structure pointer from struct
>> pcie_port pointer in another thread as a reply to Thierry Reding's
>> comments. Since Jisheng's changes to support remove functionality are
>> accepted, I think using bus pointer saved in struct pcie_port
>> pointer shouldn't be any issue now. Please do let me know if that is
>> something not acceptable.
>>
>>>>> That would give you the bus, as well as flags like
>>>>> no_ext_tags, native_aer, etc, which this driver, being a host
>>>>> bridge driver that's responsible for this part of the
>>>>> firmware/OS interface, may conceivably need.
> 
> I think saving the pp->root_bus pointer as Jisheng's patch does is a
> sub-optimal solution.  If we figure out how to save the
> pci_host_bridge pointer, we automatically get the root bus pointer as
> well.
> 
> It may require some restructuring to save the pci_host_bridge pointer,
> but I doubt it's really *impossible*.
Is it OK to save the pci_host_bridge pointer in the pcie_port structure
directly? I see that as another way to get the pci_host_bridge pointer from the
pcie_port structure. My understanding is that, to get the pci_host_bridge
pointer, either the pcie_port structure should be part of the pci_host_bridge
structure (which is the case with all non-DW implementations) or pcie_port
should have a pointer to some structure which is directly (and not by means of
a pointer) part of the pci_host_bridge structure, so that container_of() can be
used to get pci_host_bridge. As I see it, there is no data object of
pci_host_bridge whose pointer is saved in the pcie_port structure. In fact, it
is the reverse: pcie_port's struct dev pointer is saved as the parent of
pci_host_bridge's struct dev. So, another way would be to iterate over the
children of pcie_port's struct dev pointer to find pci_host_bridge's dev
pointer and from there get pci_host_bridge through container_of(). But I think
that is more complicated than using the bus pointer from pcie_port. I'm not
sure I'm conveying the issue I'm facing here in getting pci_host_bridge from
pcie_port, but doing anything else seems to complicate it even more.

> 
>>>>>> +static int tegra_pcie_dw_runtime_suspend(struct device *dev)
>>>>>> +{
>>>>>> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
>>>>>> +
>>>>>> +	tegra_pcie_downstream_dev_to_D0(pcie);
>>>>>> +
>>>>>> +	pci_stop_root_bus(pcie->pci.pp.bus);
>>>>>> +	pci_remove_root_bus(pcie->pci.pp.bus);
>>>>>
>>>>> Why are you calling these?  No other drivers do this except in
>>>>> their .remove() methods.  Is there something special about
>>>>> Tegra, or is this something the other drivers *should* be
>>>>> doing?
>>>>
>>>> Since this API is called by remove, I'm removing the hierarchy
>>>> to safely bring down all the devices. I'll have to re-visit this
>>>> part as Jisheng Zhang's patches
>>>> https://patchwork.kernel.org/project/linux-pci/list/?series=98559
>>>> are now approved and I need to verify this part after
>>>> cherry-picking Jisheng's changes.
>>>
>>> Tegra194 should do this the same way as other drivers, independent
>>> of Jisheng's changes.
>>
>> When other Designware implementations add remove functionality, even
>> they should be calling these APIs (Jisheng also mentioned the same
>> in his commit message)
> 
> My point is that these APIs should be called from driver .remove()
> methods, not from .runtime_suspend() methods.
.remove() internally calls the pm_runtime_put_sync() API, which calls
.runtime_suspend(). I made a new patch to add a host_deinit() call
which makes all these calls. Since host_init() is called from inside
.runtime_resume() of this driver, to be in sync, I'm now calling
host_deinit() from inside the .runtime_suspend() API.
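Roughly, the flow looks like this (a sketch of the intent only;
tegra_pcie_host_deinit() is just an illustrative name for the new helper):

	static int tegra_pcie_dw_runtime_suspend(struct device *dev)
	{
		struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);

		/* undoes host_init(): downstream devices back to D0, then
		 * pci_stop_root_bus()/pci_remove_root_bus(), etc. */
		tegra_pcie_host_deinit(pcie);

		return 0;
	}

	static int tegra_pcie_dw_remove(struct platform_device *pdev)
	{
		struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);

		pm_runtime_put_sync(pcie->dev);	/* ends up in .runtime_suspend() */
		pm_runtime_disable(pcie->dev);

		return 0;
	}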
  
> 
> Bjorn
>
Bjorn Helgaas April 5, 2019, 6:58 p.m. UTC | #10
On Fri, Apr 05, 2019 at 01:23:51AM +0530, Vidya Sagar wrote:
> On 4/3/2019 11:06 PM, Bjorn Helgaas wrote:
> > On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
> > > On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
> > > > On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
> > > > > On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
> > > > > > On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
> > > > > > > Add support for Synopsys DesignWare core IP based PCIe host controller
> > > > > > > present in Tegra194 SoC.
> > > > 
> > > >     - Why does this chip require pcie_pme_disable_msi()?  The only other
> > > >       use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
> > > >       ("PCI PM: Make it possible to force using INTx for PCIe PME
> > > >       signaling").
> > > 
> > > Because Tegra194 doesn't support raising PME interrupts through MSI line.
> > 
> > What does the spec say about this?  Is hardware supposed to
> > support MSI for PME?  Given that MSI Wind U-100 and Tegra194 are
> > the only two cases we know about where PME via MSI isn't
> > supported, it seems like there must be either a requirement for
> > that or some mechanism for the OS to figure this out, e.g., a
> > capability bit.
>
> AFAIU, Spec doesn't say anything about whether PME interrupt should
> be through MSI or by other means. As far as Tegra194 is concerned,
> there are only two interrupt lanes that go from PCIe IP to GIC, one
> being legacy interrupt (that represents legacy interrupts coming
> over PCIe bus from downstream devices and also the events happening
> internal to root port) and the other being MSI interrupt (that
> represents MSI interrupts coming over PCIe bus from downstream
> devices). Since hardware folks had a choice to route PME interrupts
> either through legacy interrupt line or through MSI interrupt line
> and legacy interrupt line was chosen as a design choice. That being
> the case at hardware level, I tried to inform the same through
> pcie_pme_disable_msi() to PCIe sub-system that PME interrupts are
> not expected over MSI.

There's something wrong here.  Either the question of how PME is
signaled is generic and the spec provides a way for the OS to discover
that method, or it's part of the device-specific architecture that
each host bridge driver has to know about its device.  If the former,
we need to make the PCI core smart enough to figure it out.  If the
latter, we need a better interface than this ad hoc
pcie_pme_disable_msi() thing.  But if it is truly the latter, your
current code is sufficient and we can refine it over time.

> > > > > > I see that an earlier patch added "bus" to struct pcie_port.
> > > > > > I think it would be better to somehow connect to the
> > > > > > pci_host_bridge struct.  Several other drivers already do
> > > > > > this; see uses of pci_host_bridge_from_priv().
> > > > > 
> > > > > All non-DesignWare based implementations save their private data
> > > > > structure in 'private' pointer of struct pci_host_bridge and use
> > > > > pci_host_bridge_from_priv() to get it back. But, DesignWare
> > > > > based implementations save pcie_port in 'sysdata' and nothing in
> > > > > 'private' pointer. So,  I'm not sure if
> > > > > pci_host_bridge_from_priv() can be used in this case. Please do
> > > > > let me know if you think otherwise.
> > > > 
> > > > DesignWare-based drivers should have a way to retrieve the
> > > > pci_host_bridge pointer.  It doesn't have to be *exactly* the same
> > > > as non-DesignWare drivers, but it should be similar.
> > > 
> > > I gave my reasoning as to why with the current code, it is not
> > > possible to get the pci_host_bridge structure pointer from struct
> > > pcie_port pointer in another thread as a reply to Thierry Reding's
> > > comments. Since Jisheng's changes to support remove functionality are
> > > accepted, I think using bus pointer saved in struct pcie_port
> > > pointer shouldn't be any issue now. Please do let me know if that is
> > > something not acceptable.
> > > 
> > > > > > That would give you the bus, as well as flags like
> > > > > > no_ext_tags, native_aer, etc, which this driver, being a host
> > > > > > bridge driver that's responsible for this part of the
> > > > > > firmware/OS interface, may conceivably need.
> > 
> > I think saving the pp->root_bus pointer as Jisheng's patch does is a
> > sub-optimal solution.  If we figure out how to save the
> > pci_host_bridge pointer, we automatically get the root bus pointer as
> > well.
> > 
> > It may require some restructuring to save the pci_host_bridge pointer,
> > but I doubt it's really *impossible*.
>
> Is it OK to save pci_host_bridge pointer in pcie_port structure
> directly? I see that as another way to get pci_host_bridge pointer
> from pcie_port structure. My understanding is that, to get
> pci_host_bridge pointer, either pcie_port structure should be part
> of pci_host_bridge structure (which is the case with all non-DW
> implementations) or pcie_port should have a pointer to some
> structure which is directly (and not by means of a pointer) part of
> pci_host_bridge structure so that container_of() can be used to get
> pci_host_bridge.  As I see, there is no data object of
> pci_host_bridge whose pointer is saved in pcie_port structure. In
> fact, in reverse, pcie_port's struct dev pointer is saved as parent
> to pci_host_bridge's struct dev. So, another way would be to iterate
> over the children of pcie_port's struct dev pointer to find
> pci_host_bridge's dev pointer and from there get pci_host_bridge
> through container_of. But, I think is complicating it more than
> using bus pointer from pcie_port. I'm not sure if I'm able to convey
> the issue I'm facing here to get pci_host_bridge from pcie_port, but
> doing any other thing seems complicating it even more.

What I suspect should happen eventually is the DWC driver should call
devm_pci_alloc_host_bridge() directly, as all the non-DWC drivers do.
That would require a little reorganization of the DWC data structures,
but it would be good to be more consistent.

For now, I think stashing the pointer in pcie_port or dw_pcie would be
OK.  I'm not 100% clear on the difference, but it looks like either
should be common across all the DWC drivers, which is what we want.

(Tangent, dw_pcie_host_init() currently uses pci_alloc_host_bridge(),
not devm_pci_alloc_host_bridge(), even though it uses other devm
interfaces.  This looks like a probable buglet.)

(Tangent 2, devm_pci_alloc_host_bridge() doesn't initialize
bridge->native_aer, etc, as pci_alloc_host_bridge() does.  This looks
like my fault from 02bfeb484230 ("PCI/portdrv: Simplify PCIe feature
permission checking"), and it probably means none of those PCIe
services work correctly for these native host bridge drivers.)

> > > > > > > +static int tegra_pcie_dw_runtime_suspend(struct device *dev)
> > > > > > > +{
> > > > > > > +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
> > > > > > > +
> > > > > > > +	tegra_pcie_downstream_dev_to_D0(pcie);
> > > > > > > +
> > > > > > > +	pci_stop_root_bus(pcie->pci.pp.bus);
> > > > > > > +	pci_remove_root_bus(pcie->pci.pp.bus);
> > > > > > 
> > > > > > Why are you calling these?  No other drivers do this except in
> > > > > > their .remove() methods.  Is there something special about
> > > > > > Tegra, or is this something the other drivers *should* be
> > > > > > doing?
> > > > > 
> > > > > Since this API is called by remove, I'm removing the hierarchy
> > > > > to safely bring down all the devices. I'll have to re-visit this
> > > > > part as Jisheng Zhang's patches
> > > > > https://patchwork.kernel.org/project/linux-pci/list/?series=98559
> > > > > are now approved and I need to verify this part after
> > > > > cherry-picking Jisheng's changes.
> > > > 
> > > > Tegra194 should do this the same way as other drivers, independent
> > > > of Jisheng's changes.
> > > 
> > > When other Designware implementations add remove functionality, even
> > > they should be calling these APIs (Jisheng also mentioned the same
> > > in his commit message)
> > 
> > My point is that these APIs should be called from driver .remove()
> > methods, not from .runtime_suspend() methods.
>
> .remove() internally calls pm_runtime_put_sync() API which calls
> .runtime_suspend(). I made a new patch to add a host_deinit() call
> which make all these calls. Since host_init() is called from inside
> .runtime_resume() of this driver, to be in sync, I'm now calling
> host_deinit() from inside .runtime_suspend() API.

I think this is wrong.  pci_stop_root_bus() will detach all the
drivers from all the devices.  We don't want to do that if we're
merely runtime suspending the host bridge, do we?

Bjorn
Vidya Sagar April 9, 2019, 11:30 a.m. UTC | #11
On 4/6/2019 12:28 AM, Bjorn Helgaas wrote:
> On Fri, Apr 05, 2019 at 01:23:51AM +0530, Vidya Sagar wrote:
>> On 4/3/2019 11:06 PM, Bjorn Helgaas wrote:
>>> On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
>>>> On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
>>>>> On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
>>>>>> On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
>>>>>>> On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
>>>>>>>> Add support for Synopsys DesignWare core IP based PCIe host controller
>>>>>>>> present in Tegra194 SoC.
>>>>>
>>>>>      - Why does this chip require pcie_pme_disable_msi()?  The only other
>>>>>        use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
>>>>>        ("PCI PM: Make it possible to force using INTx for PCIe PME
>>>>>        signaling").
>>>>
>>>> Because Tegra194 doesn't support raising PME interrupts through MSI line.
>>>
>>> What does the spec say about this?  Is hardware supposed to
>>> support MSI for PME?  Given that MSI Wind U-100 and Tegra194 are
>>> the only two cases we know about where PME via MSI isn't
>>> supported, it seems like there must be either a requirement for
>>> that or some mechanism for the OS to figure this out, e.g., a
>>> capability bit.
>>
>> AFAIU, Spec doesn't say anything about whether PME interrupt should
>> be through MSI or by other means. As far as Tegra194 is concerned,
>> there are only two interrupt lanes that go from PCIe IP to GIC, one
>> being legacy interrupt (that represents legacy interrupts coming
>> over PCIe bus from downstream devices and also the events happening
>> internal to root port) and the other being MSI interrupt (that
>> represents MSI interrupts coming over PCIe bus from downstream
>> devices). Since hardware folks had a choice to route PME interrupts
>> either through legacy interrupt line or through MSI interrupt line
>> and legacy interrupt line was chosen as a design choice. That being
>> the case at hardware level, I tried to inform the same through
>> pcie_pme_disable_msi() to PCIe sub-system that PME interrupts are
>> not expected over MSI.
> 
> There's something wrong here.  Either the question of how PME is
> signaled is generic and the spec provides a way for the OS to discover
> that method, or it's part of the device-specific architecture that
> each host bridge driver has to know about its device.  If the former,
> we need to make the PCI core smart enough to figure it out.  If the
> latter, we need a better interface than this ad hoc
> pcie_pme_disable_msi() thing.  But if it is truly the latter, your
> current code is sufficient and we can refine it over time.
In case of Tegra194, it is the latter case.

> 
>>>>>>> I see that an earlier patch added "bus" to struct pcie_port.
>>>>>>> I think it would be better to somehow connect to the
>>>>>>> pci_host_bridge struct.  Several other drivers already do
>>>>>>> this; see uses of pci_host_bridge_from_priv().
>>>>>>
>>>>>> All non-DesignWare based implementations save their private data
>>>>>> structure in 'private' pointer of struct pci_host_bridge and use
>>>>>> pci_host_bridge_from_priv() to get it back. But, DesignWare
>>>>>> based implementations save pcie_port in 'sysdata' and nothing in
>>>>>> 'private' pointer. So,  I'm not sure if
>>>>>> pci_host_bridge_from_priv() can be used in this case. Please do
>>>>>> let me know if you think otherwise.
>>>>>
>>>>> DesignWare-based drivers should have a way to retrieve the
>>>>> pci_host_bridge pointer.  It doesn't have to be *exactly* the same
>>>>> as non-DesignWare drivers, but it should be similar.
>>>>
>>>> I gave my reasoning as to why with the current code, it is not
>>>> possible to get the pci_host_bridge structure pointer from struct
>>>> pcie_port pointer in another thread as a reply to Thierry Reding's
>>>> comments. Since Jisheng's changes to support remove functionality are
>>>> accepted, I think using bus pointer saved in struct pcie_port
>>>> pointer shouldn't be any issue now. Please do let me know if that is
>>>> something not acceptable.
>>>>
>>>>>>> That would give you the bus, as well as flags like
>>>>>>> no_ext_tags, native_aer, etc, which this driver, being a host
>>>>>>> bridge driver that's responsible for this part of the
>>>>>>> firmware/OS interface, may conceivably need.
>>>
>>> I think saving the pp->root_bus pointer as Jisheng's patch does is a
>>> sub-optimal solution.  If we figure out how to save the
>>> pci_host_bridge pointer, we automatically get the root bus pointer as
>>> well.
>>>
>>> It may require some restructuring to save the pci_host_bridge pointer,
>>> but I doubt it's really *impossible*.
>>
>> Is it OK to save pci_host_bridge pointer in pcie_port structure
>> directly? I see that as another way to get pci_host_bridge pointer
>> from pcie_port structure. My understanding is that, to get
>> pci_host_bridge pointer, either pcie_port structure should be part
>> of pci_host_bridge structure (which is the case with all non-DW
>> implementations) or pcie_port should have a pointer to some
>> structure which is directly (and not by means of a pointer) part of
>> pci_host_bridge structure so that container_of() can be used to get
>> pci_host_bridge.  As I see, there is no data object of
>> pci_host_bridge whose pointer is saved in pcie_port structure. In
>> fact, in reverse, pcie_port's struct dev pointer is saved as parent
>> to pci_host_bridge's struct dev. So, another way would be to iterate
>> over the children of pcie_port's struct dev pointer to find
>> pci_host_bridge's dev pointer and from there get pci_host_bridge
>> through container_of. But, I think is complicating it more than
>> using bus pointer from pcie_port. I'm not sure if I'm able to convey
>> the issue I'm facing here to get pci_host_bridge from pcie_port, but
>> doing any other thing seems complicating it even more.
> 
> What I suspect should happen eventually is the DWC driver should call
> devm_pci_alloc_host_bridge() directly, as all the non-DWC drivers do.
> That would require a little reorganization of the DWC data structures,
> but it would be good to be more consistent.
> 
> For now, I think stashing the pointer in pcie_port or dw_pcie would be
> OK.  I'm not 100% clear on the difference, but it looks like either
> should be common across all the DWC drivers, which is what we want.
Since dw_pcie is common to both the root port and endpoint mode structures,
I think it makes sense to keep the pointer in pcie_port, as this structure
is specific to root port mode of operation.
I'll make a note to reorganize the code to use devm_pci_alloc_host_bridge()
from the beginning, in line with the non-DWC implementations, but I'll take
that up later (after I'm done upstreaming the current series).
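For the interim approach, what I have in mind is something like this (the
'bridge' member is a proposed addition, not existing code):

	struct pcie_port {
		/* ... existing members ... */
		struct pci_host_bridge	*bridge;
	};

	/* in dw_pcie_host_init(), right after the bridge is allocated: */
	pp->bridge = bridge;

	/* drivers can then reach the root bus and the bridge flags via
	 * pp->bridge->bus, pp->bridge->no_ext_tags, etc. */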

> 
> (Tangent, dw_pcie_host_init() currently uses pci_alloc_host_bridge(),
> not devm_pci_alloc_host_bridge(), even though it uses other devm
> interfaces.  This looks like a probable buglet.)
> 
> (Tangent 2, devm_pci_alloc_host_bridge() doesn't initialize
> bridge->native_aer, etc, as pci_alloc_host_bridge() does.  This looks
> like my fault from 02bfeb484230 ("PCI/portdrv: Simplify PCIe feature
> permission checking"), and it probably means none of those PCIe
> services work correctly for these native host bridge drivers.)
> 
>>>>>>>> +static int tegra_pcie_dw_runtime_suspend(struct device *dev)
>>>>>>>> +{
>>>>>>>> +	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
>>>>>>>> +
>>>>>>>> +	tegra_pcie_downstream_dev_to_D0(pcie);
>>>>>>>> +
>>>>>>>> +	pci_stop_root_bus(pcie->pci.pp.bus);
>>>>>>>> +	pci_remove_root_bus(pcie->pci.pp.bus);
>>>>>>>
>>>>>>> Why are you calling these?  No other drivers do this except in
>>>>>>> their .remove() methods.  Is there something special about
>>>>>>> Tegra, or is this something the other drivers *should* be
>>>>>>> doing?
>>>>>>
>>>>>> Since this API is called by remove, I'm removing the hierarchy
>>>>>> to safely bring down all the devices. I'll have to re-visit this
>>>>>> part as Jisheng Zhang's patches
>>>>>> https://patchwork.kernel.org/project/linux-pci/list/?series=98559
>>>>>> are now approved and I need to verify this part after
>>>>>> cherry-picking Jisheng's changes.
>>>>>
>>>>> Tegra194 should do this the same way as other drivers, independent
>>>>> of Jisheng's changes.
>>>>
>>>> When other Designware implementations add remove functionality, even
>>>> they should be calling these APIs (Jisheng also mentioned the same
>>>> in his commit message)
>>>
>>> My point is that these APIs should be called from driver .remove()
>>> methods, not from .runtime_suspend() methods.
>>
>> .remove() internally calls pm_runtime_put_sync() API which calls
>> .runtime_suspend(). I made a new patch to add a host_deinit() call
>> which make all these calls. Since host_init() is called from inside
>> .runtime_resume() of this driver, to be in sync, I'm now calling
>> host_deinit() from inside .runtime_suspend() API.
> 
> I think this is wrong.  pci_stop_root_bus() will detach all the
> drivers from all the devices.  We don't want to do that if we're
> merely runtime suspending the host bridge, do we?
In the current driver, the scenarios in which .runtime_suspend() is called
are
a) during the .remove() call, and
b) when no endpoint is found and the controller is to be shut down.
In both cases, it is required to stop the root bus and remove all devices,
so, instead of having the same calls present in the respective paths, I kept
them in .runtime_suspend() itself to avoid code duplication.

> 
> Bjorn
>
Bjorn Helgaas April 9, 2019, 1:26 p.m. UTC | #12
On Tue, Apr 09, 2019 at 05:00:53PM +0530, Vidya Sagar wrote:
> On 4/6/2019 12:28 AM, Bjorn Helgaas wrote:
> > On Fri, Apr 05, 2019 at 01:23:51AM +0530, Vidya Sagar wrote:
> > > On 4/3/2019 11:06 PM, Bjorn Helgaas wrote:
> > > > On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
> > > > > On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
> > > > > > On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
> > > > > > > On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
> > > > > > > > On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
> > > > > > > > > Add support for Synopsys DesignWare core IP based PCIe host controller
> > > > > > > > > present in Tegra194 SoC.
> > > > > > 
> > > > > >      - Why does this chip require pcie_pme_disable_msi()?  The only other
> > > > > >        use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
> > > > > >        ("PCI PM: Make it possible to force using INTx for PCIe PME
> > > > > >        signaling").
> > > > > 
> > > > > Because Tegra194 doesn't support raising PME interrupts through MSI line.

> > There's something wrong here.  Either the question of how PME is
> > signaled is generic and the spec provides a way for the OS to discover
> > that method, or it's part of the device-specific architecture that
> > each host bridge driver has to know about its device.  If the former,
> > we need to make the PCI core smart enough to figure it out.  If the
> > latter, we need a better interface than this ad hoc
> > pcie_pme_disable_msi() thing.  But if it is truly the latter, your
> > current code is sufficient and we can refine it over time.
>
> In case of Tegra194, it is the latter case.

This isn't a Tegra194 question; it's a question of whether this
behavior is covered by the PCIe spec.

> > What I suspect should happen eventually is the DWC driver should call
> > devm_pci_alloc_host_bridge() directly, as all the non-DWC drivers do.
> > That would require a little reorganization of the DWC data structures,
> > but it would be good to be more consistent.
> > 
> > For now, I think stashing the pointer in pcie_port or dw_pcie would be
> > OK.  I'm not 100% clear on the difference, but it looks like either
> > should be common across all the DWC drivers, which is what we want.
>
> Since dw_pcie is common for both root port and end point mode structures,
> I think it makes sense to keep the pointer in pcie_port as this structure
> is specific to root port mode of operation.
> I'll make a note to reorganize code to have devm_pci_alloc_host_bridge()
> used in the beginning itself to be inline with non-DWC implementations.
> But, I'll take it up later (after I'm done with upstreaming current series)

Fair enough.

> > > .remove() internally calls pm_runtime_put_sync() API which calls
> > > .runtime_suspend(). I made a new patch to add a host_deinit() call
> > > which make all these calls. Since host_init() is called from inside
> > > .runtime_resume() of this driver, to be in sync, I'm now calling
> > > host_deinit() from inside .runtime_suspend() API.
> > 
> > I think this is wrong.  pci_stop_root_bus() will detach all the
> > drivers from all the devices.  We don't want to do that if we're
> > merely runtime suspending the host bridge, do we?
>
> In the current driver, the scenarios in which .runtime_suspend() is called
> are
> a) during .remove() call and

It makes sense that you should call pci_stop_root_bus() during
.remove(), i.e., when the host controller driver is being removed,
because the PCI bus will no longer be accessible.  I think you should
call it *directly* from tegra_pcie_dw_remove() because that will match
what other drivers do.
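
Something along these lines is what I have in mind (untested sketch,
assuming the root bus pointer is still stashed in pcie->pci.pp.bus as in
the current patch):

static int tegra_pcie_dw_remove(struct platform_device *pdev)
{
	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);

	/* Tear down the hierarchy while the bus is still accessible */
	pci_stop_root_bus(pcie->pci.pp.bus);
	pci_remove_root_bus(pcie->pci.pp.bus);

	/* Then let runtime PM power-gate the controller */
	pm_runtime_put_sync(&pdev->dev);
	pm_runtime_disable(&pdev->dev);

	return 0;
}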

> b) when there is no endpoint found and controller would be shutdown
> In both cases, it is required to stop the root bus and remove all devices,
> so, instead of having same call present in respective paths, I kept them
> in .runtime_suspend() itself to avoid code duplication.

I don't understand this part.  We should be able to runtime suspend
the host controller without detaching drivers for child devices.

If you shutdown the controller completely and detach the *host
controller driver*, sure, it makes sense to detach drivers from child
devices.  But that would be handled by the host controller .remove()
method, not by the runtime suspend method.

Bjorn
Vidya Sagar April 10, 2019, 6:10 a.m. UTC | #13
On 4/9/2019 6:56 PM, Bjorn Helgaas wrote:
> On Tue, Apr 09, 2019 at 05:00:53PM +0530, Vidya Sagar wrote:
>> On 4/6/2019 12:28 AM, Bjorn Helgaas wrote:
>>> On Fri, Apr 05, 2019 at 01:23:51AM +0530, Vidya Sagar wrote:
>>>> On 4/3/2019 11:06 PM, Bjorn Helgaas wrote:
>>>>> On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
>>>>>> On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
>>>>>>> On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
>>>>>>>> On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
>>>>>>>>> On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
>>>>>>>>>> Add support for Synopsys DesignWare core IP based PCIe host controller
>>>>>>>>>> present in Tegra194 SoC.
>>>>>>>
>>>>>>>       - Why does this chip require pcie_pme_disable_msi()?  The only other
>>>>>>>         use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
>>>>>>>         ("PCI PM: Make it possible to force using INTx for PCIe PME
>>>>>>>         signaling").
>>>>>>
>>>>>> Because Tegra194 doesn't support raising PME interrupts through MSI line.
> 
>>> There's something wrong here.  Either the question of how PME is
>>> signaled is generic and the spec provides a way for the OS to discover
>>> that method, or it's part of the device-specific architecture that
>>> each host bridge driver has to know about its device.  If the former,
>>> we need to make the PCI core smart enough to figure it out.  If the
>>> latter, we need a better interface than this ad hoc
>>> pcie_pme_disable_msi() thing.  But if it is truly the latter, your
>>> current code is sufficient and we can refine it over time.
>>
>> In case of Tegra194, it is the latter case.
> 
> This isn't a Tegra194 question; it's a question of whether this
> behavior is covered by the PCIe spec.
As far as I understand the spec, and from what I heard from Nvidia hardware
folks, the spec doesn't explicitly talk about this; it was a design choice
made by the Nvidia hardware folks to route these interrupts through the
legacy line instead of the MSI line.

> 
>>> What I suspect should happen eventually is the DWC driver should call
>>> devm_pci_alloc_host_bridge() directly, as all the non-DWC drivers do.
>>> That would require a little reorganization of the DWC data structures,
>>> but it would be good to be more consistent.
>>>
>>> For now, I think stashing the pointer in pcie_port or dw_pcie would be
>>> OK.  I'm not 100% clear on the difference, but it looks like either
>>> should be common across all the DWC drivers, which is what we want.
>>
>> Since dw_pcie is common for both root port and end point mode structures,
>> I think it makes sense to keep the pointer in pcie_port as this structure
>> is specific to root port mode of operation.
>> I'll make a note to reorganize code to have devm_pci_alloc_host_bridge()
>> used in the beginning itself to be inline with non-DWC implementations.
>> But, I'll take it up later (after I'm done with upstreaming current series)
> 
> Fair enough.
> 
>>>> .remove() internally calls pm_runtime_put_sync() API which calls
>>>> .runtime_suspend(). I made a new patch to add a host_deinit() call
>>>> which make all these calls. Since host_init() is called from inside
>>>> .runtime_resume() of this driver, to be in sync, I'm now calling
>>>> host_deinit() from inside .runtime_suspend() API.
>>>
>>> I think this is wrong.  pci_stop_root_bus() will detach all the
>>> drivers from all the devices.  We don't want to do that if we're
>>> merely runtime suspending the host bridge, do we?
>>
>> In the current driver, the scenarios in which .runtime_suspend() is called
>> are
>> a) during .remove() call and
> 
> It makes sense that you should call pci_stop_root_bus() during
> .remove(), i.e., when the host controller driver is being removed,
> because the PCI bus will no longer be accessible.  I think you should
> call it *directly* from tegra_pcie_dw_remove() because that will match
> what other drivers do.
> 
>> b) when there is no endpoint found and controller would be shutdown
>> In both cases, it is required to stop the root bus and remove all devices,
>> so, instead of having same call present in respective paths, I kept them
>> in .runtime_suspend() itself to avoid code duplication.
> 
> I don't understand this part.  We should be able to runtime suspend
> the host controller without detaching drivers for child devices.
> 
> If you shutdown the controller completely and detach the *host
> controller driver*, sure, it makes sense to detach drivers from child
> devices.  But that would be handled by the host controller .remove()
> method, not by the runtime suspend method.
I think it is time I give some background on why I chose to implement the
.runtime_suspend() and .runtime_resume() callbacks in the first place. We
wanted to power down the PCIe controller if the link never comes up (i.e.
the slot is empty and no endpoint devices are connected), and to achieve
this without returning a failure from .probe(). Since PCIe IP power
partitioning is handled by the generic power domain framework, the power
partition gets unpowergated before .probe() is called and gets powergated
again either when .probe() returns a failure or when pm_runtime_put_sync()
is called. So I chose to call pm_runtime_put_sync() in the no-link-up
scenario and to implement .runtime_suspend() to handle all the cleanup work
before the PCIe partition gets powergated. To match this, I'm doing all the
PCIe IP bring-up in the .runtime_resume() implementation, which is invoked
by pm_runtime_get_sync(), which in turn is called in the .probe() path;
even dw_pcie_host_init() itself is called from .runtime_resume(). That is
why I call pci_stop_root_bus() and pci_remove_root_bus() from
.runtime_suspend(): pm_runtime_put_sync() is called both from .remove() and
in the no-link-up scenario. Please do let me know if there is a better way
to do this.
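
To make the flow concrete, this is roughly how the pieces chain together
today (simplified sketch of the call flow; allocation, clocks, resets and
error handling are all elided):

	/* in .probe() */
	pm_runtime_enable(pcie->dev);
	ret = pm_runtime_get_sync(pcie->dev);	/* -> .runtime_resume() */

	/* in .runtime_resume(), after unpowergate/clocks/resets */
	ret = dw_pcie_host_init(&pcie->pci.pp);

	/* in the no-link-up path of host bring-up */
	pm_runtime_put_sync(pcie->dev);		/* -> .runtime_suspend() -> bus teardown */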

> 
> Bjorn
>
Liviu Dudau April 10, 2019, 8:14 a.m. UTC | #14
On Wed, Apr 10, 2019 at 11:40:40AM +0530, Vidya Sagar wrote:
> On 4/9/2019 6:56 PM, Bjorn Helgaas wrote:
> > On Tue, Apr 09, 2019 at 05:00:53PM +0530, Vidya Sagar wrote:
> > > On 4/6/2019 12:28 AM, Bjorn Helgaas wrote:
> > > > On Fri, Apr 05, 2019 at 01:23:51AM +0530, Vidya Sagar wrote:
> > > > > On 4/3/2019 11:06 PM, Bjorn Helgaas wrote:
> > > > > > On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
> > > > > > > On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
> > > > > > > > On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
> > > > > > > > > On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
> > > > > > > > > > On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
> > > > > > > > > > > Add support for Synopsys DesignWare core IP based PCIe host controller
> > > > > > > > > > > present in Tegra194 SoC.
> > > > > > > > 
> > > > > > > >       - Why does this chip require pcie_pme_disable_msi()?  The only other
> > > > > > > >         use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
> > > > > > > >         ("PCI PM: Make it possible to force using INTx for PCIe PME
> > > > > > > >         signaling").
> > > > > > > 
> > > > > > > Because Tegra194 doesn't support raising PME interrupts through MSI line.
> > 
> > > > There's something wrong here.  Either the question of how PME is
> > > > signaled is generic and the spec provides a way for the OS to discover
> > > > that method, or it's part of the device-specific architecture that
> > > > each host bridge driver has to know about its device.  If the former,
> > > > we need to make the PCI core smart enough to figure it out.  If the
> > > > latter, we need a better interface than this ad hoc
> > > > pcie_pme_disable_msi() thing.  But if it is truly the latter, your
> > > > current code is sufficient and we can refine it over time.
> > > 
> > > In case of Tegra194, it is the latter case.
> > 
> > This isn't a Tegra194 question; it's a question of whether this
> > behavior is covered by the PCIe spec.
> AFAIU the spec and what I heard from Nvidia hardware folks is that spec doesn't
> explicitly talk about this and it was a design choice made by Nvidia hardware
> folks to route these interrupts through legacy line instead of MSI line.
> 
> > 
> > > > What I suspect should happen eventually is the DWC driver should call
> > > > devm_pci_alloc_host_bridge() directly, as all the non-DWC drivers do.
> > > > That would require a little reorganization of the DWC data structures,
> > > > but it would be good to be more consistent.
> > > > 
> > > > For now, I think stashing the pointer in pcie_port or dw_pcie would be
> > > > OK.  I'm not 100% clear on the difference, but it looks like either
> > > > should be common across all the DWC drivers, which is what we want.
> > > 
> > > Since dw_pcie is common for both root port and end point mode structures,
> > > I think it makes sense to keep the pointer in pcie_port as this structure
> > > is specific to root port mode of operation.
> > > I'll make a note to reorganize code to have devm_pci_alloc_host_bridge()
> > > used in the beginning itself to be inline with non-DWC implementations.
> > > But, I'll take it up later (after I'm done with upstreaming current series)
> > 
> > Fair enough.
> > 
> > > > > .remove() internally calls pm_runtime_put_sync() API which calls
> > > > > .runtime_suspend(). I made a new patch to add a host_deinit() call
> > > > > which make all these calls. Since host_init() is called from inside
> > > > > .runtime_resume() of this driver, to be in sync, I'm now calling
> > > > > host_deinit() from inside .runtime_suspend() API.
> > > > 
> > > > I think this is wrong.  pci_stop_root_bus() will detach all the
> > > > drivers from all the devices.  We don't want to do that if we're
> > > > merely runtime suspending the host bridge, do we?
> > > 
> > > In the current driver, the scenarios in which .runtime_suspend() is called
> > > are
> > > a) during .remove() call and
> > 
> > It makes sense that you should call pci_stop_root_bus() during
> > .remove(), i.e., when the host controller driver is being removed,
> > because the PCI bus will no longer be accessible.  I think you should
> > call it *directly* from tegra_pcie_dw_remove() because that will match
> > what other drivers do.
> > 
> > > b) when there is no endpoint found and controller would be shutdown
> > > In both cases, it is required to stop the root bus and remove all devices,
> > > so, instead of having same call present in respective paths, I kept them
> > > in .runtime_suspend() itself to avoid code duplication.
> > 
> > I don't understand this part.  We should be able to runtime suspend
> > the host controller without detaching drivers for child devices.
> > 
> > If you shutdown the controller completely and detach the *host
> > controller driver*, sure, it makes sense to detach drivers from child
> > devices.  But that would be handled by the host controller .remove()
> > method, not by the runtime suspend method.
> I think it is time I give some background about why I chose to implement
> .runtime_suspend() and .runtime_resume() APIs in the first place. We wanted to
> powerdown PCIe controller if there is no link up (i.e. slot is open and no endpoint
> devices are connected). We want to achieve this without returning a failure in
> .probe() call. Given PCIe IP power partitioning is handled by generic power domain
> framework, power partition gets unpowergated before .probe() gets called and gets
> powergated either when a failure is returned in .probe() or when pm_runtime_put_sync()
> is called. So, I chose to call pm_runtime_put_sync() in no-link-up scenario and chose
> to implement .runtime_suspend() to handle all the cleanup work before PCIe partition
> getting powergated. In fact, to match this, I'm doing all the PCIe IP bring up
> activity in .runtime_resume() implementation which gets invoked by pm_runtime_get_sync()
> which in turn is called in .probe() path. In fact the very dw_pcie_host_init() itself
> is called from .runtime_resume() implementation. So, it is because of these reasons that
> I called pci_stop_root_bus() and pci_remove_root_bus() as part of .runtime_suspend()
> implementation as pm_runtime_put_sync() is called from both .remove() and also during
> no-link-up scenario. Please do let me know if there is a better way to do this.

I think you're missing the case where .runtime_suspend() is called when
there are no *active* devices on the bus, i.e. everyone is dormant. It
doesn't mean that you need to remove them from the bus and re-probe them
back on .runtime_resume(). Most of the drivers for PCI devices don't
expect to be removed during idle, as they will configure the hardware to
be in a "quick wake" state (see PCIe Dx power states).

You should probe and configure the bus during .probe() and remove and
detach all drivers during .remove(). For .runtime_suspend() all you need
to do is put the host controller in low power mode if it has one, or
stop all clocks that are not required for responding to devices waking
up from PCIe Dx state. For .runtime_resume() you then restore the
clocks, without re-scanning the bus.
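
I.e. something along these lines (rough sketch only, using the core clock
from the current patch and with no bus removal at all):

static int tegra_pcie_dw_runtime_suspend(struct device *dev)
{
	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);

	/* Keep the PCI hierarchy intact, just gate what isn't needed */
	clk_disable_unprepare(pcie->core_clk);

	return 0;
}

static int tegra_pcie_dw_runtime_resume(struct device *dev)
{
	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);

	/* Restore the clock; no re-scan of the bus */
	return clk_prepare_enable(pcie->core_clk);
}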

Best regards,
Liviu


> 
> > 
> > Bjorn
> > 
>
Vidya Sagar April 10, 2019, 9:53 a.m. UTC | #15
On 4/10/2019 1:44 PM, Liviu Dudau wrote:
> On Wed, Apr 10, 2019 at 11:40:40AM +0530, Vidya Sagar wrote:
>> On 4/9/2019 6:56 PM, Bjorn Helgaas wrote:
>>> On Tue, Apr 09, 2019 at 05:00:53PM +0530, Vidya Sagar wrote:
>>>> On 4/6/2019 12:28 AM, Bjorn Helgaas wrote:
>>>>> On Fri, Apr 05, 2019 at 01:23:51AM +0530, Vidya Sagar wrote:
>>>>>> On 4/3/2019 11:06 PM, Bjorn Helgaas wrote:
>>>>>>> On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
>>>>>>>> On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
>>>>>>>>> On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
>>>>>>>>>> On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
>>>>>>>>>>> On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
>>>>>>>>>>>> Add support for Synopsys DesignWare core IP based PCIe host controller
>>>>>>>>>>>> present in Tegra194 SoC.
>>>>>>>>>
>>>>>>>>>        - Why does this chip require pcie_pme_disable_msi()?  The only other
>>>>>>>>>          use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
>>>>>>>>>          ("PCI PM: Make it possible to force using INTx for PCIe PME
>>>>>>>>>          signaling").
>>>>>>>>
>>>>>>>> Because Tegra194 doesn't support raising PME interrupts through MSI line.
>>>
>>>>> There's something wrong here.  Either the question of how PME is
>>>>> signaled is generic and the spec provides a way for the OS to discover
>>>>> that method, or it's part of the device-specific architecture that
>>>>> each host bridge driver has to know about its device.  If the former,
>>>>> we need to make the PCI core smart enough to figure it out.  If the
>>>>> latter, we need a better interface than this ad hoc
>>>>> pcie_pme_disable_msi() thing.  But if it is truly the latter, your
>>>>> current code is sufficient and we can refine it over time.
>>>>
>>>> In case of Tegra194, it is the latter case.
>>>
>>> This isn't a Tegra194 question; it's a question of whether this
>>> behavior is covered by the PCIe spec.
>> AFAIU the spec and what I heard from Nvidia hardware folks is that spec doesn't
>> explicitly talk about this and it was a design choice made by Nvidia hardware
>> folks to route these interrupts through legacy line instead of MSI line.
>>
>>>
>>>>> What I suspect should happen eventually is the DWC driver should call
>>>>> devm_pci_alloc_host_bridge() directly, as all the non-DWC drivers do.
>>>>> That would require a little reorganization of the DWC data structures,
>>>>> but it would be good to be more consistent.
>>>>>
>>>>> For now, I think stashing the pointer in pcie_port or dw_pcie would be
>>>>> OK.  I'm not 100% clear on the difference, but it looks like either
>>>>> should be common across all the DWC drivers, which is what we want.
>>>>
>>>> Since dw_pcie is common for both root port and end point mode structures,
>>>> I think it makes sense to keep the pointer in pcie_port as this structure
>>>> is specific to root port mode of operation.
>>>> I'll make a note to reorganize code to have devm_pci_alloc_host_bridge()
>>>> used in the beginning itself to be inline with non-DWC implementations.
>>>> But, I'll take it up later (after I'm done with upstreaming current series)
>>>
>>> Fair enough.
>>>
>>>>>> .remove() internally calls pm_runtime_put_sync() API which calls
>>>>>> .runtime_suspend(). I made a new patch to add a host_deinit() call
>>>>>> which make all these calls. Since host_init() is called from inside
>>>>>> .runtime_resume() of this driver, to be in sync, I'm now calling
>>>>>> host_deinit() from inside .runtime_suspend() API.
>>>>>
>>>>> I think this is wrong.  pci_stop_root_bus() will detach all the
>>>>> drivers from all the devices.  We don't want to do that if we're
>>>>> merely runtime suspending the host bridge, do we?
>>>>
>>>> In the current driver, the scenarios in which .runtime_suspend() is called
>>>> are
>>>> a) during .remove() call and
>>>
>>> It makes sense that you should call pci_stop_root_bus() during
>>> .remove(), i.e., when the host controller driver is being removed,
>>> because the PCI bus will no longer be accessible.  I think you should
>>> call it *directly* from tegra_pcie_dw_remove() because that will match
>>> what other drivers do.
>>>
>>>> b) when there is no endpoint found and controller would be shutdown
>>>> In both cases, it is required to stop the root bus and remove all devices,
>>>> so, instead of having same call present in respective paths, I kept them
>>>> in .runtime_suspend() itself to avoid code duplication.
>>>
>>> I don't understand this part.  We should be able to runtime suspend
>>> the host controller without detaching drivers for child devices.
>>>
>>> If you shutdown the controller completely and detach the *host
>>> controller driver*, sure, it makes sense to detach drivers from child
>>> devices.  But that would be handled by the host controller .remove()
>>> method, not by the runtime suspend method.
>> I think it is time I give some background about why I chose to implement
>> .runtime_suspend() and .runtime_resume() APIs in the first place. We wanted to
>> powerdown PCIe controller if there is no link up (i.e. slot is open and no endpoint
>> devices are connected). We want to achieve this without returning a failure in
>> .probe() call. Given PCIe IP power partitioning is handled by generic power domain
>> framework, power partition gets unpowergated before .probe() gets called and gets
>> powergated either when a failure is returned in .probe() or when pm_runtime_put_sync()
>> is called. So, I chose to call pm_runtime_put_sync() in no-link-up scenario and chose
>> to implement .runtime_suspend() to handle all the cleanup work before PCIe partition
>> getting powergated. In fact, to match this, I'm doing all the PCIe IP bring up
>> activity in .runtime_resume() implementation which gets invoked by pm_runtime_get_sync()
>> which in turn is called in .probe() path. In fact the very dw_pcie_host_init() itself
>> is called from .runtime_resume() implementation. So, it is because of these reasons that
>> I called pci_stop_root_bus() and pci_remove_root_bus() as part of .runtime_suspend()
>> implementation as pm_runtime_put_sync() is called from both .remove() and also during
>> no-link-up scenario. Please do let me know if there is a better way to do this.
> 
> I think you're missing the case where .runtime_suspend() is called when
> there are no *active* devices on the bus, i.e. everyone is dormant. It
> doesn't mean that you need to remove them from the bus and re-probe them
> back on .runtime_resume(). Most of the drivers for PCI devices don't
> expect to be removed during idle, as they will configure the hardware to
> be in a "quick wake" state (see PCIe Dx power states).
> 
> You should probe and configure the bus during .probe() and remove and
> detach all drivers during .remove(). For .runtime_suspend() all you need
> to do is put the host controller in low power mode if it has one, or
> stop all clocks that are not required for responding to devices waking
> up from PCIe Dx state. For .runtime_resume() you then restore the
> clocks, without re-scanning the bus.
Since this is a host controller driver and the device itself sits on the
platform bus rather than the PCIe bus, is it still the case that this
driver's .runtime_suspend() and .runtime_resume() get called when the
devices on the PCIe bus are idle?

Having asked that, I'm starting to feel that what I'm doing in
.runtime_suspend() and .runtime_resume() doesn't really justify those
callback names. Since I know where I'm calling the pm_runtime_get/put_sync()
APIs (which eventually invoke .runtime_resume()/.runtime_suspend()), I should
probably move the contents of these callbacks out to the places where I call
pm_runtime_get/put_sync(). Do you agree with that?
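
Something like the below is what I have in mind (rough sketch, error
handling mostly elided; tegra_pcie_config_controller() and
tegra_pcie_unconfig_controller() are placeholder names for the
bring-up/teardown code that currently lives in
.runtime_resume()/.runtime_suspend()):

	pm_runtime_enable(pcie->dev);
	/* Now only unpowergates the partition, no PCIe bring-up */
	ret = pm_runtime_get_sync(pcie->dev);

	ret = tegra_pcie_config_controller(pcie); /* clocks, resets, dw_pcie_host_init() */
	if (ret) {
		tegra_pcie_unconfig_controller(pcie);
		/* Powergate the partition again in the no-link-up case */
		pm_runtime_put_sync(pcie->dev);
	}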

> 
> Best regards,
> Liviu
> 
> 
>>
>>>
>>> Bjorn
>>>
>>
>
Liviu Dudau April 10, 2019, 11:35 a.m. UTC | #16
On Wed, Apr 10, 2019 at 03:23:39PM +0530, Vidya Sagar wrote:
> On 4/10/2019 1:44 PM, Liviu Dudau wrote:
> > On Wed, Apr 10, 2019 at 11:40:40AM +0530, Vidya Sagar wrote:
> > > On 4/9/2019 6:56 PM, Bjorn Helgaas wrote:
> > > > On Tue, Apr 09, 2019 at 05:00:53PM +0530, Vidya Sagar wrote:
> > > > > On 4/6/2019 12:28 AM, Bjorn Helgaas wrote:
> > > > > > On Fri, Apr 05, 2019 at 01:23:51AM +0530, Vidya Sagar wrote:
> > > > > > > On 4/3/2019 11:06 PM, Bjorn Helgaas wrote:
> > > > > > > > On Wed, Apr 03, 2019 at 03:13:09PM +0530, Vidya Sagar wrote:
> > > > > > > > > On 4/3/2019 12:01 AM, Bjorn Helgaas wrote:
> > > > > > > > > > On Tue, Apr 02, 2019 at 12:47:48PM +0530, Vidya Sagar wrote:
> > > > > > > > > > > On 3/30/2019 2:22 AM, Bjorn Helgaas wrote:
> > > > > > > > > > > > On Tue, Mar 26, 2019 at 08:43:26PM +0530, Vidya Sagar wrote:
> > > > > > > > > > > > > Add support for Synopsys DesignWare core IP based PCIe host controller
> > > > > > > > > > > > > present in Tegra194 SoC.
> > > > > > > > > > 
> > > > > > > > > >        - Why does this chip require pcie_pme_disable_msi()?  The only other
> > > > > > > > > >          use is a DMI quirk for "MSI Wind U-100", added by c39fae1416d5
> > > > > > > > > >          ("PCI PM: Make it possible to force using INTx for PCIe PME
> > > > > > > > > >          signaling").
> > > > > > > > > 
> > > > > > > > > Because Tegra194 doesn't support raising PME interrupts through MSI line.
> > > > 
> > > > > > There's something wrong here.  Either the question of how PME is
> > > > > > signaled is generic and the spec provides a way for the OS to discover
> > > > > > that method, or it's part of the device-specific architecture that
> > > > > > each host bridge driver has to know about its device.  If the former,
> > > > > > we need to make the PCI core smart enough to figure it out.  If the
> > > > > > latter, we need a better interface than this ad hoc
> > > > > > pcie_pme_disable_msi() thing.  But if it is truly the latter, your
> > > > > > current code is sufficient and we can refine it over time.
> > > > > 
> > > > > In case of Tegra194, it is the latter case.
> > > > 
> > > > This isn't a Tegra194 question; it's a question of whether this
> > > > behavior is covered by the PCIe spec.
> > > AFAIU the spec and what I heard from Nvidia hardware folks is that spec doesn't
> > > explicitly talk about this and it was a design choice made by Nvidia hardware
> > > folks to route these interrupts through legacy line instead of MSI line.
> > > 
> > > > 
> > > > > > What I suspect should happen eventually is the DWC driver should call
> > > > > > devm_pci_alloc_host_bridge() directly, as all the non-DWC drivers do.
> > > > > > That would require a little reorganization of the DWC data structures,
> > > > > > but it would be good to be more consistent.
> > > > > > 
> > > > > > For now, I think stashing the pointer in pcie_port or dw_pcie would be
> > > > > > OK.  I'm not 100% clear on the difference, but it looks like either
> > > > > > should be common across all the DWC drivers, which is what we want.
> > > > > 
> > > > > Since dw_pcie is common for both root port and end point mode structures,
> > > > > I think it makes sense to keep the pointer in pcie_port as this structure
> > > > > is specific to root port mode of operation.
> > > > > I'll make a note to reorganize code to have devm_pci_alloc_host_bridge()
> > > > > used in the beginning itself to be inline with non-DWC implementations.
> > > > > But, I'll take it up later (after I'm done with upstreaming current series)
> > > > 
> > > > Fair enough.
> > > > 
> > > > > > > .remove() internally calls pm_runtime_put_sync() API which calls
> > > > > > > .runtime_suspend(). I made a new patch to add a host_deinit() call
> > > > > > > which make all these calls. Since host_init() is called from inside
> > > > > > > .runtime_resume() of this driver, to be in sync, I'm now calling
> > > > > > > host_deinit() from inside .runtime_suspend() API.
> > > > > > 
> > > > > > I think this is wrong.  pci_stop_root_bus() will detach all the
> > > > > > drivers from all the devices.  We don't want to do that if we're
> > > > > > merely runtime suspending the host bridge, do we?
> > > > > 
> > > > > In the current driver, the scenarios in which .runtime_suspend() is called
> > > > > are
> > > > > a) during .remove() call and
> > > > 
> > > > It makes sense that you should call pci_stop_root_bus() during
> > > > .remove(), i.e., when the host controller driver is being removed,
> > > > because the PCI bus will no longer be accessible.  I think you should
> > > > call it *directly* from tegra_pcie_dw_remove() because that will match
> > > > what other drivers do.
> > > > 
> > > > > b) when there is no endpoint found and controller would be shutdown
> > > > > In both cases, it is required to stop the root bus and remove all devices,
> > > > > so, instead of having same call present in respective paths, I kept them
> > > > > in .runtime_suspend() itself to avoid code duplication.
> > > > 
> > > > I don't understand this part.  We should be able to runtime suspend
> > > > the host controller without detaching drivers for child devices.
> > > > 
> > > > If you shutdown the controller completely and detach the *host
> > > > controller driver*, sure, it makes sense to detach drivers from child
> > > > devices.  But that would be handled by the host controller .remove()
> > > > method, not by the runtime suspend method.
> > > I think it is time I give some background about why I chose to implement
> > > .runtime_suspend() and .runtime_resume() APIs in the first place. We wanted to
> > > powerdown PCIe controller if there is no link up (i.e. slot is open and no endpoint
> > > devices are connected). We want to achieve this without returning a failure in
> > > .probe() call. Given PCIe IP power partitioning is handled by generic power domain
> > > framework, power partition gets unpowergated before .probe() gets called and gets
> > > powergated either when a failure is returned in .probe() or when pm_runtime_put_sync()
> > > is called. So, I chose to call pm_runtime_put_sync() in no-link-up scenario and chose
> > > to implement .runtime_suspend() to handle all the cleanup work before PCIe partition
> > > getting powergated. In fact, to match this, I'm doing all the PCIe IP bring up
> > > activity in .runtime_resume() implementation which gets invoked by pm_runtime_get_sync()
> > > which in turn is called in .probe() path. In fact the very dw_pcie_host_init() itself
> > > is called from .runtime_resume() implementation. So, it is because of these reasons that
> > > I called pci_stop_root_bus() and pci_remove_root_bus() as part of .runtime_suspend()
> > > implementation as pm_runtime_put_sync() is called from both .remove() and also during
> > > no-link-up scenario. Please do let me know if there is a better way to do this.
> > 
> > I think you're missing the case where .runtime_suspend() is called when
> > there are no *active* devices on the bus, i.e. everyone is dormant. It
> > doesn't mean that you need to remove them from the bus and re-probe them
> > back on .runtime_resume(). Most of the drivers for PCI devices don't
> > expect to be removed during idle, as they will configure the hardware to
> > be in a "quick wake" state (see PCIe Dx power states).
> > 
> > You should probe and configure the bus during .probe() and remove and
> > detach all drivers during .remove(). For .runtime_suspend() all you need
> > to do is put the host controller in low power mode if it has one, or
> > stop all clocks that are not required for responding to devices waking
> > up from PCIe Dx state. For .runtime_resume() you then restore the
> > clocks, without re-scanning the bus.
> Since this is a host controller driver and the device as such is sitting on platform
> bus instead of PCIe bus, is it still the case that .runtime_suspend() and
> .runtime_resume() of this driver get called when devices on PCIe bus are idle?

The functions will be called when the device is determined to be idle,
i.e. when there are no PM references being held by other drivers.

Think of it the other way: even if the device is sitting on the platform
bus for configuration reasons, you don't want to turn it off when the
PCIe bus needs to be active, right?


> 
> Having asked that, I start to feel what I'm doing as part of .runtime_suspend() and
> .runtime_resume() doesn't really justify these API names. Since I know where I'm
> calling pm_runtime_get/put_sync() APIs (which eventually call .runtime_resume/suspend())
> I should probably move the content of these APIs before calling pm_runtime_get/put_sync().
> Do you agree with that?

Yeah, I think you are right. Also, I believe there are pm_runtime_get()
calls in the PCI framework as well; you need to audit the code, I haven't
looked at it in a while.

Best regards,
Liviu

> 
> > 
> > Best regards,
> > Liviu
> > 
> > 
> > > 
> > > > 
> > > > Bjorn
> > > > 
> > > 
> > 
>
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index 6ea74b1c0d94..d80f2d77892a 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -213,4 +213,14 @@  config PCIE_UNIPHIER
 	  Say Y here if you want PCIe controller support on UniPhier SoCs.
 	  This driver supports LD20 and PXs3 SoCs.
 
+config PCIE_TEGRA194
+	bool "NVIDIA Tegra (T194) PCIe controller"
+	depends on TEGRA_BPMP && (ARCH_TEGRA || COMPILE_TEST)
+	depends on PCI_MSI_IRQ_DOMAIN
+	select PCIE_DW_HOST
+	select PHY_TEGRA194_PCIE_P2U
+	help
+	  Say Y here if you want support for DesignWare core based PCIe host
+	  controller found in NVIDIA Tegra T194 SoC.
+
 endmenu
diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
index b5f3b83cc2b3..4362f0ea89ac 100644
--- a/drivers/pci/controller/dwc/Makefile
+++ b/drivers/pci/controller/dwc/Makefile
@@ -16,6 +16,7 @@  obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
 obj-$(CONFIG_PCI_MESON) += pci-meson.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
+obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 
 # The following drivers are for devices that use the generic ACPI
 # pci_root.c driver but don't support standard ECAM config access.
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
new file mode 100644
index 000000000000..7f6be38c8456
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -0,0 +1,1862 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe host controller driver for Tegra T194 SoC
+ *
+ * Copyright (C) 2018 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/kfifo.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/pci-aspm.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/random.h>
+#include <linux/reset.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include "pcie-designware.h"
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+#include "../../pcie/portdrv.h"
+
+#define dw_pcie_to_tegra_pcie(x) container_of(x, struct tegra_pcie_dw, pci)
+
+#define CTRL_5	5
+
+#define APPL_PINMUX				0x0
+#define APPL_PINMUX_PEX_RST			BIT(0)
+#define APPL_PINMUX_CLKREQ_OVERRIDE_EN		BIT(2)
+#define APPL_PINMUX_CLKREQ_OVERRIDE		BIT(3)
+#define APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN	BIT(4)
+#define APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE	BIT(5)
+#define APPL_PINMUX_CLKREQ_OUT_OVRD_EN		BIT(9)
+#define APPL_PINMUX_CLKREQ_OUT_OVRD		BIT(10)
+
+#define APPL_CTRL				0x4
+#define APPL_CTRL_SYS_PRE_DET_STATE		BIT(6)
+#define APPL_CTRL_LTSSM_EN			BIT(7)
+#define APPL_CTRL_HW_HOT_RST_EN			BIT(20)
+#define APPL_CTRL_HW_HOT_RST_MODE_MASK		GENMASK(1, 0)
+#define APPL_CTRL_HW_HOT_RST_MODE_SHIFT		22
+#define APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST	0x1
+
+#define APPL_INTR_EN_L0_0			0x8
+#define APPL_INTR_EN_L0_0_LINK_STATE_INT_EN	BIT(0)
+#define APPL_INTR_EN_L0_0_MSI_RCV_INT_EN	BIT(4)
+#define APPL_INTR_EN_L0_0_INT_INT_EN		BIT(8)
+#define APPL_INTR_EN_L0_0_CDM_REG_CHK_INT_EN	BIT(19)
+#define APPL_INTR_EN_L0_0_SYS_INTR_EN		BIT(30)
+#define APPL_INTR_EN_L0_0_SYS_MSI_INTR_EN	BIT(31)
+
+#define APPL_INTR_STATUS_L0			0xC
+#define APPL_INTR_STATUS_L0_LINK_STATE_INT	BIT(0)
+#define APPL_INTR_STATUS_L0_INT_INT		BIT(8)
+#define APPL_INTR_STATUS_L0_CDM_REG_CHK_INT	BIT(18)
+
+#define APPL_INTR_EN_L1_0_0				0x1C
+#define APPL_INTR_EN_L1_0_0_LINK_REQ_RST_NOT_INT_EN	BIT(1)
+
+#define APPL_INTR_STATUS_L1_0_0				0x20
+#define APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED	BIT(1)
+
+#define APPL_INTR_STATUS_L1_1			0x2C
+#define APPL_INTR_STATUS_L1_2			0x30
+#define APPL_INTR_STATUS_L1_3			0x34
+#define APPL_INTR_STATUS_L1_6			0x3C
+#define APPL_INTR_STATUS_L1_7			0x40
+
+#define APPL_INTR_EN_L1_8_0			0x44
+#define APPL_INTR_EN_L1_8_BW_MGT_INT_EN		BIT(2)
+#define APPL_INTR_EN_L1_8_AUTO_BW_INT_EN	BIT(3)
+#define APPL_INTR_EN_L1_8_INTX_EN		BIT(11)
+#define APPL_INTR_EN_L1_8_AER_INT_EN		BIT(15)
+
+#define APPL_INTR_STATUS_L1_8_0			0x4C
+#define APPL_INTR_STATUS_L1_8_0_EDMA_INT_MASK	GENMASK(11, 6)
+#define APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS	BIT(2)
+#define APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS	BIT(3)
+
+#define APPL_INTR_STATUS_L1_9			0x54
+#define APPL_INTR_STATUS_L1_10			0x58
+#define APPL_INTR_STATUS_L1_11			0x64
+#define APPL_INTR_STATUS_L1_13			0x74
+#define APPL_INTR_STATUS_L1_14			0x78
+#define APPL_INTR_STATUS_L1_15			0x7C
+#define APPL_INTR_STATUS_L1_17			0x88
+
+#define APPL_INTR_EN_L1_18				0x90
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_CMPLT		BIT(2)
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_CMP_ERR		BIT(1)
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_LOGIC_ERR	BIT(0)
+
+#define APPL_INTR_STATUS_L1_18				0x94
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT	BIT(2)
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR	BIT(1)
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR	BIT(0)
+
+#define APPL_MSI_CTRL_2				0xB0
+
+#define APPL_LTR_MSG_1				0xC4
+#define LTR_MSG_REQ				BIT(15)
+#define LTR_MST_NO_SNOOP_SHIFT			16
+
+#define APPL_LTR_MSG_2				0xC8
+#define APPL_LTR_MSG_2_LTR_MSG_REQ_STATE	BIT(3)
+
+#define APPL_LINK_STATUS			0xCC
+#define APPL_LINK_STATUS_RDLH_LINK_UP		BIT(0)
+
+#define APPL_DEBUG				0xD0
+#define APPL_DEBUG_PM_LINKST_IN_L2_LAT		BIT(21)
+#define APPL_DEBUG_PM_LINKST_IN_L0		0x11
+#define APPL_DEBUG_LTSSM_STATE_MASK		GENMASK(8, 3)
+#define APPL_DEBUG_LTSSM_STATE_SHIFT		3
+#define LTSSM_STATE_PRE_DETECT			5
+
+#define APPL_RADM_STATUS			0xE4
+#define APPL_PM_XMT_TURNOFF_STATE		BIT(0)
+
+#define APPL_DM_TYPE				0x100
+#define APPL_DM_TYPE_MASK			GENMASK(3, 0)
+#define APPL_DM_TYPE_RP				0x4
+#define APPL_DM_TYPE_EP				0x0
+
+#define APPL_CFG_BASE_ADDR			0x104
+#define APPL_CFG_BASE_ADDR_MASK			GENMASK(31, 12)
+
+#define APPL_CFG_IATU_DMA_BASE_ADDR		0x108
+#define APPL_CFG_IATU_DMA_BASE_ADDR_MASK	GENMASK(31, 18)
+
+#define APPL_CFG_MISC				0x110
+#define APPL_CFG_MISC_SLV_EP_MODE		BIT(14)
+#define APPL_CFG_MISC_ARCACHE_MASK		GENMASK(13, 10)
+#define APPL_CFG_MISC_ARCACHE_SHIFT		10
+#define APPL_CFG_MISC_ARCACHE_VAL		3
+
+#define APPL_CFG_SLCG_OVERRIDE			0x114
+#define APPL_CFG_SLCG_OVERRIDE_SLCG_EN_MASTER	BIT(0)
+
+#define APPL_CAR_RESET_OVRD				0x12C
+#define APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N	BIT(0)
+
+#define IO_BASE_IO_DECODE				BIT(0)
+#define IO_BASE_IO_DECODE_BIT8				BIT(8)
+
+#define CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE		BIT(0)
+#define CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE	BIT(16)
+
+#define CFG_LINK_CAP				0x7C
+
+#define CFG_DEV_STATUS_CONTROL			0x78
+#define CFG_DEV_STATUS_CONTROL_MPS_SHIFT	5
+
+#define CFG_LINK_CONTROL		0x80
+
+#define CFG_LINK_STATUS			0x82
+
+#define CFG_LINK_CONTROL_2		0xA0
+
+#define CFG_LINK_STATUS_2		0xA2
+#define CFG_LINK_STATUS_2_PCIE_CAP_EQ_CPL	BIT(17)
+
+#define CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF	0x718
+#define CFG_TIMER_CTRL_ACK_NAK_SHIFT	(19)
+
+#define  PCI_L1SS_CAP_CM_RTM_SHIFT	8	/* Common_Mode_Restore_Time shift */
+#define  PCI_L1SS_CAP_PWRN_VAL_SHIFT	19	/* T_POWER_ON value shift */
+
+#define EVENT_COUNTER_ALL_CLEAR		0x3
+#define EVENT_COUNTER_ENABLE_ALL	0x7
+#define EVENT_COUNTER_ENABLE_SHIFT	2
+#define EVENT_COUNTER_EVENT_SEL_MASK	GENMASK(7, 0)
+#define EVENT_COUNTER_EVENT_SEL_SHIFT	16
+#define EVENT_COUNTER_EVENT_Tx_L0S	0x2
+#define EVENT_COUNTER_EVENT_Rx_L0S	0x3
+#define EVENT_COUNTER_EVENT_L1		0x5
+#define EVENT_COUNTER_EVENT_L1_1	0x7
+#define EVENT_COUNTER_EVENT_L1_2	0x8
+#define EVENT_COUNTER_GROUP_SEL_SHIFT	24
+#define EVENT_COUNTER_GROUP_5		0x5
+
+#define DL_FEATURE_EXCHANGE_EN		BIT(31)
+
+#define PORT_LOGIC_ACK_F_ASPM_CTRL			0x70C
+#define ENTER_ASPM					BIT(30)
+#define L0S_ENTRANCE_LAT_SHIFT				24
+#define L0S_ENTRANCE_LAT_MASK				GENMASK(26, 24)
+#define L1_ENTRANCE_LAT_SHIFT				27
+#define L1_ENTRANCE_LAT_MASK				GENMASK(29, 27)
+#define N_FTS_SHIFT					8
+#define N_FTS_MASK					GENMASK(7, 0)
+#define N_FTS_VAL					52
+
+#define PORT_LOGIC_GEN2_CTRL				0x80C
+#define PORT_LOGIC_GEN2_CTRL_DIRECT_SPEED_CHANGE	BIT(17)
+#define FTS_MASK					GENMASK(7, 0)
+#define FTS_VAL						52
+
+#define PORT_LOGIC_MSI_CTRL_INT_0_EN		0x828
+
+#define GEN3_EQ_CONTROL_OFF			0x8a8
+#define GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT	8
+#define GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK	GENMASK(23, 8)
+#define GEN3_EQ_CONTROL_OFF_FB_MODE_MASK	GENMASK(3, 0)
+
+#define GEN3_RELATED_OFF			0x890
+#define GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL	BIT(0)
+#define GEN3_RELATED_OFF_GEN3_EQ_DISABLE	BIT(16)
+#define GEN3_RELATED_OFF_RATE_SHADOW_SEL_SHIFT	24
+#define GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK	GENMASK(25, 24)
+
+#define PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT	0x8D0
+#define AMBA_ERROR_RESPONSE_CRS_SHIFT		3
+#define AMBA_ERROR_RESPONSE_CRS_MASK		GENMASK(1, 0)
+#define AMBA_ERROR_RESPONSE_CRS_OKAY		0
+#define AMBA_ERROR_RESPONSE_CRS_OKAY_FFFFFFFF	1
+#define AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001	2
+
+#define PORT_LOGIC_MSIX_DOORBELL			0x948
+
+#define PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS		0xB20
+#define PORT_LOGIC_PL_CHK_REG_CHK_REG_START		BIT(0)
+#define PORT_LOGIC_PL_CHK_REG_CHK_REG_CONTINUOUS	BIT(1)
+#define PORT_LOGIC_PL_CHK_REG_CHK_REG_COMPARISON_ERROR	BIT(16)
+#define PORT_LOGIC_PL_CHK_REG_CHK_REG_LOGIC_ERROR	BIT(17)
+#define PORT_LOGIC_PL_CHK_REG_CHK_REG_COMPLETE		BIT(18)
+
+#define PORT_LOGIC_MISC_CONTROL			0x8bc
+#define PORT_LOGIC_MISC_CONTROL_DBI_RO_WR_EN	BIT(0)
+
+#define PORT_LOGIC_PL_CHK_REG_ERR_ADDR		0xB28
+
+#define CAP_SPCIE_CAP_OFF			0x154
+#define CAP_SPCIE_CAP_OFF_DSP_TX_PRESET0_MASK	GENMASK(3, 0)
+#define CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_MASK	GENMASK(11, 8)
+#define CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_SHIFT	8
+
+#define PL16G_CAP_OFF				0x188
+#define PL16G_CAP_OFF_DSP_16G_TX_PRESET_MASK	GENMASK(3, 0)
+#define PL16G_CAP_OFF_USP_16G_TX_PRESET_MASK	GENMASK(7, 4)
+#define PL16G_CAP_OFF_USP_16G_TX_PRESET_SHIFT	4
+
+#define PME_ACK_TIMEOUT 10000
+
+#define LTSSM_TIMEOUT 50000	/* 50ms */
+
+#define GEN3_GEN4_EQ_PRESET_INIT	5
+
+#define GEN1_CORE_CLK_FREQ	62500000
+#define GEN2_CORE_CLK_FREQ	125000000
+#define GEN3_CORE_CLK_FREQ	250000000
+#define GEN4_CORE_CLK_FREQ	500000000
+
+static unsigned int pcie_gen_freq[] = {
+	GEN1_CORE_CLK_FREQ,
+	GEN2_CORE_CLK_FREQ,
+	GEN3_CORE_CLK_FREQ,
+	GEN4_CORE_CLK_FREQ
+};
+
+struct tegra_pcie_dw {
+	struct device		*dev;
+	struct resource		*appl_res;
+	struct resource		*dbi_res;
+	struct resource		*atu_dma_res;
+	void __iomem		*appl_base;
+	struct clk		*core_clk;
+	struct reset_control	*core_apb_rst;
+	struct reset_control	*core_rst;
+	struct dw_pcie		pci;
+	enum dw_pcie_device_mode mode;
+
+	bool disable_clock_request;
+	bool power_down_en;
+	u8 init_link_width;
+	bool link_state;
+	u32 msi_ctrl_int;
+	u32 num_lanes;
+	u32 max_speed;
+	u32 init_speed;
+	bool cdm_check;
+	u32 cid;
+	int pex_wake;
+	bool update_fc_fixup;
+	int n_gpios;
+	int *gpios;
+#if defined(CONFIG_PCIEASPM)
+	u32 cfg_link_cap_l1sub;
+	u32 event_cntr_ctrl;
+	u32 event_cntr_data;
+	u32 aspm_cmrt;
+	u32 aspm_pwr_on_t;
+	u32 aspm_l0s_enter_lat;
+	u32 disabled_aspm_states;
+#endif
+
+	struct regulator	*pex_ctl_reg;
+
+	int			phy_count;
+	struct phy		**phy;
+
+	struct dentry		*debugfs;
+};
+
+struct tegra_pcie_of_data {
+	enum dw_pcie_device_mode mode;
+};
+
+static void apply_bad_link_workaround(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
+	u16 val;
+
+	/*
+	 * NOTE: Since this scenario is uncommon and the link is not stable
+	 * anyway, don't wait to confirm that the link really transitions to
+	 * Gen-2 speed.
+	 */
+	val = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
+	if (val & PCI_EXP_LNKSTA_LBMS) {
+		if (pcie->init_link_width >
+		    (val & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT) {
+			dev_warn(pci->dev, "PCIe link is bad, width reduced\n");
+			val = dw_pcie_readw_dbi(pci, CFG_LINK_CONTROL_2);
+			val &= ~PCI_EXP_LNKCTL2_TLS;
+			val |= PCI_EXP_LNKCTL2_TLS_2_5GT;
+			dw_pcie_writew_dbi(pci, CFG_LINK_CONTROL_2, val);
+
+			val = dw_pcie_readw_dbi(pci, CFG_LINK_CONTROL);
+			val |= PCI_EXP_LNKCTL_RL;
+			dw_pcie_writew_dbi(pci, CFG_LINK_CONTROL, val);
+		}
+	}
+}
+
+static irqreturn_t tegra_pcie_rp_irq_handler(struct tegra_pcie_dw *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct pcie_port *pp = &pci->pp;
+	u32 val, tmp;
+	u16 val_w;
+
+	val = readl(pcie->appl_base + APPL_INTR_STATUS_L0);
+	dev_dbg(pci->dev, "APPL_INTR_STATUS_L0 = 0x%08X\n", val);
+	if (val & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
+		val = readl(pcie->appl_base + APPL_INTR_STATUS_L1_0_0);
+		dev_dbg(pci->dev, "APPL_INTR_STATUS_L1_0_0 = 0x%08X\n", val);
+		if (val & APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED) {
+			writel(val, pcie->appl_base + APPL_INTR_STATUS_L1_0_0);
+
+			/* SBR & Surprise Link Down WAR */
+			val = readl(pcie->appl_base + APPL_CAR_RESET_OVRD);
+			val &= ~APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+			writel(val, pcie->appl_base + APPL_CAR_RESET_OVRD);
+			udelay(1);
+			val = readl(pcie->appl_base + APPL_CAR_RESET_OVRD);
+			val |= APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+			writel(val, pcie->appl_base + APPL_CAR_RESET_OVRD);
+
+			val = dw_pcie_readl_dbi(pci, PORT_LOGIC_GEN2_CTRL);
+			val |= PORT_LOGIC_GEN2_CTRL_DIRECT_SPEED_CHANGE;
+			dw_pcie_writel_dbi(pci, PORT_LOGIC_GEN2_CTRL, val);
+		}
+	}
+	if (val & APPL_INTR_STATUS_L0_INT_INT) {
+		val = readl(pcie->appl_base + APPL_INTR_STATUS_L1_8_0);
+		dev_dbg(pci->dev, "APPL_INTR_STATUS_L1_8_0 = 0x%08X\n", val);
+		if (val & APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS) {
+			writel(APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS,
+			       pcie->appl_base + APPL_INTR_STATUS_L1_8_0);
+			apply_bad_link_workaround(pp);
+		}
+		if (val & APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS) {
+			writel(APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS,
+			       pcie->appl_base + APPL_INTR_STATUS_L1_8_0);
+
+			val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
+			dev_dbg(pci->dev, "Link Speed : Gen-%u\n", val_w &
+				PCI_EXP_LNKSTA_CLS);
+		}
+	}
+	val = readl(pcie->appl_base + APPL_INTR_STATUS_L0);
+	if (val & APPL_INTR_STATUS_L0_CDM_REG_CHK_INT) {
+		val = readl(pcie->appl_base + APPL_INTR_STATUS_L1_18);
+		tmp = dw_pcie_readl_dbi(pci,
+					PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS);
+		dev_dbg(pci->dev, "APPL_INTR_STATUS_L1_18 = 0x%08X\n", val);
+		if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT) {
+			dev_err(pci->dev, "CDM check complete\n");
+			tmp |= PORT_LOGIC_PL_CHK_REG_CHK_REG_COMPLETE;
+		}
+		if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR) {
+			dev_err(pci->dev, "CDM comparison mismatch\n");
+			tmp |= PORT_LOGIC_PL_CHK_REG_CHK_REG_COMPARISON_ERROR;
+		}
+		if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR) {
+			dev_err(pci->dev, "CDM Logic error\n");
+			tmp |= PORT_LOGIC_PL_CHK_REG_CHK_REG_LOGIC_ERROR;
+		}
+		dw_pcie_writel_dbi(pci, PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS,
+				   tmp);
+		tmp = dw_pcie_readl_dbi(pci, PORT_LOGIC_PL_CHK_REG_ERR_ADDR);
+		dev_err(pci->dev, "CDM Error Address Offset = 0x%08X\n", tmp);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t tegra_pcie_irq_handler(int irq, void *arg)
+{
+	struct tegra_pcie_dw *pcie = (struct tegra_pcie_dw *)arg;
+
+	if (pcie->mode == DW_PCIE_RC_TYPE)
+		return tegra_pcie_rp_irq_handler(pcie);
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t tegra_pcie_msi_irq_handler(int irq, void *arg)
+{
+	struct pcie_port *pp = arg;
+
+	return dw_handle_msi_irq(pp);
+}
+
+static int tegra_pcie_dw_rd_own_conf(struct pcie_port *pp, int where, int size,
+				     u32 *val)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+	/*
+	 * This is an endpoint mode specific register that happens to be
+	 * present even when the controller is operating in root port mode,
+	 * and the system hangs when it is accessed with the link in the
+	 * ASPM-L1 state. So skip accessing it altogether.
+	 */
+	if (where == PORT_LOGIC_MSIX_DOORBELL) {
+		*val = 0x00000000;
+		return PCIBIOS_SUCCESSFUL;
+	} else {
+		return dw_pcie_read(pci->dbi_base + where, size, val);
+	}
+}
+
+static int tegra_pcie_dw_wr_own_conf(struct pcie_port *pp, int where, int size,
+				     u32 val)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+	/*
+	 * This is an endpoint mode specific register, and the system hangs
+	 * when it is accessed with the link in the ASPM-L1 state.
+	 * So skip accessing it altogether.
+	 */
+	if (where == PORT_LOGIC_MSIX_DOORBELL)
+		return PCIBIOS_SUCCESSFUL;
+	else
+		return dw_pcie_write(pci->dbi_base + where, size, val);
+}
+
+static void config_plat_gpio(struct tegra_pcie_dw *pcie, bool flag)
+{
+	int count;
+
+	for (count = 0; count < pcie->n_gpios; ++count)
+		gpiod_set_value(gpio_to_desc(pcie->gpios[count]), flag);
+}
+
+#if defined(CONFIG_PCIEASPM)
+static void disable_aspm_l0s(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(&pcie->pci, CFG_LINK_CAP);
+	val &= ~(PCI_EXP_LNKCTL_ASPM_L0S << 10);
+	dw_pcie_writel_dbi(&pcie->pci, CFG_LINK_CAP, val);
+}
+
+static void disable_aspm_l10(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(&pcie->pci, CFG_LINK_CAP);
+	val &= ~(PCI_EXP_LNKCTL_ASPM_L1 << 10);
+	dw_pcie_writel_dbi(&pcie->pci, CFG_LINK_CAP, val);
+}
+
+static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub);
+	val &= ~PCI_L1SS_CAP_ASPM_L1_1;
+	dw_pcie_writel_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub, val);
+}
+
+static void disable_aspm_l12(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub);
+	val &= ~PCI_L1SS_CAP_ASPM_L1_2;
+	dw_pcie_writel_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub, val);
+}
+
+static inline u32 event_counter_prog(struct tegra_pcie_dw *pcie, u32 event)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(&pcie->pci, pcie->event_cntr_ctrl);
+	val &= ~(EVENT_COUNTER_EVENT_SEL_MASK << EVENT_COUNTER_EVENT_SEL_SHIFT);
+	val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+	val |= event << EVENT_COUNTER_EVENT_SEL_SHIFT;
+	val |= EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+	dw_pcie_writel_dbi(&pcie->pci, pcie->event_cntr_ctrl, val);
+	val = dw_pcie_readl_dbi(&pcie->pci, pcie->event_cntr_data);
+	return val;
+}
+
+static int aspm_state_cnt(struct seq_file *s, void *data)
+{
+	struct tegra_pcie_dw *pcie = (struct tegra_pcie_dw *)(s->private);
+	u32 val;
+
+	seq_printf(s, "Tx L0s entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_Tx_L0S));
+
+	seq_printf(s, "Rx L0s entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_Rx_L0S));
+
+	seq_printf(s, "Link L1 entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1));
+
+	seq_printf(s, "Link L1.1 entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1_1));
+
+	seq_printf(s, "Link L1.2 entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1_2));
+
+	/* Clear all counters */
+	dw_pcie_writel_dbi(&pcie->pci, pcie->event_cntr_ctrl,
+			   EVENT_COUNTER_ALL_CLEAR);
+
+	/* Re-enable counting */
+	val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+	val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+	dw_pcie_writel_dbi(&pcie->pci, pcie->event_cntr_ctrl, val);
+
+	return 0;
+}
+
+#define DEFINE_ENTRY(__name)	\
+static int __name ## _open(struct inode *inode, struct file *file)	\
+{									\
+	return single_open(file, __name, inode->i_private); \
+}									\
+static const struct file_operations __name ## _fops = {	\
+	.open		= __name ## _open,	\
+	.read		= seq_read,	\
+	.llseek		= seq_lseek,	\
+	.release	= single_release,	\
+}
+
+DEFINE_ENTRY(aspm_state_cnt);
+#endif
+
+static int init_debugfs(struct tegra_pcie_dw *pcie)
+{
+#if defined(CONFIG_PCIEASPM)
+	struct dentry *d;
+
+	d = debugfs_create_file("aspm_state_cnt", 0444, pcie->debugfs,
+				(void *)pcie, &aspm_state_cnt_fops);
+	if (!d)
+		dev_err(pcie->dev, "debugfs for aspm_state_cnt failed\n");
+#endif
+	return 0;
+}
+
+static void tegra_pcie_enable_system_interrupts(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
+	u32 val;
+	u16 val_w;
+
+	val = readl(pcie->appl_base + APPL_INTR_EN_L0_0);
+	val |= APPL_INTR_EN_L0_0_LINK_STATE_INT_EN;
+	writel(val, pcie->appl_base + APPL_INTR_EN_L0_0);
+
+	val = readl(pcie->appl_base + APPL_INTR_EN_L1_0_0);
+	val |= APPL_INTR_EN_L1_0_0_LINK_REQ_RST_NOT_INT_EN;
+	writel(val, pcie->appl_base + APPL_INTR_EN_L1_0_0);
+
+	if (pcie->cdm_check) {
+		val = readl(pcie->appl_base + APPL_INTR_EN_L0_0);
+		val |= APPL_INTR_EN_L0_0_CDM_REG_CHK_INT_EN;
+		writel(val, pcie->appl_base + APPL_INTR_EN_L0_0);
+
+		val = readl(pcie->appl_base + APPL_INTR_EN_L1_18);
+		val |= APPL_INTR_EN_L1_18_CDM_REG_CHK_CMP_ERR;
+		val |= APPL_INTR_EN_L1_18_CDM_REG_CHK_LOGIC_ERR;
+		writel(val, pcie->appl_base + APPL_INTR_EN_L1_18);
+	}
+
+	val_w = dw_pcie_readw_dbi(&pcie->pci, CFG_LINK_STATUS);
+	pcie->init_link_width = (val_w & PCI_EXP_LNKSTA_NLW) >>
+				PCI_EXP_LNKSTA_NLW_SHIFT;
+
+	val_w = dw_pcie_readw_dbi(&pcie->pci, CFG_LINK_CONTROL);
+	val_w |= PCI_EXP_LNKCTL_LBMIE;
+	dw_pcie_writew_dbi(&pcie->pci, CFG_LINK_CONTROL, val_w);
+}
+
+static void tegra_pcie_enable_legacy_interrupts(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
+	u32 val;
+
+	/* enable legacy interrupt generation */
+	val = readl(pcie->appl_base + APPL_INTR_EN_L0_0);
+	val |= APPL_INTR_EN_L0_0_SYS_INTR_EN;
+	val |= APPL_INTR_EN_L0_0_INT_INT_EN;
+	writel(val, pcie->appl_base + APPL_INTR_EN_L0_0);
+
+	val = readl(pcie->appl_base + APPL_INTR_EN_L1_8_0);
+	val |= APPL_INTR_EN_L1_8_INTX_EN;
+	val |= APPL_INTR_EN_L1_8_AUTO_BW_INT_EN;
+	val |= APPL_INTR_EN_L1_8_BW_MGT_INT_EN;
+	if (IS_ENABLED(CONFIG_PCIEAER))
+		val |= APPL_INTR_EN_L1_8_AER_INT_EN;
+	writel(val, pcie->appl_base + APPL_INTR_EN_L1_8_0);
+}
+
+static void tegra_pcie_enable_msi_interrupts(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
+	u32 val;
+
+	dw_pcie_msi_init(pp);
+
+	/* enable MSI interrupt generation */
+	val = readl(pcie->appl_base + APPL_INTR_EN_L0_0);
+	val |= APPL_INTR_EN_L0_0_SYS_MSI_INTR_EN;
+	val |= APPL_INTR_EN_L0_0_MSI_RCV_INT_EN;
+	writel(val, pcie->appl_base + APPL_INTR_EN_L0_0);
+}
+
+static void tegra_pcie_enable_interrupts(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
+
+	/* Clear interrupt statuses before enabling interrupts */
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L0);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_0_0);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_1);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_2);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_3);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_6);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_7);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_8_0);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_9);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_10);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_11);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_13);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_14);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_15);
+	writel(0xFFFFFFFF, pcie->appl_base + APPL_INTR_STATUS_L1_17);
+
+	tegra_pcie_enable_system_interrupts(pp);
+	tegra_pcie_enable_legacy_interrupts(pp);
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		tegra_pcie_enable_msi_interrupts(pp);
+}
+
+static void config_gen3_gen4_eq_presets(struct tegra_pcie_dw *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	int i;
+	u32 val, offset;
+
+	/* Program init preset */
+	for (i = 0; i < pcie->num_lanes; i++) {
+		dw_pcie_read(pci->dbi_base + CAP_SPCIE_CAP_OFF
+				 + (i * 2), 2, &val);
+		val &= ~CAP_SPCIE_CAP_OFF_DSP_TX_PRESET0_MASK;
+		val |= GEN3_GEN4_EQ_PRESET_INIT;
+		val &= ~CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_MASK;
+		val |= (GEN3_GEN4_EQ_PRESET_INIT <<
+			   CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_SHIFT);
+		dw_pcie_write(pci->dbi_base + CAP_SPCIE_CAP_OFF
+				 + (i * 2), 2, val);
+
+		offset = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_PL) +
+				PCI_PL_16GT_LE_CTRL;
+		dw_pcie_read(pci->dbi_base + offset + i, 1, &val);
+		val &= ~PL16G_CAP_OFF_DSP_16G_TX_PRESET_MASK;
+		val |= GEN3_GEN4_EQ_PRESET_INIT;
+		val &= ~PL16G_CAP_OFF_USP_16G_TX_PRESET_MASK;
+		val |= (GEN3_GEN4_EQ_PRESET_INIT <<
+			PL16G_CAP_OFF_USP_16G_TX_PRESET_SHIFT);
+		dw_pcie_write(pci->dbi_base + offset + i, 1, val);
+	}
+
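+	/*
+	 * The GEN3_EQ_CONTROL settings below are programmed once per data
+	 * rate; RATE_SHADOW_SEL in GEN3_RELATED_OFF selects which rate
+	 * (8 GT/s vs 16 GT/s) the subsequent writes apply to.
+	 */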
+	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+	val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+	val = dw_pcie_readl_dbi(pci, GEN3_EQ_CONTROL_OFF);
+	val &= ~GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK;
+	val |= (0x3ff << GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT);
+	val &= ~GEN3_EQ_CONTROL_OFF_FB_MODE_MASK;
+	dw_pcie_writel_dbi(pci, GEN3_EQ_CONTROL_OFF, val);
+
+	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+	val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+	val |= (0x1 << GEN3_RELATED_OFF_RATE_SHADOW_SEL_SHIFT);
+	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+	val = dw_pcie_readl_dbi(pci, GEN3_EQ_CONTROL_OFF);
+	val &= ~GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK;
+	val |= (0x360 << GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT);
+	val &= ~GEN3_EQ_CONTROL_OFF_FB_MODE_MASK;
+	dw_pcie_writel_dbi(pci, GEN3_EQ_CONTROL_OFF, val);
+
+	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+	val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+}
+
+static int tegra_pcie_dw_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
+	int count = 200;
+	u32 val, tmp, offset;
+	u16 val_w;
+
+#if defined(CONFIG_PCIEASPM)
+	pcie->cfg_link_cap_l1sub =
+		dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS) +
+		PCI_L1SS_CAP;
+#endif
+	val = dw_pcie_readl_dbi(pci, PCI_IO_BASE);
+	val &= ~(IO_BASE_IO_DECODE | IO_BASE_IO_DECODE_BIT8);
+	dw_pcie_writel_dbi(pci, PCI_IO_BASE, val);
+
+	val = dw_pcie_readl_dbi(pci, PCI_PREF_MEMORY_BASE);
+	val |= CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE;
+	val |= CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE;
+	dw_pcie_writel_dbi(pci, PCI_PREF_MEMORY_BASE, val);
+
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
+
+	/* Configure FTS */
+	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
+	val &= ~(N_FTS_MASK << N_FTS_SHIFT);
+	val |= N_FTS_VAL << N_FTS_SHIFT;
+	dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
+
+	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_GEN2_CTRL);
+	val &= ~FTS_MASK;
+	val |= FTS_VAL;
+	dw_pcie_writel_dbi(pci, PORT_LOGIC_GEN2_CTRL, val);
+
+	/* Enable as 0xFFFF0001 response for CRS */
+	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT);
+	val &= ~(AMBA_ERROR_RESPONSE_CRS_MASK << AMBA_ERROR_RESPONSE_CRS_SHIFT);
+	val |= (AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001 <<
+		AMBA_ERROR_RESPONSE_CRS_SHIFT);
+	dw_pcie_writel_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT, val);
+
+	/* Set MPS to 256 in DEV_CTL */
+	val = dw_pcie_readl_dbi(pci, CFG_DEV_STATUS_CONTROL);
+	val &= ~PCI_EXP_DEVCTL_PAYLOAD;
+	val |= (1 << CFG_DEV_STATUS_CONTROL_MPS_SHIFT);
+	dw_pcie_writel_dbi(pci, CFG_DEV_STATUS_CONTROL, val);
+
+	/* Configure Max Speed from DT */
+	val = dw_pcie_readl_dbi(pci, CFG_LINK_CAP);
+	val &= ~PCI_EXP_LNKCAP_SLS;
+	val |= pcie->max_speed;
+	dw_pcie_writel_dbi(pci, CFG_LINK_CAP, val);
+
+	val = dw_pcie_readw_dbi(pci, CFG_LINK_CONTROL_2);
+	val &= ~PCI_EXP_LNKCTL2_TLS;
+	val |= pcie->init_speed;
+	dw_pcie_writew_dbi(pci, CFG_LINK_CONTROL_2, val);
+
+	/* Configure Max lane width from DT */
+	val = dw_pcie_readl_dbi(pci, CFG_LINK_CAP);
+	val &= ~PCI_EXP_LNKCAP_MLW;
+	val |= (pcie->num_lanes << PCI_EXP_LNKSTA_NLW_SHIFT);
+	dw_pcie_writel_dbi(pci, CFG_LINK_CAP, val);
+
+	config_gen3_gen4_eq_presets(pcie);
+
+#if defined(CONFIG_PCIEASPM)
+	/* Enable ASPM counters */
+	val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+	val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+	dw_pcie_writel_dbi(pci, pcie->event_cntr_ctrl, val);
+
+	/* Program T_cmrt and T_pwr_on values */
+	val = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
+	val &= ~(PCI_L1SS_CAP_CM_RESTORE_TIME | PCI_L1SS_CAP_P_PWR_ON_VALUE);
+	val |= (pcie->aspm_cmrt << PCI_L1SS_CAP_CM_RTM_SHIFT);
+	val |= (pcie->aspm_pwr_on_t << PCI_L1SS_CAP_PWRN_VAL_SHIFT);
+	dw_pcie_writel_dbi(pci, pcie->cfg_link_cap_l1sub, val);
+
+	/* Program L0s and L1 entrance latencies */
+	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
+	val &= ~L0S_ENTRANCE_LAT_MASK;
+	val |= (pcie->aspm_l0s_enter_lat << L0S_ENTRANCE_LAT_SHIFT);
+	val |= ENTER_ASPM;
+	dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
+
+	/* Program what ASPM states should be advertised */
+	if (pcie->disabled_aspm_states & 0x1)
+		disable_aspm_l0s(pcie); /* Disable L0s */
+	if (pcie->disabled_aspm_states & 0x2) {
+		disable_aspm_l10(pcie); /* Disable L1 */
+		disable_aspm_l11(pcie); /* Disable L1.1 */
+		disable_aspm_l12(pcie); /* Disable L1.2 */
+	}
+	if (pcie->disabled_aspm_states & 0x4)
+		disable_aspm_l11(pcie); /* Disable L1.1 */
+	if (pcie->disabled_aspm_states & 0x8)
+		disable_aspm_l12(pcie); /* Disable L1.2 */
+#endif
+	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+	val &= ~GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL;
+	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+	if (pcie->update_fc_fixup) {
+		val = dw_pcie_readl_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF);
+		val |= 0x1 << CFG_TIMER_CTRL_ACK_NAK_SHIFT;
+		dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val);
+	}
+
+	/* CDM check enable */
+	if (pcie->cdm_check) {
+		val = dw_pcie_readl_dbi(pci,
+					PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS);
+		val |= PORT_LOGIC_PL_CHK_REG_CHK_REG_CONTINUOUS;
+		val |= PORT_LOGIC_PL_CHK_REG_CHK_REG_START;
+		dw_pcie_writel_dbi(pci, PORT_LOGIC_PL_CHK_REG_CONTROL_STATUS,
+				   val);
+	}
+
+	dw_pcie_setup_rc(pp);
+
+	clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ);
+
+	/* Assert RST */
+	val = readl(pcie->appl_base + APPL_PINMUX);
+	val &= ~APPL_PINMUX_PEX_RST;
+	writel(val, pcie->appl_base + APPL_PINMUX);
+
+	usleep_range(100, 200);
+
+	/* Enable LTSSM */
+	val = readl(pcie->appl_base + APPL_CTRL);
+	val |= APPL_CTRL_LTSSM_EN;
+	writel(val, pcie->appl_base + APPL_CTRL);
+
+	/* De-assert RST */
+	val = readl(pcie->appl_base + APPL_PINMUX);
+	val |= APPL_PINMUX_PEX_RST;
+	writel(val, pcie->appl_base + APPL_PINMUX);
+
+	msleep(100);
+
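+	/*
+	 * Poll for link up; if training stalls in the DLL state, retry link
+	 * bring-up with Data Link Feature Exchange (DLFE) disabled.
+	 */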
+	val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
+	while (!(val_w & PCI_EXP_LNKSTA_DLLLA)) {
+		if (!count) {
+			val = readl(pcie->appl_base + APPL_DEBUG);
+			val &= APPL_DEBUG_LTSSM_STATE_MASK;
+			val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
+			tmp = readl(pcie->appl_base + APPL_LINK_STATUS);
+			tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
+			if (val == 0x11 && !tmp) {
+				dev_info(pci->dev, "Link is down in DLL\n");
+				dev_info(pci->dev,
+					 "Trying again with DLFE disabled\n");
+				/* Disable LTSSM */
+				val = readl(pcie->appl_base + APPL_CTRL);
+				val &= ~APPL_CTRL_LTSSM_EN;
+				writel(val, pcie->appl_base + APPL_CTRL);
+
+				reset_control_assert(pcie->core_rst);
+				reset_control_deassert(pcie->core_rst);
+
+				offset =
+				dw_pcie_find_ext_capability(pci,
+							    PCI_EXT_CAP_ID_DLF)
+				+ PCI_DLF_CAP;
+				val = dw_pcie_readl_dbi(pci, offset);
+				val &= ~DL_FEATURE_EXCHANGE_EN;
+				dw_pcie_writel_dbi(pci, offset, val);
+
+				tegra_pcie_dw_host_init(&pcie->pci.pp);
+				return 0;
+			}
+			dev_info(pci->dev, "Link is down\n");
+			return 0;
+		}
+		dev_dbg(pci->dev, "polling for link up\n");
+		usleep_range(1000, 2000);
+		val_w = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
+		count--;
+	}
+	dev_info(pci->dev, "Link is up\n");
+
+	tegra_pcie_enable_interrupts(pp);
+
+	return 0;
+}
+
+static int tegra_pcie_dw_link_up(struct dw_pcie *pci)
+{
+	u32 val = dw_pcie_readw_dbi(pci, CFG_LINK_STATUS);
+
+	return !!(val & PCI_EXP_LNKSTA_DLLLA);
+}
+
+static void tegra_pcie_dw_scan_bus(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = dw_pcie_to_tegra_pcie(pci);
+	u32 speed;
+
+	if (!tegra_pcie_dw_link_up(pci))
+		return;
+
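+	/* Scale the core clock to match the negotiated link speed */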
+	speed = (dw_pcie_readw_dbi(pci, CFG_LINK_STATUS) & PCI_EXP_LNKSTA_CLS);
+	clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);
+}
+
+static void tegra_pcie_set_msi_vec_num(struct pcie_port *pp)
+{
+	pp->num_vectors = MAX_MSI_IRQS;
+}
+
+static const struct dw_pcie_ops tegra_dw_pcie_ops = {
+	.link_up = tegra_pcie_dw_link_up,
+};
+
+static const struct dw_pcie_host_ops tegra_pcie_dw_host_ops = {
+	.rd_own_conf = tegra_pcie_dw_rd_own_conf,
+	.wr_own_conf = tegra_pcie_dw_wr_own_conf,
+	.host_init = tegra_pcie_dw_host_init,
+	.scan_bus = tegra_pcie_dw_scan_bus,
+	.set_num_vectors = tegra_pcie_set_msi_vec_num,
+};
+
+static void tegra_pcie_disable_phy(struct tegra_pcie_dw *pcie)
+{
+	int phy_count = pcie->phy_count;
+
+	while (phy_count--) {
+		phy_power_off(pcie->phy[phy_count]);
+		phy_exit(pcie->phy[phy_count]);
+	}
+}
+
+static int tegra_pcie_enable_phy(struct tegra_pcie_dw *pcie)
+{
+	int phy_count = pcie->phy_count;
+	int ret;
+	int i;
+
+	for (i = 0; i < phy_count; i++) {
+		ret = phy_init(pcie->phy[i]);
+		if (ret < 0)
+			goto err_phy_init;
+
+		ret = phy_power_on(pcie->phy[i]);
+		if (ret < 0)
+			goto err_phy_power_on;
+	}
+
+	return 0;
+
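+	/* Unwind: error labels below fall into this loop to undo PHY setup */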
+	while (i >= 0) {
+		phy_power_off(pcie->phy[i]);
+err_phy_power_on:
+		phy_exit(pcie->phy[i]);
+err_phy_init:
+		i--;
+	}
+
+	return ret;
+}
+
+static int tegra_pcie_dw_parse_dt(struct tegra_pcie_dw *pcie)
+{
+	struct device_node *np = pcie->dev->of_node;
+	int ret;
+
+#if defined(CONFIG_PCIEASPM)
+	ret = of_property_read_u32(np, "nvidia,event-cntr-ctrl",
+				   &pcie->event_cntr_ctrl);
+	if (ret < 0) {
+		dev_err(pcie->dev, "failed to read event-cntr-ctrl: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "nvidia,event-cntr-data",
+				   &pcie->event_cntr_data);
+	if (ret < 0) {
+		dev_err(pcie->dev, "failed to read event-cntr-data: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "nvidia,aspm-cmrt", &pcie->aspm_cmrt);
+	if (ret < 0) {
+		dev_err(pcie->dev, "failed to read ASPM T_cmrt: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "nvidia,aspm-pwr-on-t",
+				   &pcie->aspm_pwr_on_t);
+	if (ret < 0)
+		dev_info(pcie->dev, "failed to read ASPM Power On time: %d\n",
+			 ret);
+
+	ret = of_property_read_u32(np, "nvidia,aspm-l0s-entrance-latency",
+				   &pcie->aspm_l0s_enter_lat);
+	if (ret < 0)
+		dev_info(pcie->dev,
+			 "failed to read ASPM L0s entrance latency: %d\n", ret);
+
+	ret = of_property_read_u32(np, "nvidia,disable-aspm-states",
+				   &pcie->disabled_aspm_states);
+	if (ret < 0) {
+		dev_info(pcie->dev,
+			 "Disabling advertisement of all ASPM states\n");
+		pcie->disabled_aspm_states = 0xF;
+	}
+#endif
+	ret = of_property_read_u32(np, "num-lanes", &pcie->num_lanes);
+	if (ret < 0) {
+		dev_err(pcie->dev, "failed to read num-lanes: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "nvidia,max-speed", &pcie->max_speed);
+	if (ret < 0 || (pcie->max_speed < 1 || pcie->max_speed > 4)) {
+		dev_err(pcie->dev, "invalid max-speed (err=%d), set to Gen-1\n",
+			ret);
+		pcie->max_speed = 1;
+	}
+
+	ret = of_property_read_u32(np, "nvidia,init-speed", &pcie->init_speed);
+	if (ret < 0 || (pcie->init_speed < 1 || pcie->init_speed > 4)) {
+		dev_dbg(pcie->dev, "Setting init speed to max speed\n");
+		pcie->init_speed = pcie->max_speed;
+	}
+
+	ret = of_property_read_u32_index(np, "nvidia,controller-id", 1,
+					 &pcie->cid);
+	if (ret) {
+		dev_err(pcie->dev, "Controller-ID is missing in DT: %d\n", ret);
+		return ret;
+	}
+
+	pcie->phy_count = of_property_count_strings(np, "phy-names");
+	if (pcie->phy_count < 0) {
+		dev_err(pcie->dev, "unable to find PHY entries\n");
+		return pcie->phy_count;
+	}
+
+	if (of_property_read_bool(np, "nvidia,update-fc-fixup"))
+		pcie->update_fc_fixup = true;
+
+	pcie->pex_wake = of_get_named_gpio(np, "nvidia,pex-wake", 0);
+
+	pcie->power_down_en = of_property_read_bool(pcie->dev->of_node,
+						    "nvidia,enable-power-down");
+
+	pcie->disable_clock_request =
+		of_property_read_bool(pcie->dev->of_node,
+				      "nvidia,disable-clock-request");
+
+	pcie->cdm_check = of_property_read_bool(np, "nvidia,cdm-check");
+
+	pcie->n_gpios = of_gpio_named_count(np, "nvidia,plat-gpios");
+	if (pcie->n_gpios > 0) {
+		int count, gpio;
+		enum of_gpio_flags flags;
+		unsigned long f;
+
+		pcie->gpios = devm_kzalloc(pcie->dev,
+					   pcie->n_gpios * sizeof(int),
+					   GFP_KERNEL);
+		if (!pcie->gpios)
+			return -ENOMEM;
+
+		for (count = 0; count < pcie->n_gpios; ++count) {
+			gpio = of_get_named_gpio_flags(np,
+						       "nvidia,plat-gpios",
+						       count, &flags);
+			if (gpio < 0 && (gpio != -ENOENT))
+				return gpio;
+
+			f = (flags & OF_GPIO_ACTIVE_LOW) ?
+			    (GPIOF_OUT_INIT_HIGH | GPIOF_ACTIVE_LOW) :
+			     GPIOF_OUT_INIT_LOW;
+
+			ret = devm_gpio_request_one(pcie->dev, gpio, f,
+						    NULL);
+			if (ret < 0) {
+				dev_err(pcie->dev,
+					"gpio %d request failed\n",
+					gpio);
+				return ret;
+			}
+			pcie->gpios[count] = gpio;
+		}
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_config_rp(struct tegra_pcie_dw *pcie)
+{
+	struct pcie_port *pp = &pcie->pci.pp;
+	char *name;
+	int ret;
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		pp->msi_irq = of_irq_get_byname(pcie->dev->of_node, "msi");
+		if (!pp->msi_irq) {
+			dev_err(pcie->dev, "failed to get msi interrupt\n");
+			return -ENODEV;
+		}
+
+		ret = devm_request_irq(pcie->dev, pp->msi_irq,
+				       tegra_pcie_msi_irq_handler,
+				       IRQF_SHARED | IRQF_NO_THREAD,
+				       "tegra-pcie-msi", pp);
+		if (ret) {
+			dev_err(pcie->dev, "failed to request \"msi\" irq\n");
+			return ret;
+		}
+	}
+
+	pm_runtime_enable(pcie->dev);
+	ret = pm_runtime_get_sync(pcie->dev);
+	if (ret < 0) {
+		dev_err(pcie->dev, "failed to enable PCIe device\n");
+		pm_runtime_disable(pcie->dev);
+		return ret;
+	}
+
+	pcie->link_state = tegra_pcie_dw_link_up(&pcie->pci);
+
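+	/*
+	 * -ENOMEDIUM tells the probe path that no device is present so the
+	 * controller can be powered down.
+	 */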
+	if (!pcie->link_state && pcie->power_down_en) {
+		ret = -ENOMEDIUM;
+		goto fail_host_init;
+	}
+
+	name = kasprintf(GFP_KERNEL, "pcie@%x",
+			 (u32)pcie->appl_res->start);
+	if (!name) {
+		ret = -ENOMEM;
+		goto fail_host_init;
+	}
+
+	pcie->debugfs = debugfs_create_dir(name, NULL);
+	if (!pcie->debugfs)
+		dev_err(pcie->dev, "debugfs creation failed\n");
+	else
+		init_debugfs(pcie);
+	kfree(name);
+
+	return ret;
+
+fail_host_init:
+	pm_runtime_put_sync(pcie->dev);
+	return ret;
+}
+
+static const struct tegra_pcie_of_data tegra_pcie_rc_of_data = {
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct of_device_id tegra_pcie_dw_of_match[] = {
+	{
+		.compatible = "nvidia,tegra194-pcie",
+		.data = &tegra_pcie_rc_of_data,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
+
+static int tegra_pcie_dw_probe(struct platform_device *pdev)
+{
+	struct tegra_pcie_dw *pcie;
+	struct pcie_port *pp;
+	struct dw_pcie *pci;
+	struct phy **phy;
+	struct resource	*dbi_res;
+	struct resource	*atu_dma_res;
+	const struct of_device_id *match;
+	const struct tegra_pcie_of_data *data;
+	char *name;
+	int ret, i;
+
+	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = &pcie->pci;
+	pci->dev = &pdev->dev;
+	pci->ops = &tegra_dw_pcie_ops;
+	pp = &pci->pp;
+	pcie->dev = &pdev->dev;
+
+	match = of_match_device(of_match_ptr(tegra_pcie_dw_of_match),
+				&pdev->dev);
+	if (!match)
+		return -EINVAL;
+
+	data = (struct tegra_pcie_of_data *)match->data;
+	pcie->mode = (enum dw_pcie_device_mode)data->mode;
+
+	ret = tegra_pcie_dw_parse_dt(pcie);
+	if (ret < 0) {
+		dev_err(pcie->dev, "device tree parsing failed: %d\n", ret);
+		return ret;
+	}
+
+	if (gpio_is_valid(pcie->pex_wake)) {
+		ret = devm_gpio_request(pcie->dev, pcie->pex_wake,
+					"pcie_wake");
+		if (ret < 0) {
+			if (ret == -EBUSY) {
+				dev_err(pcie->dev,
+					"pex_wake already in use\n");
+				pcie->pex_wake = -EINVAL;
+			} else {
+				dev_err(pcie->dev,
+					"pcie_wake gpio_request failed %d\n",
+					ret);
+				return ret;
+			}
+		}
+
+		ret = gpio_direction_input(pcie->pex_wake);
+		if (ret < 0) {
+			dev_err(pcie->dev,
+				"setting pcie_wake input direction failed %d\n",
+				ret);
+			return ret;
+		}
+		device_init_wakeup(pcie->dev, true);
+	}
+
+	pcie->pex_ctl_reg = devm_regulator_get(&pdev->dev, "vddio-pex-ctl");
+	if (IS_ERR(pcie->pex_ctl_reg)) {
+		dev_err(&pdev->dev, "failed to get regulator: %ld\n",
+			PTR_ERR(pcie->pex_ctl_reg));
+		return PTR_ERR(pcie->pex_ctl_reg);
+	}
+
+	pcie->core_clk = devm_clk_get(&pdev->dev, "core_clk");
+	if (IS_ERR(pcie->core_clk)) {
+		dev_err(&pdev->dev, "Failed to get core clock\n");
+		return PTR_ERR(pcie->core_clk);
+	}
+
+	pcie->appl_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						      "appl");
+	if (!pcie->appl_res) {
+		dev_err(&pdev->dev, "missing appl space\n");
+		return -ENODEV;
+	}
+	pcie->appl_base = devm_ioremap_resource(&pdev->dev, pcie->appl_res);
+	if (IS_ERR(pcie->appl_base)) {
+		dev_err(&pdev->dev, "mapping appl space failed\n");
+		return PTR_ERR(pcie->appl_base);
+	}
+
+	pcie->core_apb_rst = devm_reset_control_get(pcie->dev, "core_apb_rst");
+	if (IS_ERR(pcie->core_apb_rst)) {
+		dev_err(pcie->dev, "core_apb_rst reset is missing\n");
+		return PTR_ERR(pcie->core_apb_rst);
+	}
+
+	phy = devm_kcalloc(pcie->dev, pcie->phy_count, sizeof(*phy),
+			   GFP_KERNEL);
+	if (!phy)
+		return -ENOMEM;
+
+	for (i = 0; i < pcie->phy_count; i++) {
+		name = kasprintf(GFP_KERNEL, "pcie-p2u-%u", i);
+		if (!name) {
+			dev_err(pcie->dev, "failed to create p2u string\n");
+			return -ENOMEM;
+		}
+		phy[i] = devm_phy_get(pcie->dev, name);
+		kfree(name);
+		if (IS_ERR(phy[i])) {
+			ret = PTR_ERR(phy[i]);
+			dev_err(pcie->dev, "phy_get error: %d\n", ret);
+			return ret;
+		}
+	}
+
+	pcie->phy = phy;
+
+	dbi_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+	if (!dbi_res) {
+		dev_err(&pdev->dev, "missing config space\n");
+		return -ENODEV;
+	}
+	pcie->dbi_res = dbi_res;
+
+	pci->dbi_base = devm_ioremap_resource(&pdev->dev, dbi_res);
+	if (IS_ERR(pci->dbi_base)) {
+		dev_err(&pdev->dev, "mapping dbi space failed\n");
+		return PTR_ERR(pci->dbi_base);
+	}
+
+	/* Tegra HW locates DBI2 at a fixed offset from DBI */
+	pci->dbi_base2 = pci->dbi_base + 0x1000;
+
+	atu_dma_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						   "atu_dma");
+	if (!atu_dma_res) {
+		dev_err(&pdev->dev, "missing atu_dma space\n");
+		return -ENODEV;
+	}
+	pcie->atu_dma_res = atu_dma_res;
+	pci->atu_base = devm_ioremap_resource(&pdev->dev, atu_dma_res);
+	if (IS_ERR(pci->atu_base)) {
+		dev_err(&pdev->dev, "mapping atu space failed\n");
+		return PTR_ERR(pci->atu_base);
+	}
+
+	pcie->core_rst = devm_reset_control_get(pcie->dev, "core_rst");
+	if (IS_ERR(pcie->core_rst)) {
+		dev_err(pcie->dev, "core_rst reset is missing\n");
+		return PTR_ERR(pcie->core_rst);
+	}
+
+	pp->irq = platform_get_irq_byname(pdev, "intr");
+	if (!pp->irq) {
+		dev_err(pcie->dev, "failed to get intr interrupt\n");
+		return -ENODEV;
+	}
+
+	ret = devm_request_irq(&pdev->dev, pp->irq, tegra_pcie_irq_handler,
+			       IRQF_SHARED, "tegra-pcie-intr", pcie);
+	if (ret) {
+		dev_err(pcie->dev, "failed to request \"intr\" irq\n");
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, pcie);
+
+	if (pcie->mode == DW_PCIE_RC_TYPE) {
+		ret = tegra_pcie_config_rp(pcie);
+		if (ret == -ENOMEDIUM)
+			ret = 0;
+	}
+
+	return ret;
+}
+
+static int tegra_pcie_try_link_l2(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	if (!tegra_pcie_dw_link_up(&pcie->pci))
+		return 0;
+
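+	/* Send PME_Turn_Off and wait for the link to settle in L2 */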
+	val = readl(pcie->appl_base + APPL_RADM_STATUS);
+	val |= APPL_PM_XMT_TURNOFF_STATE;
+	writel(val, pcie->appl_base + APPL_RADM_STATUS);
+
+	return readl_poll_timeout_atomic(pcie->appl_base + APPL_DEBUG, val,
+					 val & APPL_DEBUG_PM_LINKST_IN_L2_LAT,
+					 1, PME_ACK_TIMEOUT);
+}
+
+static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
+{
+	struct pci_dev *pdev = NULL;
+	struct pci_bus *child;
+	struct pcie_port *pp = &pcie->pci.pp;
+
+	list_for_each_entry(child, &pp->bus->children, node) {
+		/* Bring downstream devices to D0 if they are not already in */
+		if (child->parent == pp->bus) {
+			pdev = pci_get_slot(child, PCI_DEVFN(0, 0));
+			if (!pdev)
+				break;
+
+			if (pci_set_power_state(pdev, PCI_D0))
+				dev_err(pcie->dev, "D0 transition failed\n");
+			pci_dev_put(pdev);
+		}
+	}
+}
+
+static void tegra_pcie_dw_pme_turnoff(struct tegra_pcie_dw *pcie)
+{
+	u32 data;
+	int err;
+
+	if (!tegra_pcie_dw_link_up(&pcie->pci)) {
+		dev_dbg(pcie->dev, "PCIe link is not up\n");
+		return;
+	}
+
+	if (tegra_pcie_try_link_l2(pcie)) {
+		dev_info(pcie->dev, "Link didn't transition to L2 state\n");
+		/*
+		 * TX lane clock frequency resets to Gen1 only if the link is
+		 * in L2 or detect state. So apply PEX_RST to the endpoint to
+		 * force the RP into detect state.
+		 */
+		data = readl(pcie->appl_base + APPL_PINMUX);
+		data &= ~APPL_PINMUX_PEX_RST;
+		writel(data, pcie->appl_base + APPL_PINMUX);
+
+		err = readl_poll_timeout_atomic(pcie->appl_base + APPL_DEBUG,
+						data,
+						((data &
+						APPL_DEBUG_LTSSM_STATE_MASK) >>
+						APPL_DEBUG_LTSSM_STATE_SHIFT) ==
+						LTSSM_STATE_PRE_DETECT,
+						1, LTSSM_TIMEOUT);
+		if (err) {
+			dev_info(pcie->dev, "Link didn't go to detect state\n");
+		} else {
+			/* Disable LTSSM after link is in detect state */
+			data = readl(pcie->appl_base + APPL_CTRL);
+			data &= ~APPL_CTRL_LTSSM_EN;
+			writel(data, pcie->appl_base + APPL_CTRL);
+		}
+	}
+	/*
+	 * DBI registers may not be accessible after this as PLL-E may be
+	 * powered down depending on how CLKREQ is pulled by the endpoint.
+	 */
+	data = readl(pcie->appl_base + APPL_PINMUX);
+	data |= (APPL_PINMUX_CLKREQ_OVERRIDE_EN | APPL_PINMUX_CLKREQ_OVERRIDE);
+	/* Cut REFCLK to slot */
+	data |= APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN;
+	data &= ~APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE;
+	writel(data, pcie->appl_base + APPL_PINMUX);
+}
+
+static int tegra_pcie_dw_remove(struct platform_device *pdev)
+{
+	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+
+	if (pcie->mode == DW_PCIE_RC_TYPE) {
+		if (!pcie->link_state && pcie->power_down_en)
+			return 0;
+
+		debugfs_remove_recursive(pcie->debugfs);
+		pm_runtime_put_sync(pcie->dev);
+		pm_runtime_disable(pcie->dev);
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_bpmp_set_ctrl_state(struct tegra_pcie_dw *pcie,
+					  int enable)
+{
+	struct mrq_uphy_request req;
+	struct mrq_uphy_response resp;
+	struct tegra_bpmp_message msg;
+	struct tegra_bpmp *bpmp;
+	int err;
+
+	memset(&req, 0, sizeof(req));
+	memset(&resp, 0, sizeof(resp));
+
+	req.cmd = CMD_UPHY_PCIE_CONTROLLER_STATE;
+	req.controller_state.pcie_controller = pcie->cid;
+	req.controller_state.enable = enable;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_UPHY;
+	msg.tx.data = &req;
+	msg.tx.size = sizeof(req);
+	msg.rx.data = &resp;
+	msg.rx.size = sizeof(resp);
+
+	bpmp = tegra_bpmp_get(pcie->dev);
+	if (IS_ERR(bpmp))
+		return PTR_ERR(bpmp);
+
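+	/* Use the atomic BPMP transfer API if interrupts are disabled */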
+	if (irqs_disabled())
+		err = tegra_bpmp_transfer_atomic(bpmp, &msg);
+	else
+		err = tegra_bpmp_transfer(bpmp, &msg);
+
+	tegra_bpmp_put(bpmp);
+
+	return err;
+}
+
+static int tegra_pcie_config_controller(struct tegra_pcie_dw *pcie,
+					bool en_hw_hot_rst)
+{
+	int ret;
+	u32 val;
+
+	if (pcie->cid != CTRL_5) {
+		ret = tegra_pcie_bpmp_set_ctrl_state(pcie, true);
+		if (ret) {
+			dev_err(pcie->dev, "Enabling controller-%d failed:%d\n",
+				pcie->cid, ret);
+			return ret;
+		}
+	}
+
+	config_plat_gpio(pcie, 1);
+
+	ret = regulator_enable(pcie->pex_ctl_reg);
+	if (ret < 0) {
+		dev_err(pcie->dev, "regulator enable failed: %d\n", ret);
+		goto fail_reg_en;
+	}
+
+	ret = clk_prepare_enable(pcie->core_clk);
+	if (ret) {
+		dev_err(pcie->dev, "Failed to enable core clock\n");
+		goto fail_core_clk;
+	}
+
+	reset_control_deassert(pcie->core_apb_rst);
+
+	if (en_hw_hot_rst) {
+		/* Enable HW_HOT_RST mode */
+		val = readl(pcie->appl_base + APPL_CTRL);
+		val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+			  APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+		val |= APPL_CTRL_HW_HOT_RST_EN;
+		writel(val, pcie->appl_base + APPL_CTRL);
+	}
+
+	ret = tegra_pcie_enable_phy(pcie);
+	if (ret) {
+		dev_err(pcie->dev, "failed to enable phy\n");
+		goto fail_phy;
+	}
+
+	/* Update CFG base address */
+	writel(pcie->dbi_res->start & APPL_CFG_BASE_ADDR_MASK,
+	       pcie->appl_base + APPL_CFG_BASE_ADDR);
+
+	/* Configure this core for RP mode operation */
+	writel(APPL_DM_TYPE_RP, pcie->appl_base + APPL_DM_TYPE);
+
+	writel(0x0, pcie->appl_base + APPL_CFG_SLCG_OVERRIDE);
+
+	val = readl(pcie->appl_base + APPL_CTRL);
+	writel(val | APPL_CTRL_SYS_PRE_DET_STATE, pcie->appl_base + APPL_CTRL);
+
+	val = readl(pcie->appl_base + APPL_CFG_MISC);
+	val |= (APPL_CFG_MISC_ARCACHE_VAL << APPL_CFG_MISC_ARCACHE_SHIFT);
+	writel(val, pcie->appl_base + APPL_CFG_MISC);
+
+	if (pcie->disable_clock_request) {
+		val = readl(pcie->appl_base + APPL_PINMUX);
+		val |= APPL_PINMUX_CLKREQ_OUT_OVRD_EN;
+		val |= APPL_PINMUX_CLKREQ_OUT_OVRD;
+		writel(val, pcie->appl_base + APPL_PINMUX);
+
+		/* Disable ASPM-L1SS adv as there is no CLKREQ routing */
+		disable_aspm_l11(pcie); /* Disable L1.1 */
+		disable_aspm_l12(pcie); /* Disable L1.2 */
+	}
+
+	/* Update iATU_DMA base address */
+	writel(pcie->atu_dma_res->start & APPL_CFG_IATU_DMA_BASE_ADDR_MASK,
+	       pcie->appl_base + APPL_CFG_IATU_DMA_BASE_ADDR);
+
+	reset_control_deassert(pcie->core_rst);
+
+	return ret;
+
+fail_phy:
+	reset_control_assert(pcie->core_apb_rst);
+	clk_disable_unprepare(pcie->core_clk);
+fail_core_clk:
+	regulator_disable(pcie->pex_ctl_reg);
+fail_reg_en:
+	config_plat_gpio(pcie, 0);
+	if (pcie->cid != CTRL_5)
+		tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+
+	return ret;
+}
+
+static int tegra_pcie_dw_runtime_suspend(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+
+	tegra_pcie_downstream_dev_to_D0(pcie);
+
+	pci_stop_root_bus(pcie->pci.pp.bus);
+	pci_remove_root_bus(pcie->pci.pp.bus);
+
+	tegra_pcie_dw_pme_turnoff(pcie);
+
+	reset_control_assert(pcie->core_rst);
+	tegra_pcie_disable_phy(pcie);
+	reset_control_assert(pcie->core_apb_rst);
+	clk_disable_unprepare(pcie->core_clk);
+	regulator_disable(pcie->pex_ctl_reg);
+	config_plat_gpio(pcie, 0);
+
+	if (pcie->cid != CTRL_5)
+		tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+
+	return 0;
+}
+
+static int tegra_pcie_dw_runtime_resume(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+	struct dw_pcie *pci = &pcie->pci;
+	struct pcie_port *pp = &pci->pp;
+	int ret = 0;
+
+	ret = tegra_pcie_config_controller(pcie, false);
+	if (ret < 0)
+		return ret;
+
+	/* Program to use MPS of 256 wherever possible */
+	pcie_bus_config = PCIE_BUS_SAFE;
+
+	pp->root_bus_nr = -1;
+	pp->ops = &tegra_pcie_dw_host_ops;
+
+	/* Disable MSI interrupts for PME messages */
+	pcie_pme_disable_msi();
+
+	ret = dw_pcie_host_init(pp);
+	if (ret < 0) {
+		dev_err(pcie->dev, "failed to add PCIe port: %d\n", ret);
+		goto fail_host_init;
+	}
+
+	return 0;
+
+fail_host_init:
+	reset_control_assert(pcie->core_rst);
+	tegra_pcie_disable_phy(pcie);
+	reset_control_assert(pcie->core_apb_rst);
+	clk_disable_unprepare(pcie->core_clk);
+	regulator_disable(pcie->pex_ctl_reg);
+	config_plat_gpio(pcie, 0);
+	if (pcie->cid != CTRL_5)
+		tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+
+	return ret;
+}
+
+static int tegra_pcie_dw_suspend_late(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+	u32 val;
+
+	if (!pcie->link_state)
+		return 0;
+
+	/* Enable HW_HOT_RST mode */
+	val = readl(pcie->appl_base + APPL_CTRL);
+	val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+		  APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+	val |= APPL_CTRL_HW_HOT_RST_EN;
+	writel(val, pcie->appl_base + APPL_CTRL);
+
+	return 0;
+}
+
+static int tegra_pcie_dw_suspend_noirq(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (!pcie->link_state)
+		return 0;
+
+	/* Save MSI interrupt vector */
+	pcie->msi_ctrl_int = dw_pcie_readl_dbi(&pcie->pci,
+					       PORT_LOGIC_MSI_CTRL_INT_0_EN);
+	tegra_pcie_downstream_dev_to_D0(pcie);
+	tegra_pcie_dw_pme_turnoff(pcie);
+	reset_control_assert(pcie->core_rst);
+	tegra_pcie_disable_phy(pcie);
+	reset_control_assert(pcie->core_apb_rst);
+	clk_disable_unprepare(pcie->core_clk);
+	regulator_disable(pcie->pex_ctl_reg);
+	config_plat_gpio(pcie, 0);
+	if (pcie->cid != CTRL_5) {
+		ret = tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+		if (ret) {
+			dev_err(pcie->dev, "Disabling ctrl-%d failed:%d\n",
+				pcie->cid, ret);
+			return ret;
+		}
+	}
+	if (gpio_is_valid(pcie->pex_wake) && device_may_wakeup(dev)) {
+		ret = enable_irq_wake(gpio_to_irq(pcie->pex_wake));
+		if (ret < 0)
+			dev_err(dev, "enable wake irq failed: %d\n", ret);
+	}
+
+	return ret;
+}
+
+static int tegra_pcie_dw_resume_noirq(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+	int ret;
+
+	if (!pcie->link_state)
+		return 0;
+
+	if (gpio_is_valid(pcie->pex_wake) && device_may_wakeup(dev)) {
+		ret = disable_irq_wake(gpio_to_irq(pcie->pex_wake));
+		if (ret < 0)
+			dev_err(dev, "disable wake irq failed: %d\n", ret);
+	}
+
+	ret = tegra_pcie_config_controller(pcie, true);
+	if (ret < 0)
+		return ret;
+
+	ret = tegra_pcie_dw_host_init(&pcie->pci.pp);
+	if (ret < 0) {
+		dev_err(dev, "failed to init host: %d\n", ret);
+		goto fail_host_init;
+	}
+
+	/* Restore MSI interrupt vector */
+	dw_pcie_writel_dbi(&pcie->pci, PORT_LOGIC_MSI_CTRL_INT_0_EN,
+			   pcie->msi_ctrl_int);
+
+	tegra_pcie_dw_scan_bus(&pcie->pci.pp);
+
+	return 0;
+fail_host_init:
+	reset_control_assert(pcie->core_rst);
+	tegra_pcie_disable_phy(pcie);
+	reset_control_assert(pcie->core_apb_rst);
+	clk_disable_unprepare(pcie->core_clk);
+	regulator_disable(pcie->pex_ctl_reg);
+	config_plat_gpio(pcie, 0);
+	if (pcie->cid != CTRL_5)
+		tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+
+	return ret;
+}
+
+static int tegra_pcie_dw_resume_early(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+	u32 val;
+
+	if (!pcie->link_state)
+		return 0;
+
+	/* Disable HW_HOT_RST mode */
+	val = readl(pcie->appl_base + APPL_CTRL);
+	val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+		  APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+	val |= APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST <<
+		APPL_CTRL_HW_HOT_RST_MODE_SHIFT;
+	val &= ~APPL_CTRL_HW_HOT_RST_EN;
+	writel(val, pcie->appl_base + APPL_CTRL);
+
+	return 0;
+}
+
+static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
+{
+	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+
+	if (pcie->mode == DW_PCIE_RC_TYPE) {
+		if (!pcie->link_state && pcie->power_down_en)
+			return;
+
+		debugfs_remove_recursive(pcie->debugfs);
+		tegra_pcie_downstream_dev_to_D0(pcie);
+
+		/* Disable interrupts */
+		disable_irq(pcie->pci.pp.irq);
+		if (IS_ENABLED(CONFIG_PCI_MSI))
+			disable_irq(pcie->pci.pp.msi_irq);
+
+		tegra_pcie_dw_pme_turnoff(pcie);
+
+		reset_control_assert(pcie->core_rst);
+		tegra_pcie_disable_phy(pcie);
+		reset_control_assert(pcie->core_apb_rst);
+		clk_disable_unprepare(pcie->core_clk);
+		regulator_disable(pcie->pex_ctl_reg);
+		config_plat_gpio(pcie, 0);
+		if (pcie->cid != CTRL_5)
+			tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+	}
+}
+
+static const struct dev_pm_ops tegra_pcie_dw_pm_ops = {
+	.suspend_late = tegra_pcie_dw_suspend_late,
+	.suspend_noirq = tegra_pcie_dw_suspend_noirq,
+	.resume_noirq = tegra_pcie_dw_resume_noirq,
+	.resume_early = tegra_pcie_dw_resume_early,
+	.runtime_suspend = tegra_pcie_dw_runtime_suspend,
+	.runtime_resume = tegra_pcie_dw_runtime_resume,
+};
+
+static struct platform_driver tegra_pcie_dw_driver = {
+	.probe = tegra_pcie_dw_probe,
+	.remove = tegra_pcie_dw_remove,
+	.shutdown = tegra_pcie_dw_shutdown,
+	.driver = {
+		.name	= "pcie-tegra",
+#ifdef CONFIG_PM
+		.pm = &tegra_pcie_dw_pm_ops,
+#endif
+		.of_match_table = tegra_pcie_dw_of_match,
+	},
+};
+module_platform_driver(tegra_pcie_dw_driver);
+
+MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
+MODULE_LICENSE("GPL v2");