diff mbox series

[v4] PCI: dwc: pci-dra7xx: Fix MSI IRQ handling

Message ID 20200327095434.945-1-vigneshr@ti.com (mailing list archive)
State New, archived
Headers show
Series [v4] PCI: dwc: pci-dra7xx: Fix MSI IRQ handling | expand

Commit Message

Vignesh Raghavendra March 27, 2020, 9:54 a.m. UTC
Due an issue with PCIe wrapper around DWC PCIe IP on dra7xx, driver
needs to ensure that there are no pending MSI IRQ vector set (i.e
PCIE_MSI_INTR0_STATUS reads 0 at least once) before exiting IRQ handler.
Else, the dra7xx PCIe wrapper will not register new MSI IRQs even though
PCIE_MSI_INTR0_STATUS shows IRQs are pending.

Therefore its no longer possible to use default IRQ handler provided by
DWC library. So, add irqchip implementation inside pci-dra7xx.c and
install new MSI IRQ handler to handle above errata.

This fixes a bug, where PCIe wifi cards with 4 DMA queues like Intel
8260 used to throw following error and stall during ping/iperf3 tests.

[   97.776310] iwlwifi 0000:01:00.0: Queue 9 stuck for 2500 ms.

Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
Tested-by: Kishon Vijay Abraham I <kishon@ti.com>
---

v4:
Simplify dra7xx_pcie_handle_msi_irq() by spliting inner loop into
separte function as suggested by Lorenzo

v3:
- Move loop to service all MSI IRQs into dra7xx_pcie_handle_msi_irq()
- Add a warning msg when loop counter overflows


 drivers/pci/controller/dwc/pci-dra7xx.c | 231 ++++++++++++++++++++----
 1 file changed, 195 insertions(+), 36 deletions(-)

Comments

Lorenzo Pieralisi March 27, 2020, 3:01 p.m. UTC | #1
On Fri, Mar 27, 2020 at 03:24:34PM +0530, Vignesh Raghavendra wrote:
> Due an issue with PCIe wrapper around DWC PCIe IP on dra7xx, driver
> needs to ensure that there are no pending MSI IRQ vector set (i.e
> PCIE_MSI_INTR0_STATUS reads 0 at least once) before exiting IRQ handler.
> Else, the dra7xx PCIe wrapper will not register new MSI IRQs even though
> PCIE_MSI_INTR0_STATUS shows IRQs are pending.
> 
> Therefore its no longer possible to use default IRQ handler provided by
> DWC library. So, add irqchip implementation inside pci-dra7xx.c and
> install new MSI IRQ handler to handle above errata.
> 
> This fixes a bug, where PCIe wifi cards with 4 DMA queues like Intel
> 8260 used to throw following error and stall during ping/iperf3 tests.
> 
> [   97.776310] iwlwifi 0000:01:00.0: Queue 9 stuck for 2500 ms.
> 
> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
> Tested-by: Kishon Vijay Abraham I <kishon@ti.com>
> ---
> 
> v4:
> Simplify dra7xx_pcie_handle_msi_irq() by spliting inner loop into
> separte function as suggested by Lorenzo
> 
> v3:
> - Move loop to service all MSI IRQs into dra7xx_pcie_handle_msi_irq()
> - Add a warning msg when loop counter overflows
> 
> 
>  drivers/pci/controller/dwc/pci-dra7xx.c | 231 ++++++++++++++++++++----
>  1 file changed, 195 insertions(+), 36 deletions(-)

Applied to pci/dwc, thanks.

Lorenzo

> diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
> index 9bf7fa99b103..3b0e58f2de58 100644
> --- a/drivers/pci/controller/dwc/pci-dra7xx.c
> +++ b/drivers/pci/controller/dwc/pci-dra7xx.c
> @@ -215,10 +215,6 @@ static int dra7xx_pcie_host_init(struct pcie_port *pp)
>  	return 0;
>  }
>  
> -static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = {
> -	.host_init = dra7xx_pcie_host_init,
> -};
> -
>  static int dra7xx_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
>  				irq_hw_number_t hwirq)
>  {
> @@ -233,43 +229,77 @@ static const struct irq_domain_ops intx_domain_ops = {
>  	.xlate = pci_irqd_intx_xlate,
>  };
>  
> -static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
> +static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index)
>  {
>  	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> -	struct device *dev = pci->dev;
> -	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
> -	struct device_node *node = dev->of_node;
> -	struct device_node *pcie_intc_node =  of_get_next_child(node, NULL);
> +	unsigned long val;
> +	int pos, irq;
>  
> -	if (!pcie_intc_node) {
> -		dev_err(dev, "No PCIe Intc node found\n");
> -		return -ENODEV;
> -	}
> +	val = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
> +				   (index * MSI_REG_CTRL_BLOCK_SIZE));
> +	if (!val)
> +		return 0;
>  
> -	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
> -						   &intx_domain_ops, pp);
> -	of_node_put(pcie_intc_node);
> -	if (!dra7xx->irq_domain) {
> -		dev_err(dev, "Failed to get a INTx IRQ domain\n");
> -		return -ENODEV;
> +	pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0);
> +	while (pos != MAX_MSI_IRQS_PER_CTRL) {
> +		irq = irq_find_mapping(pp->irq_domain,
> +				       (index * MAX_MSI_IRQS_PER_CTRL) + pos);
> +		generic_handle_irq(irq);
> +		pos++;
> +		pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, pos);
>  	}
>  
> -	return 0;
> +	return 1;
>  }
>  
> -static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
> +static void dra7xx_pcie_handle_msi_irq(struct pcie_port *pp)
>  {
> -	struct dra7xx_pcie *dra7xx = arg;
> -	struct dw_pcie *pci = dra7xx->pci;
> -	struct pcie_port *pp = &pci->pp;
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	int ret, i, count, num_ctrls;
> +
> +	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
> +
> +	/**
> +	 * Need to make sure all MSI status bits read 0 before exiting.
> +	 * Else, new MSI IRQs are not registered by the wrapper. Have an
> +	 * upperbound for the loop and exit the IRQ in case of IRQ flood
> +	 * to avoid locking up system in interrupt context.
> +	 */
> +	count = 0;
> +	do {
> +		ret = 0;
> +
> +		for (i = 0; i < num_ctrls; i++)
> +			ret |= dra7xx_pcie_handle_msi(pp, i);
> +		count++;
> +	} while (ret && count <= 1000);
> +
> +	if (count > 1000)
> +		dev_warn_ratelimited(pci->dev,
> +				     "Too many MSI IRQs to handle\n");
> +}
> +
> +static void dra7xx_pcie_msi_irq_handler(struct irq_desc *desc)
> +{
> +	struct irq_chip *chip = irq_desc_get_chip(desc);
> +	struct dra7xx_pcie *dra7xx;
> +	struct dw_pcie *pci;
> +	struct pcie_port *pp;
>  	unsigned long reg;
>  	u32 virq, bit;
>  
> +	chained_irq_enter(chip, desc);
> +
> +	pp = irq_desc_get_handler_data(desc);
> +	pci = to_dw_pcie_from_pp(pp);
> +	dra7xx = to_dra7xx_pcie(pci);
> +
>  	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI);
> +	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
>  
>  	switch (reg) {
>  	case MSI:
> -		dw_handle_msi_irq(pp);
> +		dra7xx_pcie_handle_msi_irq(pp);
>  		break;
>  	case INTA:
>  	case INTB:
> @@ -283,9 +313,7 @@ static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
>  		break;
>  	}
>  
> -	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
> -
> -	return IRQ_HANDLED;
> +	chained_irq_exit(chip, desc);
>  }
>  
>  static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
> @@ -347,6 +375,145 @@ static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
>  	return IRQ_HANDLED;
>  }
>  
> +static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct device *dev = pci->dev;
> +	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
> +	struct device_node *node = dev->of_node;
> +	struct device_node *pcie_intc_node =  of_get_next_child(node, NULL);
> +
> +	if (!pcie_intc_node) {
> +		dev_err(dev, "No PCIe Intc node found\n");
> +		return -ENODEV;
> +	}
> +
> +	irq_set_chained_handler_and_data(pp->irq, dra7xx_pcie_msi_irq_handler,
> +					 pp);
> +	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
> +						   &intx_domain_ops, pp);
> +	of_node_put(pcie_intc_node);
> +	if (!dra7xx->irq_domain) {
> +		dev_err(dev, "Failed to get a INTx IRQ domain\n");
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}
> +
> +static void dra7xx_pcie_setup_msi_msg(struct irq_data *d, struct msi_msg *msg)
> +{
> +	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	u64 msi_target;
> +
> +	msi_target = (u64)pp->msi_data;
> +
> +	msg->address_lo = lower_32_bits(msi_target);
> +	msg->address_hi = upper_32_bits(msi_target);
> +
> +	msg->data = d->hwirq;
> +
> +	dev_dbg(pci->dev, "msi#%d address_hi %#x address_lo %#x\n",
> +		(int)d->hwirq, msg->address_hi, msg->address_lo);
> +}
> +
> +static int dra7xx_pcie_msi_set_affinity(struct irq_data *d,
> +					const struct cpumask *mask,
> +					bool force)
> +{
> +	return -EINVAL;
> +}
> +
> +static void dra7xx_pcie_bottom_mask(struct irq_data *d)
> +{
> +	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	unsigned int res, bit, ctrl;
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&pp->lock, flags);
> +
> +	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
> +	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
> +	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
> +
> +	pp->irq_mask[ctrl] |= BIT(bit);
> +	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res,
> +			   pp->irq_mask[ctrl]);
> +
> +	raw_spin_unlock_irqrestore(&pp->lock, flags);
> +}
> +
> +static void dra7xx_pcie_bottom_unmask(struct irq_data *d)
> +{
> +	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	unsigned int res, bit, ctrl;
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&pp->lock, flags);
> +
> +	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
> +	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
> +	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
> +
> +	pp->irq_mask[ctrl] &= ~BIT(bit);
> +	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res,
> +			   pp->irq_mask[ctrl]);
> +
> +	raw_spin_unlock_irqrestore(&pp->lock, flags);
> +}
> +
> +static void dra7xx_pcie_bottom_ack(struct irq_data *d)
> +{
> +	struct pcie_port *pp  = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	unsigned int res, bit, ctrl;
> +
> +	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
> +	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
> +	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
> +
> +	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_STATUS + res, BIT(bit));
> +}
> +
> +static struct irq_chip dra7xx_pci_msi_bottom_irq_chip = {
> +	.name = "DRA7XX-PCI-MSI",
> +	.irq_ack = dra7xx_pcie_bottom_ack,
> +	.irq_compose_msi_msg = dra7xx_pcie_setup_msi_msg,
> +	.irq_set_affinity = dra7xx_pcie_msi_set_affinity,
> +	.irq_mask = dra7xx_pcie_bottom_mask,
> +	.irq_unmask = dra7xx_pcie_bottom_unmask,
> +};
> +
> +static int dra7xx_pcie_msi_host_init(struct pcie_port *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	u32 ctrl, num_ctrls;
> +
> +	pp->msi_irq_chip = &dra7xx_pci_msi_bottom_irq_chip;
> +
> +	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
> +	/* Initialize IRQ Status array */
> +	for (ctrl = 0; ctrl < num_ctrls; ctrl++) {
> +		pp->irq_mask[ctrl] = ~0;
> +		dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK +
> +				    (ctrl * MSI_REG_CTRL_BLOCK_SIZE),
> +				    pp->irq_mask[ctrl]);
> +		dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_ENABLE +
> +				    (ctrl * MSI_REG_CTRL_BLOCK_SIZE),
> +				    ~0);
> +	}
> +
> +	return dw_pcie_allocate_domains(pp);
> +}
> +
> +static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = {
> +	.host_init = dra7xx_pcie_host_init,
> +	.msi_host_init = dra7xx_pcie_msi_host_init,
> +};
> +
>  static void dra7xx_pcie_ep_init(struct dw_pcie_ep *ep)
>  {
>  	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> @@ -467,14 +634,6 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx,
>  		return pp->irq;
>  	}
>  
> -	ret = devm_request_irq(dev, pp->irq, dra7xx_pcie_msi_irq_handler,
> -			       IRQF_SHARED | IRQF_NO_THREAD,
> -			       "dra7-pcie-msi",	dra7xx);
> -	if (ret) {
> -		dev_err(dev, "failed to request irq\n");
> -		return ret;
> -	}
> -
>  	ret = dra7xx_pcie_init_irq_domain(pp);
>  	if (ret < 0)
>  		return ret;
> -- 
> 2.26.0
>
Bjorn Helgaas March 30, 2020, 4:29 p.m. UTC | #2
[+cc Marc, Thomas]

On Fri, Mar 27, 2020 at 03:24:34PM +0530, Vignesh Raghavendra wrote:
> Due an issue with PCIe wrapper around DWC PCIe IP on dra7xx, driver
> needs to ensure that there are no pending MSI IRQ vector set (i.e
> PCIE_MSI_INTR0_STATUS reads 0 at least once) before exiting IRQ handler.
> Else, the dra7xx PCIe wrapper will not register new MSI IRQs even though
> PCIE_MSI_INTR0_STATUS shows IRQs are pending.

I'm not an IRQ guy (real IRQ guys CC'd), but I'm wondering if this is
really a symptom of a problem in the generic DWC IRQ handling, not a
problem in dra7xx itself.

I thought it was sort of standard behavior that a device would not
send a new MSI unless there was a transition from "no status bits set"
to "at least one status bit set".  I'm looking at this text from the
PCIe r5.0 spec, sec 6.7.3.4:

  If the Port is enabled for edge-triggered interrupt signaling using
  MSI or MSI-X, an interrupt message must be sent every time the
  logical AND of the following conditions transitions from FALSE to
  TRUE:

    - The associated vector is unmasked (not applicable if MSI does
      not support PVM).

    - The Hot-Plug Interrupt Enable bit in the Slot Control register
      is set to 1b.

    - At least one hot-plug event status bit in the Slot Status
      register and its associated enable bit in the Slot Control
      register are both set to 1b.

and this related commit: https://git.kernel.org/linus/fad214b0aa72

> Therefore its no longer possible to use default IRQ handler provided by
> DWC library. So, add irqchip implementation inside pci-dra7xx.c and
> install new MSI IRQ handler to handle above errata.
> 
> This fixes a bug, where PCIe wifi cards with 4 DMA queues like Intel
> 8260 used to throw following error and stall during ping/iperf3 tests.
> 
> [   97.776310] iwlwifi 0000:01:00.0: Queue 9 stuck for 2500 ms.
> 
> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
> Tested-by: Kishon Vijay Abraham I <kishon@ti.com>
> ---
> 
> v4:
> Simplify dra7xx_pcie_handle_msi_irq() by spliting inner loop into
> separte function as suggested by Lorenzo
> 
> v3:
> - Move loop to service all MSI IRQs into dra7xx_pcie_handle_msi_irq()
> - Add a warning msg when loop counter overflows
> 
> 
>  drivers/pci/controller/dwc/pci-dra7xx.c | 231 ++++++++++++++++++++----
>  1 file changed, 195 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
> index 9bf7fa99b103..3b0e58f2de58 100644
> --- a/drivers/pci/controller/dwc/pci-dra7xx.c
> +++ b/drivers/pci/controller/dwc/pci-dra7xx.c
> @@ -215,10 +215,6 @@ static int dra7xx_pcie_host_init(struct pcie_port *pp)
>  	return 0;
>  }
>  
> -static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = {
> -	.host_init = dra7xx_pcie_host_init,
> -};
> -
>  static int dra7xx_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
>  				irq_hw_number_t hwirq)
>  {
> @@ -233,43 +229,77 @@ static const struct irq_domain_ops intx_domain_ops = {
>  	.xlate = pci_irqd_intx_xlate,
>  };
>  
> -static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
> +static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index)
>  {
>  	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> -	struct device *dev = pci->dev;
> -	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
> -	struct device_node *node = dev->of_node;
> -	struct device_node *pcie_intc_node =  of_get_next_child(node, NULL);
> +	unsigned long val;
> +	int pos, irq;
>  
> -	if (!pcie_intc_node) {
> -		dev_err(dev, "No PCIe Intc node found\n");
> -		return -ENODEV;
> -	}
> +	val = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
> +				   (index * MSI_REG_CTRL_BLOCK_SIZE));
> +	if (!val)
> +		return 0;
>  
> -	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
> -						   &intx_domain_ops, pp);
> -	of_node_put(pcie_intc_node);
> -	if (!dra7xx->irq_domain) {
> -		dev_err(dev, "Failed to get a INTx IRQ domain\n");
> -		return -ENODEV;
> +	pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0);
> +	while (pos != MAX_MSI_IRQS_PER_CTRL) {
> +		irq = irq_find_mapping(pp->irq_domain,
> +				       (index * MAX_MSI_IRQS_PER_CTRL) + pos);
> +		generic_handle_irq(irq);
> +		pos++;
> +		pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, pos);
>  	}
>  
> -	return 0;
> +	return 1;
>  }
>  
> -static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
> +static void dra7xx_pcie_handle_msi_irq(struct pcie_port *pp)
>  {
> -	struct dra7xx_pcie *dra7xx = arg;
> -	struct dw_pcie *pci = dra7xx->pci;
> -	struct pcie_port *pp = &pci->pp;
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	int ret, i, count, num_ctrls;
> +
> +	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
> +
> +	/**
> +	 * Need to make sure all MSI status bits read 0 before exiting.
> +	 * Else, new MSI IRQs are not registered by the wrapper. Have an
> +	 * upperbound for the loop and exit the IRQ in case of IRQ flood
> +	 * to avoid locking up system in interrupt context.
> +	 */
> +	count = 0;
> +	do {
> +		ret = 0;
> +
> +		for (i = 0; i < num_ctrls; i++)
> +			ret |= dra7xx_pcie_handle_msi(pp, i);
> +		count++;
> +	} while (ret && count <= 1000);
> +
> +	if (count > 1000)
> +		dev_warn_ratelimited(pci->dev,
> +				     "Too many MSI IRQs to handle\n");
> +}
> +
> +static void dra7xx_pcie_msi_irq_handler(struct irq_desc *desc)
> +{
> +	struct irq_chip *chip = irq_desc_get_chip(desc);
> +	struct dra7xx_pcie *dra7xx;
> +	struct dw_pcie *pci;
> +	struct pcie_port *pp;
>  	unsigned long reg;
>  	u32 virq, bit;
>  
> +	chained_irq_enter(chip, desc);
> +
> +	pp = irq_desc_get_handler_data(desc);
> +	pci = to_dw_pcie_from_pp(pp);
> +	dra7xx = to_dra7xx_pcie(pci);
> +
>  	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI);
> +	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
>  
>  	switch (reg) {
>  	case MSI:
> -		dw_handle_msi_irq(pp);
> +		dra7xx_pcie_handle_msi_irq(pp);
>  		break;
>  	case INTA:
>  	case INTB:
> @@ -283,9 +313,7 @@ static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
>  		break;
>  	}
>  
> -	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
> -
> -	return IRQ_HANDLED;
> +	chained_irq_exit(chip, desc);
>  }
>  
>  static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
> @@ -347,6 +375,145 @@ static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
>  	return IRQ_HANDLED;
>  }
>  
> +static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct device *dev = pci->dev;
> +	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
> +	struct device_node *node = dev->of_node;
> +	struct device_node *pcie_intc_node =  of_get_next_child(node, NULL);
> +
> +	if (!pcie_intc_node) {
> +		dev_err(dev, "No PCIe Intc node found\n");
> +		return -ENODEV;
> +	}
> +
> +	irq_set_chained_handler_and_data(pp->irq, dra7xx_pcie_msi_irq_handler,
> +					 pp);
> +	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
> +						   &intx_domain_ops, pp);
> +	of_node_put(pcie_intc_node);
> +	if (!dra7xx->irq_domain) {
> +		dev_err(dev, "Failed to get a INTx IRQ domain\n");
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}
> +
> +static void dra7xx_pcie_setup_msi_msg(struct irq_data *d, struct msi_msg *msg)
> +{
> +	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	u64 msi_target;
> +
> +	msi_target = (u64)pp->msi_data;
> +
> +	msg->address_lo = lower_32_bits(msi_target);
> +	msg->address_hi = upper_32_bits(msi_target);
> +
> +	msg->data = d->hwirq;
> +
> +	dev_dbg(pci->dev, "msi#%d address_hi %#x address_lo %#x\n",
> +		(int)d->hwirq, msg->address_hi, msg->address_lo);
> +}
> +
> +static int dra7xx_pcie_msi_set_affinity(struct irq_data *d,
> +					const struct cpumask *mask,
> +					bool force)
> +{
> +	return -EINVAL;
> +}
> +
> +static void dra7xx_pcie_bottom_mask(struct irq_data *d)
> +{
> +	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	unsigned int res, bit, ctrl;
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&pp->lock, flags);
> +
> +	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
> +	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
> +	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
> +
> +	pp->irq_mask[ctrl] |= BIT(bit);
> +	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res,
> +			   pp->irq_mask[ctrl]);
> +
> +	raw_spin_unlock_irqrestore(&pp->lock, flags);
> +}
> +
> +static void dra7xx_pcie_bottom_unmask(struct irq_data *d)
> +{
> +	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	unsigned int res, bit, ctrl;
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&pp->lock, flags);
> +
> +	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
> +	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
> +	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
> +
> +	pp->irq_mask[ctrl] &= ~BIT(bit);
> +	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res,
> +			   pp->irq_mask[ctrl]);
> +
> +	raw_spin_unlock_irqrestore(&pp->lock, flags);
> +}
> +
> +static void dra7xx_pcie_bottom_ack(struct irq_data *d)
> +{
> +	struct pcie_port *pp  = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	unsigned int res, bit, ctrl;
> +
> +	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
> +	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
> +	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
> +
> +	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_STATUS + res, BIT(bit));
> +}
> +
> +static struct irq_chip dra7xx_pci_msi_bottom_irq_chip = {
> +	.name = "DRA7XX-PCI-MSI",
> +	.irq_ack = dra7xx_pcie_bottom_ack,
> +	.irq_compose_msi_msg = dra7xx_pcie_setup_msi_msg,
> +	.irq_set_affinity = dra7xx_pcie_msi_set_affinity,
> +	.irq_mask = dra7xx_pcie_bottom_mask,
> +	.irq_unmask = dra7xx_pcie_bottom_unmask,
> +};
> +
> +static int dra7xx_pcie_msi_host_init(struct pcie_port *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	u32 ctrl, num_ctrls;
> +
> +	pp->msi_irq_chip = &dra7xx_pci_msi_bottom_irq_chip;
> +
> +	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
> +	/* Initialize IRQ Status array */
> +	for (ctrl = 0; ctrl < num_ctrls; ctrl++) {
> +		pp->irq_mask[ctrl] = ~0;
> +		dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK +
> +				    (ctrl * MSI_REG_CTRL_BLOCK_SIZE),
> +				    pp->irq_mask[ctrl]);
> +		dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_ENABLE +
> +				    (ctrl * MSI_REG_CTRL_BLOCK_SIZE),
> +				    ~0);
> +	}
> +
> +	return dw_pcie_allocate_domains(pp);
> +}
> +
> +static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = {
> +	.host_init = dra7xx_pcie_host_init,
> +	.msi_host_init = dra7xx_pcie_msi_host_init,
> +};
> +
>  static void dra7xx_pcie_ep_init(struct dw_pcie_ep *ep)
>  {
>  	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> @@ -467,14 +634,6 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx,
>  		return pp->irq;
>  	}
>  
> -	ret = devm_request_irq(dev, pp->irq, dra7xx_pcie_msi_irq_handler,
> -			       IRQF_SHARED | IRQF_NO_THREAD,
> -			       "dra7-pcie-msi",	dra7xx);
> -	if (ret) {
> -		dev_err(dev, "failed to request irq\n");
> -		return ret;
> -	}
> -
>  	ret = dra7xx_pcie_init_irq_domain(pp);
>  	if (ret < 0)
>  		return ret;
> -- 
> 2.26.0
>
Bjorn Helgaas March 30, 2020, 4:37 p.m. UTC | #3
On Mon, Mar 30, 2020 at 11:29:52AM -0500, Bjorn Helgaas wrote:
> [+cc Marc, Thomas]
> 
> On Fri, Mar 27, 2020 at 03:24:34PM +0530, Vignesh Raghavendra wrote:
> > Due an issue with PCIe wrapper around DWC PCIe IP on dra7xx, driver
> > needs to ensure that there are no pending MSI IRQ vector set (i.e
> > PCIE_MSI_INTR0_STATUS reads 0 at least once) before exiting IRQ handler.
> > Else, the dra7xx PCIe wrapper will not register new MSI IRQs even though
> > PCIE_MSI_INTR0_STATUS shows IRQs are pending.
> 
> I'm not an IRQ guy (real IRQ guys CC'd), but I'm wondering if this is
> really a symptom of a problem in the generic DWC IRQ handling, not a
> problem in dra7xx itself.
> 
> I thought it was sort of standard behavior that a device would not
> send a new MSI unless there was a transition from "no status bits set"
> to "at least one status bit set".  I'm looking at this text from the
> PCIe r5.0 spec, sec 6.7.3.4:
> 
>   If the Port is enabled for edge-triggered interrupt signaling using
>   MSI or MSI-X, an interrupt message must be sent every time the
>   logical AND of the following conditions transitions from FALSE to
>   TRUE:
> 
>     - The associated vector is unmasked (not applicable if MSI does
>       not support PVM).
> 
>     - The Hot-Plug Interrupt Enable bit in the Slot Control register
>       is set to 1b.
> 
>     - At least one hot-plug event status bit in the Slot Status
>       register and its associated enable bit in the Slot Control
>       register are both set to 1b.
> 
> and this related commit: https://git.kernel.org/linus/fad214b0aa72

and this one: https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/commit/?id=87d94ad41bd2
Thomas Gleixner March 30, 2020, 9:12 p.m. UTC | #4
Bjorn,

Bjorn Helgaas <helgaas@kernel.org> writes:
> On Fri, Mar 27, 2020 at 03:24:34PM +0530, Vignesh Raghavendra wrote:
>> Due an issue with PCIe wrapper around DWC PCIe IP on dra7xx, driver
>> needs to ensure that there are no pending MSI IRQ vector set (i.e
>> PCIE_MSI_INTR0_STATUS reads 0 at least once) before exiting IRQ handler.
>> Else, the dra7xx PCIe wrapper will not register new MSI IRQs even though
>> PCIE_MSI_INTR0_STATUS shows IRQs are pending.
>
> I'm not an IRQ guy (real IRQ guys CC'd), but I'm wondering if this is
> really a symptom of a problem in the generic DWC IRQ handling, not a
> problem in dra7xx itself.
>
> I thought it was sort of standard behavior that a device would not
> send a new MSI unless there was a transition from "no status bits set"
> to "at least one status bit set".  I'm looking at this text from the
> PCIe r5.0 spec, sec 6.7.3.4:

That's for the device side. But this is the host side and that consists
of two components:

     1) The actual PCIe host controller (DWC)

     2) Some hardware wrapper around #1 to glue the host controller IP
        into the TI SoC.

#1 contains a MSI message receiver unit. PCIE_MSI_INTR0_STATUS is part
that.

If there is a MSI message sent to the host then the bit which is
corresponding to the sent message (vector) is set in the status
register. If a bit is set in the status register then the host
controller raises an interrupt at its output.

Here, if I deciphered the above changelog correctly, comes the wrapper
glue #2 into play, which seems to be involved in forwarding the host
controller interrupt to the CPU's interrupt controller (GIC) and that
forwarding mechanism seems to have some issue.

The changelog is unspecific enough, so I can't explain for sure how the
wrapper hardware confuses itself.

My assumption is that after the host controller raised an interrupt at
the input of the wrapper logic the wrapper logic requires a transition
back to quiescent state before it can raise another interrupt in the
GIC. And that seems to require that all bits of PCIE_MSI_INTR0_STATUS
are cleared.

If my interpretation is correct, then the DWC side is not at fault and
it's soleley the TI specific hardware glue which makes this thing
misbehave.

Of course I might be completely wrong, but the TI folks should be able
to tell.

OTOH, what I do not understand in the patch is the reimplementation of
the interrupt chip. All functions are copies, except for the actual
register writes.

The drax version uses dw_pcie_writel_dbi() which invokes
dw_pcie_write_dbi() and that function does:

void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
{
	int ret;

	if (pci->ops->write_dbi) {
		pci->ops->write_dbi(pci, pci->dbi_base, reg, size, val);
		return;
	}

	ret = dw_pcie_write(pci->dbi_base + reg, size, val);
	if (ret)
		dev_err(pci->dev, "Write DBI address failed\n");
}

The dwc version uses dw_pcie_wr_own_conf() which does:

static int dw_pcie_wr_own_conf(struct pcie_port *pp, int where, int size,
			       u32 val)
{
	struct dw_pcie *pci;

	if (pp->ops->wr_own_conf)
		return pp->ops->wr_own_conf(pp, where, size, val);

	pci = to_dw_pcie_from_pp(pp);
	return dw_pcie_write(pci->dbi_base + where, size, val);
}

As dra7xx does neither implement pp->ops->wr_own_conf() nor
pci->ops->write_dbi. Ergo the interrupt chip is just a copy with some
obfuscation.

I might be missing some detail, but asided of the actual interrupt
service routine, this looks like a massive copy and paste orgy.

Thanks,

        tglx
Vignesh Raghavendra March 31, 2020, 11:05 a.m. UTC | #5
On 30/03/20 10:07 pm, Bjorn Helgaas wrote:
> On Mon, Mar 30, 2020 at 11:29:52AM -0500, Bjorn Helgaas wrote:
>> [+cc Marc, Thomas]
>>
>> On Fri, Mar 27, 2020 at 03:24:34PM +0530, Vignesh Raghavendra wrote:
>>> Due an issue with PCIe wrapper around DWC PCIe IP on dra7xx, driver
>>> needs to ensure that there are no pending MSI IRQ vector set (i.e
>>> PCIE_MSI_INTR0_STATUS reads 0 at least once) before exiting IRQ handler.
>>> Else, the dra7xx PCIe wrapper will not register new MSI IRQs even though
>>> PCIE_MSI_INTR0_STATUS shows IRQs are pending.
>>
>> I'm not an IRQ guy (real IRQ guys CC'd), but I'm wondering if this is
>> really a symptom of a problem in the generic DWC IRQ handling, not a
>> problem in dra7xx itself.
>>
>> I thought it was sort of standard behavior that a device would not
>> send a new MSI unless there was a transition from "no status bits set"
>> to "at least one status bit set".  I'm looking at this text from the
>> PCIe r5.0 spec, sec 6.7.3.4:


This patch is addressing an issue wrt how DWC PCIe MSI IRQ status is
aggregated at TI wrapper level:

There is a single MSI status bit which is supposed to be logical OR of
all the MSI IRQ status bits inside the DWC wrapper.  So if any of the
MSI IRQ status bits are set then this bit should read 1 and raise an
interrupt to CPU.
IRQ handler would then go through each MSI bit (inside DWC) and call
corresponding handler and then clear individual bits.
So the expectation was that wrapper level MSI status bit would auto
clear once all the DWC level MSI bits are cleared or wrapper will keep
the interrupt line asserted if there are still some outstanding ones.

But unfortunately that does not seem to be the case, MSI status bit in
the wrapper needs to be cleared manually. And moreover, once wrapper
level bit is cleared, the observation is that all the IRQ status bit
inside the DWC should be handled completely (i.e all the registers
should read 0 at least once) and only then is a new MSI IRQ guaranteed
to be recognized by the wrapper.

During debug without this commit, I often saw that DWC level MSI bit was
set (and IRQ status in endpoint's register was also set) but wrapper
level MSI bit was not set and host CPU never received an interrupt
therefore causing endpoint drivers to timeout waiting for certain events.

Regards
Vignesh

>>
>>   If the Port is enabled for edge-triggered interrupt signaling using
>>   MSI or MSI-X, an interrupt message must be sent every time the
>>   logical AND of the following conditions transitions from FALSE to
>>   TRUE:
>>
>>     - The associated vector is unmasked (not applicable if MSI does
>>       not support PVM).
>>
>>     - The Hot-Plug Interrupt Enable bit in the Slot Control register
>>       is set to 1b.
>>
>>     - At least one hot-plug event status bit in the Slot Status
>>       register and its associated enable bit in the Slot Control
>>       register are both set to 1b.
>>
>> and this related commit: https://git.kernel.org/linus/fad214b0aa72
> 
> and this one: https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/commit/?id=87d94ad41bd2
> 

I am not sure how these fixes can be relevant to my problem.
Bjorn Helgaas March 31, 2020, 4:09 p.m. UTC | #6
On Mon, Mar 30, 2020 at 11:12:10PM +0200, Thomas Gleixner wrote:
> Bjorn Helgaas <helgaas@kernel.org> writes:
> > On Fri, Mar 27, 2020 at 03:24:34PM +0530, Vignesh Raghavendra wrote:
> >> Due an issue with PCIe wrapper around DWC PCIe IP on dra7xx, driver
> >> needs to ensure that there are no pending MSI IRQ vector set (i.e
> >> PCIE_MSI_INTR0_STATUS reads 0 at least once) before exiting IRQ handler.
> >> Else, the dra7xx PCIe wrapper will not register new MSI IRQs even though
> >> PCIE_MSI_INTR0_STATUS shows IRQs are pending.
> >
> > I'm not an IRQ guy (real IRQ guys CC'd), but I'm wondering if this is
> > really a symptom of a problem in the generic DWC IRQ handling, not a
> > problem in dra7xx itself.
> >
> > I thought it was sort of standard behavior that a device would not
> > send a new MSI unless there was a transition from "no status bits set"
> > to "at least one status bit set".  I'm looking at this text from the
> > PCIe r5.0 spec, sec 6.7.3.4:
> 
> That's for the device side. But this is the host side and that consists
> of two components:
> 
>      1) The actual PCIe host controller (DWC)
> 
>      2) Some hardware wrapper around #1 to glue the host controller IP
>         into the TI SoC.
> 
> #1 contains a MSI message receiver unit. PCIE_MSI_INTR0_STATUS is part
> that.
> 
> If there is a MSI message sent to the host then the bit which is
> corresponding to the sent message (vector) is set in the status
> register. If a bit is set in the status register then the host
> controller raises an interrupt at its output.
> 
> Here, if I deciphered the above changelog correctly, comes the wrapper
> glue #2 into play, which seems to be involved in forwarding the host
> controller interrupt to the CPU's interrupt controller (GIC) and that
> forwarding mechanism seems to have some issue.

Sorry for muddying the waters, and thanks for clarifying it, Thomas.

This patch is on its way to v5.7, and I guess we'll worry about
whether the interrupt chip reimplementation is overkill later.

Bjorn
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
index 9bf7fa99b103..3b0e58f2de58 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -215,10 +215,6 @@  static int dra7xx_pcie_host_init(struct pcie_port *pp)
 	return 0;
 }
 
-static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = {
-	.host_init = dra7xx_pcie_host_init,
-};
-
 static int dra7xx_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
 				irq_hw_number_t hwirq)
 {
@@ -233,43 +229,77 @@  static const struct irq_domain_ops intx_domain_ops = {
 	.xlate = pci_irqd_intx_xlate,
 };
 
-static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
+static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
-	struct device *dev = pci->dev;
-	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
-	struct device_node *node = dev->of_node;
-	struct device_node *pcie_intc_node =  of_get_next_child(node, NULL);
+	unsigned long val;
+	int pos, irq;
 
-	if (!pcie_intc_node) {
-		dev_err(dev, "No PCIe Intc node found\n");
-		return -ENODEV;
-	}
+	val = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
+				   (index * MSI_REG_CTRL_BLOCK_SIZE));
+	if (!val)
+		return 0;
 
-	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
-						   &intx_domain_ops, pp);
-	of_node_put(pcie_intc_node);
-	if (!dra7xx->irq_domain) {
-		dev_err(dev, "Failed to get a INTx IRQ domain\n");
-		return -ENODEV;
+	pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0);
+	while (pos != MAX_MSI_IRQS_PER_CTRL) {
+		irq = irq_find_mapping(pp->irq_domain,
+				       (index * MAX_MSI_IRQS_PER_CTRL) + pos);
+		generic_handle_irq(irq);
+		pos++;
+		pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, pos);
 	}
 
-	return 0;
+	return 1;
 }
 
-static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
+static void dra7xx_pcie_handle_msi_irq(struct pcie_port *pp)
 {
-	struct dra7xx_pcie *dra7xx = arg;
-	struct dw_pcie *pci = dra7xx->pci;
-	struct pcie_port *pp = &pci->pp;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	int ret, i, count, num_ctrls;
+
+	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
+
+	/**
+	 * Need to make sure all MSI status bits read 0 before exiting.
+	 * Else, new MSI IRQs are not registered by the wrapper. Have an
+	 * upperbound for the loop and exit the IRQ in case of IRQ flood
+	 * to avoid locking up system in interrupt context.
+	 */
+	count = 0;
+	do {
+		ret = 0;
+
+		for (i = 0; i < num_ctrls; i++)
+			ret |= dra7xx_pcie_handle_msi(pp, i);
+		count++;
+	} while (ret && count <= 1000);
+
+	if (count > 1000)
+		dev_warn_ratelimited(pci->dev,
+				     "Too many MSI IRQs to handle\n");
+}
+
+static void dra7xx_pcie_msi_irq_handler(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct dra7xx_pcie *dra7xx;
+	struct dw_pcie *pci;
+	struct pcie_port *pp;
 	unsigned long reg;
 	u32 virq, bit;
 
+	chained_irq_enter(chip, desc);
+
+	pp = irq_desc_get_handler_data(desc);
+	pci = to_dw_pcie_from_pp(pp);
+	dra7xx = to_dra7xx_pcie(pci);
+
 	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI);
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
 
 	switch (reg) {
 	case MSI:
-		dw_handle_msi_irq(pp);
+		dra7xx_pcie_handle_msi_irq(pp);
 		break;
 	case INTA:
 	case INTB:
@@ -283,9 +313,7 @@  static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
 		break;
 	}
 
-	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
-
-	return IRQ_HANDLED;
+	chained_irq_exit(chip, desc);
 }
 
 static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
@@ -347,6 +375,145 @@  static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct device *dev = pci->dev;
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+	struct device_node *node = dev->of_node;
+	struct device_node *pcie_intc_node =  of_get_next_child(node, NULL);
+
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found\n");
+		return -ENODEV;
+	}
+
+	irq_set_chained_handler_and_data(pp->irq, dra7xx_pcie_msi_irq_handler,
+					 pp);
+	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						   &intx_domain_ops, pp);
+	of_node_put(pcie_intc_node);
+	if (!dra7xx->irq_domain) {
+		dev_err(dev, "Failed to get a INTx IRQ domain\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void dra7xx_pcie_setup_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	u64 msi_target;
+
+	msi_target = (u64)pp->msi_data;
+
+	msg->address_lo = lower_32_bits(msi_target);
+	msg->address_hi = upper_32_bits(msi_target);
+
+	msg->data = d->hwirq;
+
+	dev_dbg(pci->dev, "msi#%d address_hi %#x address_lo %#x\n",
+		(int)d->hwirq, msg->address_hi, msg->address_lo);
+}
+
+static int dra7xx_pcie_msi_set_affinity(struct irq_data *d,
+					const struct cpumask *mask,
+					bool force)
+{
+	return -EINVAL;
+}
+
+static void dra7xx_pcie_bottom_mask(struct irq_data *d)
+{
+	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned int res, bit, ctrl;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
+	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
+	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
+
+	pp->irq_mask[ctrl] |= BIT(bit);
+	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res,
+			   pp->irq_mask[ctrl]);
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static void dra7xx_pcie_bottom_unmask(struct irq_data *d)
+{
+	struct pcie_port *pp = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned int res, bit, ctrl;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
+	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
+	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
+
+	pp->irq_mask[ctrl] &= ~BIT(bit);
+	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res,
+			   pp->irq_mask[ctrl]);
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static void dra7xx_pcie_bottom_ack(struct irq_data *d)
+{
+	struct pcie_port *pp  = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned int res, bit, ctrl;
+
+	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
+	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
+	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
+
+	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_STATUS + res, BIT(bit));
+}
+
+static struct irq_chip dra7xx_pci_msi_bottom_irq_chip = {
+	.name = "DRA7XX-PCI-MSI",
+	.irq_ack = dra7xx_pcie_bottom_ack,
+	.irq_compose_msi_msg = dra7xx_pcie_setup_msi_msg,
+	.irq_set_affinity = dra7xx_pcie_msi_set_affinity,
+	.irq_mask = dra7xx_pcie_bottom_mask,
+	.irq_unmask = dra7xx_pcie_bottom_unmask,
+};
+
+static int dra7xx_pcie_msi_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	u32 ctrl, num_ctrls;
+
+	pp->msi_irq_chip = &dra7xx_pci_msi_bottom_irq_chip;
+
+	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
+	/* Initialize IRQ Status array */
+	for (ctrl = 0; ctrl < num_ctrls; ctrl++) {
+		pp->irq_mask[ctrl] = ~0;
+		dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK +
+				    (ctrl * MSI_REG_CTRL_BLOCK_SIZE),
+				    pp->irq_mask[ctrl]);
+		dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_ENABLE +
+				    (ctrl * MSI_REG_CTRL_BLOCK_SIZE),
+				    ~0);
+	}
+
+	return dw_pcie_allocate_domains(pp);
+}
+
+static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = {
+	.host_init = dra7xx_pcie_host_init,
+	.msi_host_init = dra7xx_pcie_msi_host_init,
+};
+
 static void dra7xx_pcie_ep_init(struct dw_pcie_ep *ep)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
@@ -467,14 +634,6 @@  static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx,
 		return pp->irq;
 	}
 
-	ret = devm_request_irq(dev, pp->irq, dra7xx_pcie_msi_irq_handler,
-			       IRQF_SHARED | IRQF_NO_THREAD,
-			       "dra7-pcie-msi",	dra7xx);
-	if (ret) {
-		dev_err(dev, "failed to request irq\n");
-		return ret;
-	}
-
 	ret = dra7xx_pcie_init_irq_domain(pp);
 	if (ret < 0)
 		return ret;