diff mbox series

[10/11] PCI: mvebu: Implement support for legacy INTx interrupts

Message ID 20220105150239.9628-11-pali@kernel.org (mailing list archive)
State Superseded
Delegated to: Lorenzo Pieralisi
Headers show
Series PCI: mvebu: subsystem ids, AER and INTx | expand

Commit Message

Pali Rohár Jan. 5, 2022, 3:02 p.m. UTC
This adds support for legacy INTx interrupts received from other PCIe
devices and which are reported by a new INTx irq chip.

With this change, kernel can distinguish between INTA, INTB, INTC and INTD
interrupts.

Note that for this support, device tree files has to be properly adjusted
to provide "interrupts" or "interrupts-extended" property with intx
interrupt source, "interrupt-names" property with "intx" string and also
'interrupt-controller' subnode must be defined.

If device tree files do not provide these nodes then driver would work as
before.

Signed-off-by: Pali Rohár <pali@kernel.org>
---
 drivers/pci/controller/pci-mvebu.c | 182 +++++++++++++++++++++++++++--
 1 file changed, 174 insertions(+), 8 deletions(-)

Comments

Marc Zyngier Jan. 6, 2022, 3:28 p.m. UTC | #1
On Wed, 05 Jan 2022 15:02:38 +0000,
Pali Rohár <pali@kernel.org> wrote:
> 
> This adds support for legacy INTx interrupts received from other PCIe
> devices and which are reported by a new INTx irq chip.
> 
> With this change, kernel can distinguish between INTA, INTB, INTC and INTD
> interrupts.
> 
> Note that for this support, device tree files has to be properly adjusted
> to provide "interrupts" or "interrupts-extended" property with intx
> interrupt source, "interrupt-names" property with "intx" string and also
> 'interrupt-controller' subnode must be defined.
> 
> If device tree files do not provide these nodes then driver would work as
> before.
> 
> Signed-off-by: Pali Rohár <pali@kernel.org>
> ---
>  drivers/pci/controller/pci-mvebu.c | 182 +++++++++++++++++++++++++++--
>  1 file changed, 174 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
> index 1e90ab888075..04bcdd7b7a6d 100644
> --- a/drivers/pci/controller/pci-mvebu.c
> +++ b/drivers/pci/controller/pci-mvebu.c
> @@ -54,9 +54,10 @@
>  	 PCIE_CONF_ADDR_EN)
>  #define PCIE_CONF_DATA_OFF	0x18fc
>  #define PCIE_INT_CAUSE_OFF	0x1900
> +#define PCIE_INT_UNMASK_OFF	0x1910
> +#define  PCIE_INT_INTX(i)		BIT(24+i)
>  #define  PCIE_INT_PM_PME		BIT(28)
> -#define PCIE_MASK_OFF		0x1910
> -#define  PCIE_MASK_ENABLE_INTS          0x0f000000
> +#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
>  #define PCIE_CTRL_OFF		0x1a00
>  #define  PCIE_CTRL_X1_MODE		0x0001
>  #define  PCIE_CTRL_RC_MODE		BIT(1)
> @@ -110,6 +111,10 @@ struct mvebu_pcie_port {
>  	struct mvebu_pcie_window iowin;
>  	u32 saved_pcie_stat;
>  	struct resource regs;
> +	struct irq_domain *intx_irq_domain;
> +	struct irq_chip intx_irq_chip;

Why is this structure per port? It really should be global. Printing
the port number in the name isn't enough of a reason.

> +	raw_spinlock_t irq_lock;
> +	int intx_irq;
>  };
>  
>  static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
> @@ -235,7 +240,7 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
>  
>  static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
>  {
> -	u32 ctrl, lnkcap, cmd, dev_rev, mask;
> +	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
>  
>  	/* Setup PCIe controller to Root Complex mode. */
>  	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
> @@ -288,10 +293,30 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
>  	/* Point PCIe unit MBUS decode windows to DRAM space. */
>  	mvebu_pcie_setup_wins(port);
>  
> -	/* Enable interrupt lines A-D. */
> -	mask = mvebu_readl(port, PCIE_MASK_OFF);
> -	mask |= PCIE_MASK_ENABLE_INTS;
> -	mvebu_writel(port, mask, PCIE_MASK_OFF);
> +	/* Mask all interrupt sources. */
> +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> +
> +	/* Clear all interrupt causes. */
> +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> +
> +	if (port->intx_irq <= 0) {
> +		/*
> +		 * When neither "summary" interrupt, nor "intx" interrupt was
> +		 * specified in DT then unmask all legacy INTx interrupts as in
> +		 * this case driver does not provide a way for masking and
> +		 * unmasking of individual legacy INTx interrupts. In this case
> +		 * all interrupts, including legacy INTx are reported via one
> +		 * shared GIC source and therefore kernel cannot distinguish
> +		 * which individual legacy INTx was triggered. These interrupts
> +		 * are shared, so it should not cause any issue. Just
> +		 * performance penalty as every PCIe interrupt handler needs to
> +		 * be called when some interrupt is triggered.
> +		 */
> +		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> +		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
> +			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
> +		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);

Maybe worth printing a warning here, so that the user knows they are
on thin ice.

> +	}
>  }
>  
>  static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
> @@ -924,6 +949,109 @@ static struct pci_ops mvebu_pcie_ops = {
>  	.write = mvebu_pcie_wr_conf,
>  };
>  
> +static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
> +{
> +	struct mvebu_pcie_port *port = d->domain->host_data;
> +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> +	unsigned long flags;
> +	u32 unmask;
> +
> +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> +	unmask &= ~PCIE_INT_INTX(hwirq);
> +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> +}
> +
> +static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
> +{
> +	struct mvebu_pcie_port *port = d->domain->host_data;
> +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> +	unsigned long flags;
> +	u32 unmask;
> +
> +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> +	unmask |= PCIE_INT_INTX(hwirq);
> +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> +}
> +
> +static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
> +				   unsigned int virq, irq_hw_number_t hwirq)
> +{
> +	struct mvebu_pcie_port *port = h->host_data;
> +
> +	irq_set_status_flags(virq, IRQ_LEVEL);
> +	irq_set_chip_and_handler(virq, &port->intx_irq_chip, handle_level_irq);
> +	irq_set_chip_data(virq, port);
> +
> +	return 0;
> +}
> +
> +static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
> +	.map = mvebu_pcie_intx_irq_map,
> +	.xlate = irq_domain_xlate_onecell,
> +};
> +
> +static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
> +{
> +	struct device *dev = &port->pcie->pdev->dev;
> +	struct device_node *pcie_intc_node;
> +
> +	raw_spin_lock_init(&port->irq_lock);
> +
> +	port->intx_irq_chip.name = devm_kasprintf(dev, GFP_KERNEL,
> +						  "mvebu-%s-INTx",
> +						  port->name);

That's exactly what I really don't want to see. It prevents sharing of
the irq_chip structure, and gets in the way of making it const in the
future. Yes, I know that some drivers do that. I can't fix those,
because /proc/interrupts is ABI. But I really don't want to see more
of these.

/sys/kernel/debug/irqs already has all the information you need, as it
will happily give you the domain name and the interrupt topology.

> +	port->intx_irq_chip.irq_mask = mvebu_pcie_intx_irq_mask;
> +	port->intx_irq_chip.irq_unmask = mvebu_pcie_intx_irq_unmask;
> +
> +	pcie_intc_node = of_get_next_child(port->dn, NULL);
> +	if (!pcie_intc_node) {
> +		dev_err(dev, "No PCIe Intc node found for %s\n", port->name);
> +		return -ENODEV;
> +	}
> +
> +	port->intx_irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
> +						      &mvebu_pcie_intx_irq_domain_ops,
> +						      port);
> +	of_node_put(pcie_intc_node);
> +	if (!port->intx_irq_domain) {
> +		devm_kfree(dev, port->intx_irq_chip.name);
> +		dev_err(dev, "Failed to get INTx IRQ domain for %s\n", port->name);
> +		return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
> +static void mvebu_pcie_irq_handler(struct irq_desc *desc)
> +{
> +	struct mvebu_pcie_port *port = irq_desc_get_handler_data(desc);
> +	struct irq_chip *chip = irq_desc_get_chip(desc);
> +	struct device *dev = &port->pcie->pdev->dev;
> +	u32 cause, unmask, status;
> +	int i;
> +
> +	chained_irq_enter(chip, desc);
> +
> +	cause = mvebu_readl(port, PCIE_INT_CAUSE_OFF);
> +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);

Why do you need to read this? If the CAUSE register also returns the
masked interrupts that are pending, it may be worth keeping a shadow
copy of the this register, as you end-up having an extra MMIO read on
each and every interrupt, which can't be great for performance.

> +	status = cause & unmask;
> +
> +	/* Process legacy INTx interrupts */
> +	for (i = 0; i < PCI_NUM_INTX; i++) {
> +		if (!(status & PCIE_INT_INTX(i)))
> +			continue;
> +
> +		if (generic_handle_domain_irq(port->intx_irq_domain, i) == -EINVAL)
> +			dev_err_ratelimited(dev, "unexpected INT%c IRQ\n", (char)i+'A');
> +	}
> +
> +	chained_irq_exit(chip, desc);
> +}
> +
>  static int mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
>  {
>  	/* Interrupt support on mvebu emulated bridges is not implemented yet */
> @@ -1121,6 +1249,16 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
>  		port->io_attr = -1;
>  	}
>  
> +	/*
> +	 * Old DT bindings do not contain "intx" interrupt
> +	 * so do not fail probing driver when interrupt does not exist.
> +	 */
> +	port->intx_irq = of_irq_get_byname(child, "intx");
> +	if (port->intx_irq == -EPROBE_DEFER) {
> +		ret = port->intx_irq;
> +		goto err;
> +	}
> +
>  	reset_gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, &flags);
>  	if (reset_gpio == -EPROBE_DEFER) {
>  		ret = reset_gpio;
> @@ -1317,6 +1455,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
>  
>  	for (i = 0; i < pcie->nports; i++) {
>  		struct mvebu_pcie_port *port = &pcie->ports[i];
> +		int irq = port->intx_irq;
>  
>  		child = port->dn;
>  		if (!child)
> @@ -1344,6 +1483,22 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
>  			continue;
>  		}
>  
> +		if (irq > 0) {
> +			ret = mvebu_pcie_init_irq_domain(port);
> +			if (ret) {
> +				dev_err(dev, "%s: cannot init irq domain\n",
> +					port->name);
> +				pci_bridge_emul_cleanup(&port->bridge);
> +				devm_iounmap(dev, port->base);
> +				port->base = NULL;
> +				mvebu_pcie_powerdown(port);
> +				continue;
> +			}
> +			irq_set_chained_handler_and_data(irq,
> +							 mvebu_pcie_irq_handler,
> +							 port);
> +		}
> +
>  		/*
>  		 * PCIe topology exported by mvebu hw is quite complicated. In
>  		 * reality has something like N fully independent host bridges
> @@ -1448,6 +1603,7 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
>  
>  	for (i = 0; i < pcie->nports; i++) {
>  		struct mvebu_pcie_port *port = &pcie->ports[i];
> +		int irq = port->intx_irq;
>  
>  		if (!port->base)
>  			continue;
> @@ -1458,7 +1614,17 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
>  		mvebu_writel(port, cmd, PCIE_CMD_OFF);
>  
>  		/* Mask all interrupt sources. */
> -		mvebu_writel(port, 0, PCIE_MASK_OFF);
> +		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> +
> +		/* Clear all interrupt causes. */
> +		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> +
> +		/* Remove IRQ domains. */
> +		if (port->intx_irq_domain)
> +			irq_domain_remove(port->intx_irq_domain);
> +
> +		if (irq > 0)
> +			irq_set_chained_handler_and_data(irq, NULL, NULL);
>  
>  		/* Free config space for emulated root bridge. */
>  		pci_bridge_emul_cleanup(&port->bridge);

Thanks,

	M.
Pali Rohár Jan. 6, 2022, 3:44 p.m. UTC | #2
On Thursday 06 January 2022 15:28:20 Marc Zyngier wrote:
> On Wed, 05 Jan 2022 15:02:38 +0000,
> Pali Rohár <pali@kernel.org> wrote:
> > 
> > This adds support for legacy INTx interrupts received from other PCIe
> > devices and which are reported by a new INTx irq chip.
> > 
> > With this change, kernel can distinguish between INTA, INTB, INTC and INTD
> > interrupts.
> > 
> > Note that for this support, device tree files has to be properly adjusted
> > to provide "interrupts" or "interrupts-extended" property with intx
> > interrupt source, "interrupt-names" property with "intx" string and also
> > 'interrupt-controller' subnode must be defined.
> > 
> > If device tree files do not provide these nodes then driver would work as
> > before.
> > 
> > Signed-off-by: Pali Rohár <pali@kernel.org>
> > ---
> >  drivers/pci/controller/pci-mvebu.c | 182 +++++++++++++++++++++++++++--
> >  1 file changed, 174 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
> > index 1e90ab888075..04bcdd7b7a6d 100644
> > --- a/drivers/pci/controller/pci-mvebu.c
> > +++ b/drivers/pci/controller/pci-mvebu.c
> > @@ -54,9 +54,10 @@
> >  	 PCIE_CONF_ADDR_EN)
> >  #define PCIE_CONF_DATA_OFF	0x18fc
> >  #define PCIE_INT_CAUSE_OFF	0x1900
> > +#define PCIE_INT_UNMASK_OFF	0x1910
> > +#define  PCIE_INT_INTX(i)		BIT(24+i)
> >  #define  PCIE_INT_PM_PME		BIT(28)
> > -#define PCIE_MASK_OFF		0x1910
> > -#define  PCIE_MASK_ENABLE_INTS          0x0f000000
> > +#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
> >  #define PCIE_CTRL_OFF		0x1a00
> >  #define  PCIE_CTRL_X1_MODE		0x0001
> >  #define  PCIE_CTRL_RC_MODE		BIT(1)
> > @@ -110,6 +111,10 @@ struct mvebu_pcie_port {
> >  	struct mvebu_pcie_window iowin;
> >  	u32 saved_pcie_stat;
> >  	struct resource regs;
> > +	struct irq_domain *intx_irq_domain;
> > +	struct irq_chip intx_irq_chip;
> 
> Why is this structure per port? It really should be global. Printing
> the port number in the name isn't enough of a reason.

Because each port has its own independent set of INTA-INTD interrupts.

> > +	raw_spinlock_t irq_lock;
> > +	int intx_irq;
> >  };
> >  
> >  static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
> > @@ -235,7 +240,7 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
> >  
> >  static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> >  {
> > -	u32 ctrl, lnkcap, cmd, dev_rev, mask;
> > +	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
> >  
> >  	/* Setup PCIe controller to Root Complex mode. */
> >  	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
> > @@ -288,10 +293,30 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> >  	/* Point PCIe unit MBUS decode windows to DRAM space. */
> >  	mvebu_pcie_setup_wins(port);
> >  
> > -	/* Enable interrupt lines A-D. */
> > -	mask = mvebu_readl(port, PCIE_MASK_OFF);
> > -	mask |= PCIE_MASK_ENABLE_INTS;
> > -	mvebu_writel(port, mask, PCIE_MASK_OFF);
> > +	/* Mask all interrupt sources. */
> > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> > +
> > +	/* Clear all interrupt causes. */
> > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> > +
> > +	if (port->intx_irq <= 0) {
> > +		/*
> > +		 * When neither "summary" interrupt, nor "intx" interrupt was
> > +		 * specified in DT then unmask all legacy INTx interrupts as in
> > +		 * this case driver does not provide a way for masking and
> > +		 * unmasking of individual legacy INTx interrupts. In this case
> > +		 * all interrupts, including legacy INTx are reported via one
> > +		 * shared GIC source and therefore kernel cannot distinguish
> > +		 * which individual legacy INTx was triggered. These interrupts
> > +		 * are shared, so it should not cause any issue. Just
> > +		 * performance penalty as every PCIe interrupt handler needs to
> > +		 * be called when some interrupt is triggered.
> > +		 */
> > +		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > +		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
> > +			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
> > +		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> 
> Maybe worth printing a warning here, so that the user knows they are
> on thin ice.

Ok. I can add it here. Anyway, this is default current state without
this patch.

> > +	}
> >  }
> >  
> >  static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
> > @@ -924,6 +949,109 @@ static struct pci_ops mvebu_pcie_ops = {
> >  	.write = mvebu_pcie_wr_conf,
> >  };
> >  
> > +static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
> > +{
> > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > +	unsigned long flags;
> > +	u32 unmask;
> > +
> > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > +	unmask &= ~PCIE_INT_INTX(hwirq);
> > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > +}
> > +
> > +static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
> > +{
> > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > +	unsigned long flags;
> > +	u32 unmask;
> > +
> > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > +	unmask |= PCIE_INT_INTX(hwirq);
> > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > +}
> > +
> > +static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
> > +				   unsigned int virq, irq_hw_number_t hwirq)
> > +{
> > +	struct mvebu_pcie_port *port = h->host_data;
> > +
> > +	irq_set_status_flags(virq, IRQ_LEVEL);
> > +	irq_set_chip_and_handler(virq, &port->intx_irq_chip, handle_level_irq);
> > +	irq_set_chip_data(virq, port);
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
> > +	.map = mvebu_pcie_intx_irq_map,
> > +	.xlate = irq_domain_xlate_onecell,
> > +};
> > +
> > +static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
> > +{
> > +	struct device *dev = &port->pcie->pdev->dev;
> > +	struct device_node *pcie_intc_node;
> > +
> > +	raw_spin_lock_init(&port->irq_lock);
> > +
> > +	port->intx_irq_chip.name = devm_kasprintf(dev, GFP_KERNEL,
> > +						  "mvebu-%s-INTx",
> > +						  port->name);
> 
> That's exactly what I really don't want to see. It prevents sharing of
> the irq_chip structure, and gets in the way of making it const in the
> future. Yes, I know that some drivers do that. I can't fix those,
> because /proc/interrupts is ABI. But I really don't want to see more
> of these.

Well, I do not understand why it should be shared and with who. HW has N
independent IRQ chips for legacy interrupts. And each one will be
specified in DT per HW layout / design.

> /sys/kernel/debug/irqs already has all the information you need, as it
> will happily give you the domain name and the interrupt topology.
> 
> > +	port->intx_irq_chip.irq_mask = mvebu_pcie_intx_irq_mask;
> > +	port->intx_irq_chip.irq_unmask = mvebu_pcie_intx_irq_unmask;
> > +
> > +	pcie_intc_node = of_get_next_child(port->dn, NULL);
> > +	if (!pcie_intc_node) {
> > +		dev_err(dev, "No PCIe Intc node found for %s\n", port->name);
> > +		return -ENODEV;
> > +	}
> > +
> > +	port->intx_irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
> > +						      &mvebu_pcie_intx_irq_domain_ops,
> > +						      port);
> > +	of_node_put(pcie_intc_node);
> > +	if (!port->intx_irq_domain) {
> > +		devm_kfree(dev, port->intx_irq_chip.name);
> > +		dev_err(dev, "Failed to get INTx IRQ domain for %s\n", port->name);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void mvebu_pcie_irq_handler(struct irq_desc *desc)
> > +{
> > +	struct mvebu_pcie_port *port = irq_desc_get_handler_data(desc);
> > +	struct irq_chip *chip = irq_desc_get_chip(desc);
> > +	struct device *dev = &port->pcie->pdev->dev;
> > +	u32 cause, unmask, status;
> > +	int i;
> > +
> > +	chained_irq_enter(chip, desc);
> > +
> > +	cause = mvebu_readl(port, PCIE_INT_CAUSE_OFF);
> > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> 
> Why do you need to read this? If the CAUSE register also returns the
> masked interrupts that are pending,

Yes, it returns all interrupts, even those which are masked for which is
not system interested.

> it may be worth keeping a shadow
> copy of the this register, as you end-up having an extra MMIO read on
> each and every interrupt, which can't be great for performance.

Ok. I can set shadow copy of unmask register into port structure. But it
has really performance impact when reading directly unmask register from
hw and when reading it from port shadow copy structure?

> > +	status = cause & unmask;
> > +
> > +	/* Process legacy INTx interrupts */
> > +	for (i = 0; i < PCI_NUM_INTX; i++) {
> > +		if (!(status & PCIE_INT_INTX(i)))
> > +			continue;
> > +
> > +		if (generic_handle_domain_irq(port->intx_irq_domain, i) == -EINVAL)
> > +			dev_err_ratelimited(dev, "unexpected INT%c IRQ\n", (char)i+'A');
> > +	}
> > +
> > +	chained_irq_exit(chip, desc);
> > +}
> > +
> >  static int mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
> >  {
> >  	/* Interrupt support on mvebu emulated bridges is not implemented yet */
> > @@ -1121,6 +1249,16 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
> >  		port->io_attr = -1;
> >  	}
> >  
> > +	/*
> > +	 * Old DT bindings do not contain "intx" interrupt
> > +	 * so do not fail probing driver when interrupt does not exist.
> > +	 */
> > +	port->intx_irq = of_irq_get_byname(child, "intx");
> > +	if (port->intx_irq == -EPROBE_DEFER) {
> > +		ret = port->intx_irq;
> > +		goto err;
> > +	}
> > +
> >  	reset_gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, &flags);
> >  	if (reset_gpio == -EPROBE_DEFER) {
> >  		ret = reset_gpio;
> > @@ -1317,6 +1455,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
> >  
> >  	for (i = 0; i < pcie->nports; i++) {
> >  		struct mvebu_pcie_port *port = &pcie->ports[i];
> > +		int irq = port->intx_irq;
> >  
> >  		child = port->dn;
> >  		if (!child)
> > @@ -1344,6 +1483,22 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
> >  			continue;
> >  		}
> >  
> > +		if (irq > 0) {
> > +			ret = mvebu_pcie_init_irq_domain(port);
> > +			if (ret) {
> > +				dev_err(dev, "%s: cannot init irq domain\n",
> > +					port->name);
> > +				pci_bridge_emul_cleanup(&port->bridge);
> > +				devm_iounmap(dev, port->base);
> > +				port->base = NULL;
> > +				mvebu_pcie_powerdown(port);
> > +				continue;
> > +			}
> > +			irq_set_chained_handler_and_data(irq,
> > +							 mvebu_pcie_irq_handler,
> > +							 port);
> > +		}
> > +
> >  		/*
> >  		 * PCIe topology exported by mvebu hw is quite complicated. In
> >  		 * reality has something like N fully independent host bridges
> > @@ -1448,6 +1603,7 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
> >  
> >  	for (i = 0; i < pcie->nports; i++) {
> >  		struct mvebu_pcie_port *port = &pcie->ports[i];
> > +		int irq = port->intx_irq;
> >  
> >  		if (!port->base)
> >  			continue;
> > @@ -1458,7 +1614,17 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
> >  		mvebu_writel(port, cmd, PCIE_CMD_OFF);
> >  
> >  		/* Mask all interrupt sources. */
> > -		mvebu_writel(port, 0, PCIE_MASK_OFF);
> > +		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> > +
> > +		/* Clear all interrupt causes. */
> > +		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> > +
> > +		/* Remove IRQ domains. */
> > +		if (port->intx_irq_domain)
> > +			irq_domain_remove(port->intx_irq_domain);
> > +
> > +		if (irq > 0)
> > +			irq_set_chained_handler_and_data(irq, NULL, NULL);
> >  
> >  		/* Free config space for emulated root bridge. */
> >  		pci_bridge_emul_cleanup(&port->bridge);
> 
> Thanks,
> 
> 	M.
> 
> -- 
> Without deviation from the norm, progress is not possible.
Marc Zyngier Jan. 6, 2022, 3:55 p.m. UTC | #3
On Thu, 06 Jan 2022 15:44:47 +0000,
Pali Rohár <pali@kernel.org> wrote:
> 
> On Thursday 06 January 2022 15:28:20 Marc Zyngier wrote:
> > On Wed, 05 Jan 2022 15:02:38 +0000,
> > Pali Rohár <pali@kernel.org> wrote:
> > > 
> > > This adds support for legacy INTx interrupts received from other PCIe
> > > devices and which are reported by a new INTx irq chip.
> > > 
> > > With this change, kernel can distinguish between INTA, INTB, INTC and INTD
> > > interrupts.
> > > 
> > > Note that for this support, device tree files has to be properly adjusted
> > > to provide "interrupts" or "interrupts-extended" property with intx
> > > interrupt source, "interrupt-names" property with "intx" string and also
> > > 'interrupt-controller' subnode must be defined.
> > > 
> > > If device tree files do not provide these nodes then driver would work as
> > > before.
> > > 
> > > Signed-off-by: Pali Rohár <pali@kernel.org>
> > > ---
> > >  drivers/pci/controller/pci-mvebu.c | 182 +++++++++++++++++++++++++++--
> > >  1 file changed, 174 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
> > > index 1e90ab888075..04bcdd7b7a6d 100644
> > > --- a/drivers/pci/controller/pci-mvebu.c
> > > +++ b/drivers/pci/controller/pci-mvebu.c
> > > @@ -54,9 +54,10 @@
> > >  	 PCIE_CONF_ADDR_EN)
> > >  #define PCIE_CONF_DATA_OFF	0x18fc
> > >  #define PCIE_INT_CAUSE_OFF	0x1900
> > > +#define PCIE_INT_UNMASK_OFF	0x1910
> > > +#define  PCIE_INT_INTX(i)		BIT(24+i)
> > >  #define  PCIE_INT_PM_PME		BIT(28)
> > > -#define PCIE_MASK_OFF		0x1910
> > > -#define  PCIE_MASK_ENABLE_INTS          0x0f000000
> > > +#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
> > >  #define PCIE_CTRL_OFF		0x1a00
> > >  #define  PCIE_CTRL_X1_MODE		0x0001
> > >  #define  PCIE_CTRL_RC_MODE		BIT(1)
> > > @@ -110,6 +111,10 @@ struct mvebu_pcie_port {
> > >  	struct mvebu_pcie_window iowin;
> > >  	u32 saved_pcie_stat;
> > >  	struct resource regs;
> > > +	struct irq_domain *intx_irq_domain;
> > > +	struct irq_chip intx_irq_chip;
> > 
> > Why is this structure per port? It really should be global. Printing
> > the port number in the name isn't enough of a reason.
> 
> Because each port has its own independent set of INTA-INTD
> interrupts.

That doesn't warrant a copy of an irq_chip structure that contains the
exact same callbacks, and only differs by *a string*. And the use of
this string is only to end-up in /proc/interrupts, which is totally
pointless.

> 
> > > +	raw_spinlock_t irq_lock;
> > > +	int intx_irq;
> > >  };
> > >  
> > >  static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
> > > @@ -235,7 +240,7 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
> > >  
> > >  static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> > >  {
> > > -	u32 ctrl, lnkcap, cmd, dev_rev, mask;
> > > +	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
> > >  
> > >  	/* Setup PCIe controller to Root Complex mode. */
> > >  	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
> > > @@ -288,10 +293,30 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> > >  	/* Point PCIe unit MBUS decode windows to DRAM space. */
> > >  	mvebu_pcie_setup_wins(port);
> > >  
> > > -	/* Enable interrupt lines A-D. */
> > > -	mask = mvebu_readl(port, PCIE_MASK_OFF);
> > > -	mask |= PCIE_MASK_ENABLE_INTS;
> > > -	mvebu_writel(port, mask, PCIE_MASK_OFF);
> > > +	/* Mask all interrupt sources. */
> > > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> > > +
> > > +	/* Clear all interrupt causes. */
> > > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> > > +
> > > +	if (port->intx_irq <= 0) {
> > > +		/*
> > > +		 * When neither "summary" interrupt, nor "intx" interrupt was
> > > +		 * specified in DT then unmask all legacy INTx interrupts as in
> > > +		 * this case driver does not provide a way for masking and
> > > +		 * unmasking of individual legacy INTx interrupts. In this case
> > > +		 * all interrupts, including legacy INTx are reported via one
> > > +		 * shared GIC source and therefore kernel cannot distinguish
> > > +		 * which individual legacy INTx was triggered. These interrupts
> > > +		 * are shared, so it should not cause any issue. Just
> > > +		 * performance penalty as every PCIe interrupt handler needs to
> > > +		 * be called when some interrupt is triggered.
> > > +		 */
> > > +		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > +		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
> > > +			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
> > > +		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > 
> > Maybe worth printing a warning here, so that the user knows they are
> > on thin ice.
> 
> Ok. I can add it here. Anyway, this is default current state without
> this patch.
> 
> > > +	}
> > >  }
> > >  
> > >  static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
> > > @@ -924,6 +949,109 @@ static struct pci_ops mvebu_pcie_ops = {
> > >  	.write = mvebu_pcie_wr_conf,
> > >  };
> > >  
> > > +static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
> > > +{
> > > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > > +	unsigned long flags;
> > > +	u32 unmask;
> > > +
> > > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > +	unmask &= ~PCIE_INT_INTX(hwirq);
> > > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > > +}
> > > +
> > > +static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
> > > +{
> > > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > > +	unsigned long flags;
> > > +	u32 unmask;
> > > +
> > > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > +	unmask |= PCIE_INT_INTX(hwirq);
> > > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > > +}
> > > +
> > > +static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
> > > +				   unsigned int virq, irq_hw_number_t hwirq)
> > > +{
> > > +	struct mvebu_pcie_port *port = h->host_data;
> > > +
> > > +	irq_set_status_flags(virq, IRQ_LEVEL);
> > > +	irq_set_chip_and_handler(virq, &port->intx_irq_chip, handle_level_irq);
> > > +	irq_set_chip_data(virq, port);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
> > > +	.map = mvebu_pcie_intx_irq_map,
> > > +	.xlate = irq_domain_xlate_onecell,
> > > +};
> > > +
> > > +static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
> > > +{
> > > +	struct device *dev = &port->pcie->pdev->dev;
> > > +	struct device_node *pcie_intc_node;
> > > +
> > > +	raw_spin_lock_init(&port->irq_lock);
> > > +
> > > +	port->intx_irq_chip.name = devm_kasprintf(dev, GFP_KERNEL,
> > > +						  "mvebu-%s-INTx",
> > > +						  port->name);
> > 
> > That's exactly what I really don't want to see. It prevents sharing of
> > the irq_chip structure, and gets in the way of making it const in the
> > future. Yes, I know that some drivers do that. I can't fix those,
> > because /proc/interrupts is ABI. But I really don't want to see more
> > of these.
> 
> Well, I do not understand why it should be shared and with who. HW has N
> independent IRQ chips for legacy interrupts. And each one will be
> specified in DT per HW layout / design.

If you have multiple ports, all the ports can share the irq_chip
structure. Actually scratch that. They *MUST* share the structure. The
only reason you're not sharing it is to be able to print this useless
string in /proc/interrupts.

	M.
Pali Rohár Jan. 6, 2022, 4:20 p.m. UTC | #4
On Thursday 06 January 2022 15:55:11 Marc Zyngier wrote:
> On Thu, 06 Jan 2022 15:44:47 +0000,
> Pali Rohár <pali@kernel.org> wrote:
> > 
> > On Thursday 06 January 2022 15:28:20 Marc Zyngier wrote:
> > > On Wed, 05 Jan 2022 15:02:38 +0000,
> > > Pali Rohár <pali@kernel.org> wrote:
> > > > 
> > > > This adds support for legacy INTx interrupts received from other PCIe
> > > > devices and which are reported by a new INTx irq chip.
> > > > 
> > > > With this change, kernel can distinguish between INTA, INTB, INTC and INTD
> > > > interrupts.
> > > > 
> > > > Note that for this support, device tree files has to be properly adjusted
> > > > to provide "interrupts" or "interrupts-extended" property with intx
> > > > interrupt source, "interrupt-names" property with "intx" string and also
> > > > 'interrupt-controller' subnode must be defined.
> > > > 
> > > > If device tree files do not provide these nodes then driver would work as
> > > > before.
> > > > 
> > > > Signed-off-by: Pali Rohár <pali@kernel.org>
> > > > ---
> > > >  drivers/pci/controller/pci-mvebu.c | 182 +++++++++++++++++++++++++++--
> > > >  1 file changed, 174 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
> > > > index 1e90ab888075..04bcdd7b7a6d 100644
> > > > --- a/drivers/pci/controller/pci-mvebu.c
> > > > +++ b/drivers/pci/controller/pci-mvebu.c
> > > > @@ -54,9 +54,10 @@
> > > >  	 PCIE_CONF_ADDR_EN)
> > > >  #define PCIE_CONF_DATA_OFF	0x18fc
> > > >  #define PCIE_INT_CAUSE_OFF	0x1900
> > > > +#define PCIE_INT_UNMASK_OFF	0x1910
> > > > +#define  PCIE_INT_INTX(i)		BIT(24+i)
> > > >  #define  PCIE_INT_PM_PME		BIT(28)
> > > > -#define PCIE_MASK_OFF		0x1910
> > > > -#define  PCIE_MASK_ENABLE_INTS          0x0f000000
> > > > +#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
> > > >  #define PCIE_CTRL_OFF		0x1a00
> > > >  #define  PCIE_CTRL_X1_MODE		0x0001
> > > >  #define  PCIE_CTRL_RC_MODE		BIT(1)
> > > > @@ -110,6 +111,10 @@ struct mvebu_pcie_port {
> > > >  	struct mvebu_pcie_window iowin;
> > > >  	u32 saved_pcie_stat;
> > > >  	struct resource regs;
> > > > +	struct irq_domain *intx_irq_domain;
> > > > +	struct irq_chip intx_irq_chip;
> > > 
> > > Why is this structure per port? It really should be global. Printing
> > > the port number in the name isn't enough of a reason.
> > 
> > Because each port has its own independent set of INTA-INTD
> > interrupts.
> 
> That doesn't warrant a copy of an irq_chip structure that contains the
> exact same callbacks, and only differs by *a string*. And the use of
> this string is only to end-up in /proc/interrupts, which is totally
> pointless.
> 
> > 
> > > > +	raw_spinlock_t irq_lock;
> > > > +	int intx_irq;
> > > >  };
> > > >  
> > > >  static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
> > > > @@ -235,7 +240,7 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
> > > >  
> > > >  static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> > > >  {
> > > > -	u32 ctrl, lnkcap, cmd, dev_rev, mask;
> > > > +	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
> > > >  
> > > >  	/* Setup PCIe controller to Root Complex mode. */
> > > >  	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
> > > > @@ -288,10 +293,30 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> > > >  	/* Point PCIe unit MBUS decode windows to DRAM space. */
> > > >  	mvebu_pcie_setup_wins(port);
> > > >  
> > > > -	/* Enable interrupt lines A-D. */
> > > > -	mask = mvebu_readl(port, PCIE_MASK_OFF);
> > > > -	mask |= PCIE_MASK_ENABLE_INTS;
> > > > -	mvebu_writel(port, mask, PCIE_MASK_OFF);
> > > > +	/* Mask all interrupt sources. */
> > > > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> > > > +
> > > > +	/* Clear all interrupt causes. */
> > > > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> > > > +
> > > > +	if (port->intx_irq <= 0) {
> > > > +		/*
> > > > +		 * When neither "summary" interrupt, nor "intx" interrupt was
> > > > +		 * specified in DT then unmask all legacy INTx interrupts as in
> > > > +		 * this case driver does not provide a way for masking and
> > > > +		 * unmasking of individual legacy INTx interrupts. In this case
> > > > +		 * all interrupts, including legacy INTx are reported via one
> > > > +		 * shared GIC source and therefore kernel cannot distinguish
> > > > +		 * which individual legacy INTx was triggered. These interrupts
> > > > +		 * are shared, so it should not cause any issue. Just
> > > > +		 * performance penalty as every PCIe interrupt handler needs to
> > > > +		 * be called when some interrupt is triggered.
> > > > +		 */
> > > > +		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > +		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
> > > > +			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
> > > > +		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > 
> > > Maybe worth printing a warning here, so that the user knows they are
> > > on thin ice.
> > 
> > Ok. I can add it here. Anyway, this is default current state without
> > this patch.
> > 
> > > > +	}
> > > >  }
> > > >  
> > > >  static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
> > > > @@ -924,6 +949,109 @@ static struct pci_ops mvebu_pcie_ops = {
> > > >  	.write = mvebu_pcie_wr_conf,
> > > >  };
> > > >  
> > > > +static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
> > > > +{
> > > > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > > > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > > > +	unsigned long flags;
> > > > +	u32 unmask;
> > > > +
> > > > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > > > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > +	unmask &= ~PCIE_INT_INTX(hwirq);
> > > > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > > > +}
> > > > +
> > > > +static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
> > > > +{
> > > > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > > > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > > > +	unsigned long flags;
> > > > +	u32 unmask;
> > > > +
> > > > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > > > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > +	unmask |= PCIE_INT_INTX(hwirq);
> > > > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > > > +}
> > > > +
> > > > +static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
> > > > +				   unsigned int virq, irq_hw_number_t hwirq)
> > > > +{
> > > > +	struct mvebu_pcie_port *port = h->host_data;
> > > > +
> > > > +	irq_set_status_flags(virq, IRQ_LEVEL);
> > > > +	irq_set_chip_and_handler(virq, &port->intx_irq_chip, handle_level_irq);
> > > > +	irq_set_chip_data(virq, port);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
> > > > +	.map = mvebu_pcie_intx_irq_map,
> > > > +	.xlate = irq_domain_xlate_onecell,
> > > > +};
> > > > +
> > > > +static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
> > > > +{
> > > > +	struct device *dev = &port->pcie->pdev->dev;
> > > > +	struct device_node *pcie_intc_node;
> > > > +
> > > > +	raw_spin_lock_init(&port->irq_lock);
> > > > +
> > > > +	port->intx_irq_chip.name = devm_kasprintf(dev, GFP_KERNEL,
> > > > +						  "mvebu-%s-INTx",
> > > > +						  port->name);
> > > 
> > > That's exactly what I really don't want to see. It prevents sharing of
> > > the irq_chip structure, and gets in the way of making it const in the
> > > future. Yes, I know that some drivers do that. I can't fix those,
> > > because /proc/interrupts is ABI. But I really don't want to see more
> > > of these.
> > 
> > Well, I do not understand why it should be shared and with who. HW has N
> > independent IRQ chips for legacy interrupts. And each one will be
> > specified in DT per HW layout / design.
> 
> If you have multiple ports, all the ports can share the irq_chip
> structure. Actually scratch that. They *MUST* share the structure. The
> only reason you're not sharing it is to be able to print this useless
> string in /proc/interrupts.

What is the point of sharing one irq chip if HW has N independent irq
chips (for legacy interrupts)? I do not catch it yet. And I do not care
here for /proc/interrupts, so also I have not caught what do you mean be
last sentence with "the only reason".

And I still do not see how it could even work to have just one irq chip
and one irq domain as each irq domain needs to know to which port it
belongs, so it can mask/unmask interrupts from correct port. Also
initialization of domain is taking DT node and for each port it is
different.

So I'm somehow confused here...

The improvement in this patch is to be able to mask INTA interrupts on
port 1 and let INTA interrupts unmasked on port 2 if there drivers are
interested only for interrupts from device connected to port 2.

And if all interrupts are going to be shared (again) then it does not
solve any problem.
Marc Zyngier Jan. 6, 2022, 4:27 p.m. UTC | #5
On Thu, 06 Jan 2022 16:20:47 +0000,
Pali Rohár <pali@kernel.org> wrote:
> 
> On Thursday 06 January 2022 15:55:11 Marc Zyngier wrote:
> > On Thu, 06 Jan 2022 15:44:47 +0000,
> > Pali Rohár <pali@kernel.org> wrote:
> > > 
> > > On Thursday 06 January 2022 15:28:20 Marc Zyngier wrote:
> > > > On Wed, 05 Jan 2022 15:02:38 +0000,
> > > > Pali Rohár <pali@kernel.org> wrote:
> > > > > 
> > > > > This adds support for legacy INTx interrupts received from other PCIe
> > > > > devices and which are reported by a new INTx irq chip.
> > > > > 
> > > > > With this change, kernel can distinguish between INTA, INTB, INTC and INTD
> > > > > interrupts.
> > > > > 
> > > > > Note that for this support, device tree files has to be properly adjusted
> > > > > to provide "interrupts" or "interrupts-extended" property with intx
> > > > > interrupt source, "interrupt-names" property with "intx" string and also
> > > > > 'interrupt-controller' subnode must be defined.
> > > > > 
> > > > > If device tree files do not provide these nodes then driver would work as
> > > > > before.
> > > > > 
> > > > > Signed-off-by: Pali Rohár <pali@kernel.org>
> > > > > ---
> > > > >  drivers/pci/controller/pci-mvebu.c | 182 +++++++++++++++++++++++++++--
> > > > >  1 file changed, 174 insertions(+), 8 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
> > > > > index 1e90ab888075..04bcdd7b7a6d 100644
> > > > > --- a/drivers/pci/controller/pci-mvebu.c
> > > > > +++ b/drivers/pci/controller/pci-mvebu.c
> > > > > @@ -54,9 +54,10 @@
> > > > >  	 PCIE_CONF_ADDR_EN)
> > > > >  #define PCIE_CONF_DATA_OFF	0x18fc
> > > > >  #define PCIE_INT_CAUSE_OFF	0x1900
> > > > > +#define PCIE_INT_UNMASK_OFF	0x1910
> > > > > +#define  PCIE_INT_INTX(i)		BIT(24+i)
> > > > >  #define  PCIE_INT_PM_PME		BIT(28)
> > > > > -#define PCIE_MASK_OFF		0x1910
> > > > > -#define  PCIE_MASK_ENABLE_INTS          0x0f000000
> > > > > +#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
> > > > >  #define PCIE_CTRL_OFF		0x1a00
> > > > >  #define  PCIE_CTRL_X1_MODE		0x0001
> > > > >  #define  PCIE_CTRL_RC_MODE		BIT(1)
> > > > > @@ -110,6 +111,10 @@ struct mvebu_pcie_port {
> > > > >  	struct mvebu_pcie_window iowin;
> > > > >  	u32 saved_pcie_stat;
> > > > >  	struct resource regs;
> > > > > +	struct irq_domain *intx_irq_domain;
> > > > > +	struct irq_chip intx_irq_chip;
> > > > 
> > > > Why is this structure per port? It really should be global. Printing
> > > > the port number in the name isn't enough of a reason.
> > > 
> > > Because each port has its own independent set of INTA-INTD
> > > interrupts.
> > 
> > That doesn't warrant a copy of an irq_chip structure that contains the
> > exact same callbacks, and only differs by *a string*. And the use of
> > this string is only to end-up in /proc/interrupts, which is totally
> > pointless.
> > 
> > > 
> > > > > +	raw_spinlock_t irq_lock;
> > > > > +	int intx_irq;
> > > > >  };
> > > > >  
> > > > >  static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
> > > > > @@ -235,7 +240,7 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
> > > > >  
> > > > >  static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> > > > >  {
> > > > > -	u32 ctrl, lnkcap, cmd, dev_rev, mask;
> > > > > +	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
> > > > >  
> > > > >  	/* Setup PCIe controller to Root Complex mode. */
> > > > >  	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
> > > > > @@ -288,10 +293,30 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> > > > >  	/* Point PCIe unit MBUS decode windows to DRAM space. */
> > > > >  	mvebu_pcie_setup_wins(port);
> > > > >  
> > > > > -	/* Enable interrupt lines A-D. */
> > > > > -	mask = mvebu_readl(port, PCIE_MASK_OFF);
> > > > > -	mask |= PCIE_MASK_ENABLE_INTS;
> > > > > -	mvebu_writel(port, mask, PCIE_MASK_OFF);
> > > > > +	/* Mask all interrupt sources. */
> > > > > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> > > > > +
> > > > > +	/* Clear all interrupt causes. */
> > > > > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> > > > > +
> > > > > +	if (port->intx_irq <= 0) {
> > > > > +		/*
> > > > > +		 * When neither "summary" interrupt, nor "intx" interrupt was
> > > > > +		 * specified in DT then unmask all legacy INTx interrupts as in
> > > > > +		 * this case driver does not provide a way for masking and
> > > > > +		 * unmasking of individual legacy INTx interrupts. In this case
> > > > > +		 * all interrupts, including legacy INTx are reported via one
> > > > > +		 * shared GIC source and therefore kernel cannot distinguish
> > > > > +		 * which individual legacy INTx was triggered. These interrupts
> > > > > +		 * are shared, so it should not cause any issue. Just
> > > > > +		 * performance penalty as every PCIe interrupt handler needs to
> > > > > +		 * be called when some interrupt is triggered.
> > > > > +		 */
> > > > > +		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > > +		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
> > > > > +			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
> > > > > +		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > > 
> > > > Maybe worth printing a warning here, so that the user knows they are
> > > > on thin ice.
> > > 
> > > Ok. I can add it here. Anyway, this is default current state without
> > > this patch.
> > > 
> > > > > +	}
> > > > >  }
> > > > >  
> > > > >  static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
> > > > > @@ -924,6 +949,109 @@ static struct pci_ops mvebu_pcie_ops = {
> > > > >  	.write = mvebu_pcie_wr_conf,
> > > > >  };
> > > > >  
> > > > > +static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
> > > > > +{
> > > > > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > > > > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > > > > +	unsigned long flags;
> > > > > +	u32 unmask;
> > > > > +
> > > > > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > > > > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > > +	unmask &= ~PCIE_INT_INTX(hwirq);
> > > > > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > > > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > > > > +}
> > > > > +
> > > > > +static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
> > > > > +{
> > > > > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > > > > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > > > > +	unsigned long flags;
> > > > > +	u32 unmask;
> > > > > +
> > > > > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > > > > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > > +	unmask |= PCIE_INT_INTX(hwirq);
> > > > > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > > > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > > > > +}
> > > > > +
> > > > > +static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
> > > > > +				   unsigned int virq, irq_hw_number_t hwirq)
> > > > > +{
> > > > > +	struct mvebu_pcie_port *port = h->host_data;
> > > > > +
> > > > > +	irq_set_status_flags(virq, IRQ_LEVEL);
> > > > > +	irq_set_chip_and_handler(virq, &port->intx_irq_chip, handle_level_irq);
> > > > > +	irq_set_chip_data(virq, port);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
> > > > > +	.map = mvebu_pcie_intx_irq_map,
> > > > > +	.xlate = irq_domain_xlate_onecell,
> > > > > +};
> > > > > +
> > > > > +static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
> > > > > +{
> > > > > +	struct device *dev = &port->pcie->pdev->dev;
> > > > > +	struct device_node *pcie_intc_node;
> > > > > +
> > > > > +	raw_spin_lock_init(&port->irq_lock);
> > > > > +
> > > > > +	port->intx_irq_chip.name = devm_kasprintf(dev, GFP_KERNEL,
> > > > > +						  "mvebu-%s-INTx",
> > > > > +						  port->name);
> > > > 
> > > > That's exactly what I really don't want to see. It prevents sharing of
> > > > the irq_chip structure, and gets in the way of making it const in the
> > > > future. Yes, I know that some drivers do that. I can't fix those,
> > > > because /proc/interrupts is ABI. But I really don't want to see more
> > > > of these.
> > > 
> > > Well, I do not understand why it should be shared and with who. HW has N
> > > independent IRQ chips for legacy interrupts. And each one will be
> > > specified in DT per HW layout / design.
> > 
> > If you have multiple ports, all the ports can share the irq_chip
> > structure. Actually scratch that. They *MUST* share the structure. The
> > only reason you're not sharing it is to be able to print this useless
> > string in /proc/interrupts.
> 
> What is the point of sharing one irq chip if HW has N independent irq
> chips (for legacy interrupts)? I do not catch it yet. And I do not care
> here for /proc/interrupts, so also I have not caught what do you mean be
> last sentence with "the only reason".
> 
> And I still do not see how it could even work to have just one irq chip
> and one irq domain as each irq domain needs to know to which port it
> belongs, so it can mask/unmask interrupts from correct port. Also
> initialization of domain is taking DT node and for each port it is
> different.
> 
> So I'm somehow confused here...
> 
> The improvement in this patch is to be able to mask INTA interrupts on
> port 1 and let INTA interrupts unmasked on port 2 if there drivers are
> interested only for interrupts from device connected to port 2.
> 
> And if all interrupts are going to be shared (again) then it does not
> solve any problem.

You are completely missing my point. I'm talking about data
structures, you're talking about interrupts. You have this:

struct mvebu_pcie_port {
       // Tons of stuff
       struct irq_chip intx_chip;
};

What I want you to do is:

struct mvebu_pcie_port {
       // Tons of stuff
};

static struct irq_chip intx_chip = {
	.name		= "INTx",
	.irq_mask	= mvebu_pcie_intx_irq_mask,
	.irq_unmask	= mvebu_pcie_intx_irq_unmask;
};

That's it. No more, no less.

	M.
Marek Behún Jan. 6, 2022, 5:20 p.m. UTC | #6
On Thu, 06 Jan 2022 16:27:44 +0000
Marc Zyngier <maz@kernel.org> wrote:

> On Thu, 06 Jan 2022 16:20:47 +0000,
> Pali Rohár <pali@kernel.org> wrote:
> > 
> > On Thursday 06 January 2022 15:55:11 Marc Zyngier wrote:  
> > > On Thu, 06 Jan 2022 15:44:47 +0000,
> > > Pali Rohár <pali@kernel.org> wrote:  
> > > > 
> > > > On Thursday 06 January 2022 15:28:20 Marc Zyngier wrote:  
> > > > > On Wed, 05 Jan 2022 15:02:38 +0000,
> > > > > Pali Rohár <pali@kernel.org> wrote:  
> > > > > > 
> > > > > > This adds support for legacy INTx interrupts received from other PCIe
> > > > > > devices and which are reported by a new INTx irq chip.
> > > > > > 
> > > > > > With this change, kernel can distinguish between INTA, INTB, INTC and INTD
> > > > > > interrupts.
> > > > > > 
> > > > > > Note that for this support, device tree files has to be properly adjusted
> > > > > > to provide "interrupts" or "interrupts-extended" property with intx
> > > > > > interrupt source, "interrupt-names" property with "intx" string and also
> > > > > > 'interrupt-controller' subnode must be defined.
> > > > > > 
> > > > > > If device tree files do not provide these nodes then driver would work as
> > > > > > before.
> > > > > > 
> > > > > > Signed-off-by: Pali Rohár <pali@kernel.org>
> > > > > > ---
> > > > > >  drivers/pci/controller/pci-mvebu.c | 182 +++++++++++++++++++++++++++--
> > > > > >  1 file changed, 174 insertions(+), 8 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
> > > > > > index 1e90ab888075..04bcdd7b7a6d 100644
> > > > > > --- a/drivers/pci/controller/pci-mvebu.c
> > > > > > +++ b/drivers/pci/controller/pci-mvebu.c
> > > > > > @@ -54,9 +54,10 @@
> > > > > >  	 PCIE_CONF_ADDR_EN)
> > > > > >  #define PCIE_CONF_DATA_OFF	0x18fc
> > > > > >  #define PCIE_INT_CAUSE_OFF	0x1900
> > > > > > +#define PCIE_INT_UNMASK_OFF	0x1910
> > > > > > +#define  PCIE_INT_INTX(i)		BIT(24+i)
> > > > > >  #define  PCIE_INT_PM_PME		BIT(28)
> > > > > > -#define PCIE_MASK_OFF		0x1910
> > > > > > -#define  PCIE_MASK_ENABLE_INTS          0x0f000000
> > > > > > +#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
> > > > > >  #define PCIE_CTRL_OFF		0x1a00
> > > > > >  #define  PCIE_CTRL_X1_MODE		0x0001
> > > > > >  #define  PCIE_CTRL_RC_MODE		BIT(1)
> > > > > > @@ -110,6 +111,10 @@ struct mvebu_pcie_port {
> > > > > >  	struct mvebu_pcie_window iowin;
> > > > > >  	u32 saved_pcie_stat;
> > > > > >  	struct resource regs;
> > > > > > +	struct irq_domain *intx_irq_domain;
> > > > > > +	struct irq_chip intx_irq_chip;  
> > > > > 
> > > > > Why is this structure per port? It really should be global. Printing
> > > > > the port number in the name isn't enough of a reason.  
> > > > 
> > > > Because each port has its own independent set of INTA-INTD
> > > > interrupts.  
> > > 
> > > That doesn't warrant a copy of an irq_chip structure that contains the
> > > exact same callbacks, and only differs by *a string*. And the use of
> > > this string is only to end-up in /proc/interrupts, which is totally
> > > pointless.
> > >   
> > > >   
> > > > > > +	raw_spinlock_t irq_lock;
> > > > > > +	int intx_irq;
> > > > > >  };
> > > > > >  
> > > > > >  static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
> > > > > > @@ -235,7 +240,7 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
> > > > > >  
> > > > > >  static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> > > > > >  {
> > > > > > -	u32 ctrl, lnkcap, cmd, dev_rev, mask;
> > > > > > +	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
> > > > > >  
> > > > > >  	/* Setup PCIe controller to Root Complex mode. */
> > > > > >  	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
> > > > > > @@ -288,10 +293,30 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> > > > > >  	/* Point PCIe unit MBUS decode windows to DRAM space. */
> > > > > >  	mvebu_pcie_setup_wins(port);
> > > > > >  
> > > > > > -	/* Enable interrupt lines A-D. */
> > > > > > -	mask = mvebu_readl(port, PCIE_MASK_OFF);
> > > > > > -	mask |= PCIE_MASK_ENABLE_INTS;
> > > > > > -	mvebu_writel(port, mask, PCIE_MASK_OFF);
> > > > > > +	/* Mask all interrupt sources. */
> > > > > > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> > > > > > +
> > > > > > +	/* Clear all interrupt causes. */
> > > > > > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> > > > > > +
> > > > > > +	if (port->intx_irq <= 0) {
> > > > > > +		/*
> > > > > > +		 * When neither "summary" interrupt, nor "intx" interrupt was
> > > > > > +		 * specified in DT then unmask all legacy INTx interrupts as in
> > > > > > +		 * this case driver does not provide a way for masking and
> > > > > > +		 * unmasking of individual legacy INTx interrupts. In this case
> > > > > > +		 * all interrupts, including legacy INTx are reported via one
> > > > > > +		 * shared GIC source and therefore kernel cannot distinguish
> > > > > > +		 * which individual legacy INTx was triggered. These interrupts
> > > > > > +		 * are shared, so it should not cause any issue. Just
> > > > > > +		 * performance penalty as every PCIe interrupt handler needs to
> > > > > > +		 * be called when some interrupt is triggered.
> > > > > > +		 */
> > > > > > +		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > > > +		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
> > > > > > +			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
> > > > > > +		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);  
> > > > > 
> > > > > Maybe worth printing a warning here, so that the user knows they are
> > > > > on thin ice.  
> > > > 
> > > > Ok. I can add it here. Anyway, this is default current state without
> > > > this patch.
> > > >   
> > > > > > +	}
> > > > > >  }
> > > > > >  
> > > > > >  static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
> > > > > > @@ -924,6 +949,109 @@ static struct pci_ops mvebu_pcie_ops = {
> > > > > >  	.write = mvebu_pcie_wr_conf,
> > > > > >  };
> > > > > >  
> > > > > > +static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
> > > > > > +{
> > > > > > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > > > > > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > > > > > +	unsigned long flags;
> > > > > > +	u32 unmask;
> > > > > > +
> > > > > > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > > > > > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > > > +	unmask &= ~PCIE_INT_INTX(hwirq);
> > > > > > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > > > > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > > > > > +}
> > > > > > +
> > > > > > +static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
> > > > > > +{
> > > > > > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > > > > > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > > > > > +	unsigned long flags;
> > > > > > +	u32 unmask;
> > > > > > +
> > > > > > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > > > > > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > > > > > +	unmask |= PCIE_INT_INTX(hwirq);
> > > > > > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > > > > > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > > > > > +}
> > > > > > +
> > > > > > +static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
> > > > > > +				   unsigned int virq, irq_hw_number_t hwirq)
> > > > > > +{
> > > > > > +	struct mvebu_pcie_port *port = h->host_data;
> > > > > > +
> > > > > > +	irq_set_status_flags(virq, IRQ_LEVEL);
> > > > > > +	irq_set_chip_and_handler(virq, &port->intx_irq_chip, handle_level_irq);
> > > > > > +	irq_set_chip_data(virq, port);
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
> > > > > > +	.map = mvebu_pcie_intx_irq_map,
> > > > > > +	.xlate = irq_domain_xlate_onecell,
> > > > > > +};
> > > > > > +
> > > > > > +static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
> > > > > > +{
> > > > > > +	struct device *dev = &port->pcie->pdev->dev;
> > > > > > +	struct device_node *pcie_intc_node;
> > > > > > +
> > > > > > +	raw_spin_lock_init(&port->irq_lock);
> > > > > > +
> > > > > > +	port->intx_irq_chip.name = devm_kasprintf(dev, GFP_KERNEL,
> > > > > > +						  "mvebu-%s-INTx",
> > > > > > +						  port->name);  
> > > > > 
> > > > > That's exactly what I really don't want to see. It prevents sharing of
> > > > > the irq_chip structure, and gets in the way of making it const in the
> > > > > future. Yes, I know that some drivers do that. I can't fix those,
> > > > > because /proc/interrupts is ABI. But I really don't want to see more
> > > > > of these.  
> > > > 
> > > > Well, I do not understand why it should be shared and with who. HW has N
> > > > independent IRQ chips for legacy interrupts. And each one will be
> > > > specified in DT per HW layout / design.  
> > > 
> > > If you have multiple ports, all the ports can share the irq_chip
> > > structure. Actually scratch that. They *MUST* share the structure. The
> > > only reason you're not sharing it is to be able to print this useless
> > > string in /proc/interrupts.  
> > 
> > What is the point of sharing one irq chip if HW has N independent irq
> > chips (for legacy interrupts)? I do not catch it yet. And I do not care
> > here for /proc/interrupts, so also I have not caught what do you mean be
> > last sentence with "the only reason".
> > 
> > And I still do not see how it could even work to have just one irq chip
> > and one irq domain as each irq domain needs to know to which port it
> > belongs, so it can mask/unmask interrupts from correct port. Also
> > initialization of domain is taking DT node and for each port it is
> > different.
> > 
> > So I'm somehow confused here...
> > 
> > The improvement in this patch is to be able to mask INTA interrupts on
> > port 1 and let INTA interrupts unmasked on port 2 if there drivers are
> > interested only for interrupts from device connected to port 2.
> > 
> > And if all interrupts are going to be shared (again) then it does not
> > solve any problem.  
> 
> You are completely missing my point. I'm talking about data
> structures, you're talking about interrupts. You have this:
> 
> struct mvebu_pcie_port {
>        // Tons of stuff
>        struct irq_chip intx_chip;
> };
> 
> What I want you to do is:
> 
> struct mvebu_pcie_port {
>        // Tons of stuff
> };
> 
> static struct irq_chip intx_chip = {
> 	.name		= "INTx",
> 	.irq_mask	= mvebu_pcie_intx_irq_mask,
> 	.irq_unmask	= mvebu_pcie_intx_irq_unmask;
> };
> 
> That's it. No more, no less.
> 
> 	M.
> 

Hmm, but struct irq_chip contains a dynamic member,
  struct device *parent_device;
Isn't that used? Or are you planning to kill it?

Marek
Marc Zyngier Jan. 6, 2022, 5:31 p.m. UTC | #7
On Thu, 06 Jan 2022 17:20:44 +0000,
Marek Behún <kabel@kernel.org> wrote:
> 
> On Thu, 06 Jan 2022 16:27:44 +0000
> Marc Zyngier <maz@kernel.org> wrote:
> > You are completely missing my point. I'm talking about data
> > structures, you're talking about interrupts. You have this:
> > 
> > struct mvebu_pcie_port {
> >        // Tons of stuff
> >        struct irq_chip intx_chip;
> > };
> > 
> > What I want you to do is:
> > 
> > struct mvebu_pcie_port {
> >        // Tons of stuff
> > };
> > 
> > static struct irq_chip intx_chip = {
> > 	.name		= "INTx",
> > 	.irq_mask	= mvebu_pcie_intx_irq_mask,
> > 	.irq_unmask	= mvebu_pcie_intx_irq_unmask;
> > };
> > 
> > That's it. No more, no less.
> > 
> > 	M.
> > 
> 
> Hmm, but struct irq_chip contains a dynamic member,
>   struct device *parent_device;
> Isn't that used? Or are you planning to kill it?

Indeed, and I am definitely planning to kill it. This is the wrong
place for this stuff, and I want it gone. There are thankfully very
few users of this misfeature.

	M.
Pali Rohár Jan. 7, 2022, 11:50 a.m. UTC | #8
On Thursday 06 January 2022 17:31:36 Marc Zyngier wrote:
> On Thu, 06 Jan 2022 17:20:44 +0000,
> Marek Behún <kabel@kernel.org> wrote:
> > 
> > On Thu, 06 Jan 2022 16:27:44 +0000
> > Marc Zyngier <maz@kernel.org> wrote:
> > > You are completely missing my point. I'm talking about data
> > > structures, you're talking about interrupts. You have this:
> > > 
> > > struct mvebu_pcie_port {
> > >        // Tons of stuff
> > >        struct irq_chip intx_chip;
> > > };
> > > 
> > > What I want you to do is:
> > > 
> > > struct mvebu_pcie_port {
> > >        // Tons of stuff
> > > };
> > > 
> > > static struct irq_chip intx_chip = {
> > > 	.name		= "INTx",
> > > 	.irq_mask	= mvebu_pcie_intx_irq_mask,
> > > 	.irq_unmask	= mvebu_pcie_intx_irq_unmask;
> > > };
> > > 
> > > That's it. No more, no less.
> > > 
> > > 	M.
> > > 
> > 
> > Hmm, but struct irq_chip contains a dynamic member,
> >   struct device *parent_device;
> > Isn't that used? Or are you planning to kill it?
> 
> Indeed, and I am definitely planning to kill it. This is the wrong
> place for this stuff, and I want it gone. There are thankfully very
> few users of this misfeature.

Ok, so what about this change?

diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index 1e90ab888075..5c816338a569 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -54,9 +54,10 @@
 	 PCIE_CONF_ADDR_EN)
 #define PCIE_CONF_DATA_OFF	0x18fc
 #define PCIE_INT_CAUSE_OFF	0x1900
+#define PCIE_INT_UNMASK_OFF	0x1910
+#define  PCIE_INT_INTX(i)		BIT(24+i)
 #define  PCIE_INT_PM_PME		BIT(28)
-#define PCIE_MASK_OFF		0x1910
-#define  PCIE_MASK_ENABLE_INTS          0x0f000000
+#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
 #define PCIE_CTRL_OFF		0x1a00
 #define  PCIE_CTRL_X1_MODE		0x0001
 #define  PCIE_CTRL_RC_MODE		BIT(1)
@@ -110,6 +111,9 @@ struct mvebu_pcie_port {
 	struct mvebu_pcie_window iowin;
 	u32 saved_pcie_stat;
 	struct resource regs;
+	struct irq_domain *intx_irq_domain;
+	raw_spinlock_t irq_lock;
+	int intx_irq;
 };
 
 static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
@@ -235,7 +239,7 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
 
 static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
 {
-	u32 ctrl, lnkcap, cmd, dev_rev, mask;
+	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
 
 	/* Setup PCIe controller to Root Complex mode. */
 	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
@@ -288,10 +292,30 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
 	/* Point PCIe unit MBUS decode windows to DRAM space. */
 	mvebu_pcie_setup_wins(port);
 
-	/* Enable interrupt lines A-D. */
-	mask = mvebu_readl(port, PCIE_MASK_OFF);
-	mask |= PCIE_MASK_ENABLE_INTS;
-	mvebu_writel(port, mask, PCIE_MASK_OFF);
+	/* Mask all interrupt sources. */
+	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
+
+	/* Clear all interrupt causes. */
+	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
+
+	if (port->intx_irq <= 0) {
+		/*
+		 * When neither "summary" interrupt, nor "intx" interrupt was
+		 * specified in DT then unmask all legacy INTx interrupts as in
+		 * this case driver does not provide a way for masking and
+		 * unmasking of individual legacy INTx interrupts. In this case
+		 * all interrupts, including legacy INTx are reported via one
+		 * shared GIC source and therefore kernel cannot distinguish
+		 * which individual legacy INTx was triggered. These interrupts
+		 * are shared, so it should not cause any issue. Just
+		 * performance penalty as every PCIe interrupt handler needs to
+		 * be called when some interrupt is triggered.
+		 */
+		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
+			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
+		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+	}
 }
 
 static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
@@ -924,6 +948,108 @@ static struct pci_ops mvebu_pcie_ops = {
 	.write = mvebu_pcie_wr_conf,
 };
 
+static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
+{
+	struct mvebu_pcie_port *port = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 unmask;
+
+	raw_spin_lock_irqsave(&port->irq_lock, flags);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	unmask &= ~PCIE_INT_INTX(hwirq);
+	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
+{
+	struct mvebu_pcie_port *port = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 unmask;
+
+	raw_spin_lock_irqsave(&port->irq_lock, flags);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	unmask |= PCIE_INT_INTX(hwirq);
+	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static struct irq_chip intx_irq_chip = {
+	.name = "mvebu-INTx",
+	.irq_mask = mvebu_pcie_intx_irq_mask,
+	.irq_unmask = mvebu_pcie_intx_irq_unmask,
+};
+
+static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
+				   unsigned int virq, irq_hw_number_t hwirq)
+{
+	struct mvebu_pcie_port *port = h->host_data;
+
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &intx_irq_chip, handle_level_irq);
+	irq_set_chip_data(virq, port);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
+	.map = mvebu_pcie_intx_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
+{
+	struct device *dev = &port->pcie->pdev->dev;
+	struct device_node *pcie_intc_node;
+
+	raw_spin_lock_init(&port->irq_lock);
+
+	pcie_intc_node = of_get_next_child(port->dn, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found for %s\n", port->name);
+		return -ENODEV;
+	}
+
+	port->intx_irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						      &mvebu_pcie_intx_irq_domain_ops,
+						      port);
+	of_node_put(pcie_intc_node);
+	if (!port->intx_irq_domain) {
+		dev_err(dev, "Failed to get INTx IRQ domain for %s\n", port->name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mvebu_pcie_irq_handler(struct irq_desc *desc)
+{
+	struct mvebu_pcie_port *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct device *dev = &port->pcie->pdev->dev;
+	u32 cause, unmask, status;
+	int i;
+
+	chained_irq_enter(chip, desc);
+
+	cause = mvebu_readl(port, PCIE_INT_CAUSE_OFF);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	status = cause & unmask;
+
+	/* Process legacy INTx interrupts */
+	for (i = 0; i < PCI_NUM_INTX; i++) {
+		if (!(status & PCIE_INT_INTX(i)))
+			continue;
+
+		if (generic_handle_domain_irq(port->intx_irq_domain, i) == -EINVAL)
+			dev_err_ratelimited(dev, "unexpected INT%c IRQ\n", (char)i+'A');
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
 static int mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	/* Interrupt support on mvebu emulated bridges is not implemented yet */
@@ -1121,6 +1247,21 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
 		port->io_attr = -1;
 	}
 
+	/*
+	 * Old DT bindings do not contain "intx" interrupt
+	 * so do not fail probing driver when interrupt does not exist.
+	 */
+	port->intx_irq = of_irq_get_byname(child, "intx");
+	if (port->intx_irq == -EPROBE_DEFER) {
+		ret = port->intx_irq;
+		goto err;
+	}
+	if (port->intx_irq <= 0) {
+		dev_warn(dev, "%s: legacy INTx interrupts cannot be masked individually, "
+			      "%pOF does not contain intx interrupt\n",
+			 port->name, child);
+	}
+
 	reset_gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, &flags);
 	if (reset_gpio == -EPROBE_DEFER) {
 		ret = reset_gpio;
@@ -1317,6 +1458,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
 
 	for (i = 0; i < pcie->nports; i++) {
 		struct mvebu_pcie_port *port = &pcie->ports[i];
+		int irq = port->intx_irq;
 
 		child = port->dn;
 		if (!child)
@@ -1344,6 +1486,22 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
 			continue;
 		}
 
+		if (irq > 0) {
+			ret = mvebu_pcie_init_irq_domain(port);
+			if (ret) {
+				dev_err(dev, "%s: cannot init irq domain\n",
+					port->name);
+				pci_bridge_emul_cleanup(&port->bridge);
+				devm_iounmap(dev, port->base);
+				port->base = NULL;
+				mvebu_pcie_powerdown(port);
+				continue;
+			}
+			irq_set_chained_handler_and_data(irq,
+							 mvebu_pcie_irq_handler,
+							 port);
+		}
+
 		/*
 		 * PCIe topology exported by mvebu hw is quite complicated. In
 		 * reality has something like N fully independent host bridges
@@ -1448,6 +1606,7 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
 
 	for (i = 0; i < pcie->nports; i++) {
 		struct mvebu_pcie_port *port = &pcie->ports[i];
+		int irq = port->intx_irq;
 
 		if (!port->base)
 			continue;
@@ -1458,7 +1617,17 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
 		mvebu_writel(port, cmd, PCIE_CMD_OFF);
 
 		/* Mask all interrupt sources. */
-		mvebu_writel(port, 0, PCIE_MASK_OFF);
+		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
+
+		/* Clear all interrupt causes. */
+		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
+
+		/* Remove IRQ domains. */
+		if (port->intx_irq_domain)
+			irq_domain_remove(port->intx_irq_domain);
+
+		if (irq > 0)
+			irq_set_chained_handler_and_data(irq, NULL, NULL);
 
 		/* Free config space for emulated root bridge. */
 		pci_bridge_emul_cleanup(&port->bridge);
Marc Zyngier Jan. 7, 2022, 6:53 p.m. UTC | #9
On Fri, 07 Jan 2022 11:50:53 +0000,
Pali Rohár <pali@kernel.org> wrote:
> 
> On Thursday 06 January 2022 17:31:36 Marc Zyngier wrote:
> > On Thu, 06 Jan 2022 17:20:44 +0000,
> > Marek Behún <kabel@kernel.org> wrote:
> > > 
> > > On Thu, 06 Jan 2022 16:27:44 +0000
> > > Marc Zyngier <maz@kernel.org> wrote:
> > > > You are completely missing my point. I'm talking about data
> > > > structures, you're talking about interrupts. You have this:
> > > > 
> > > > struct mvebu_pcie_port {
> > > >        // Tons of stuff
> > > >        struct irq_chip intx_chip;
> > > > };
> > > > 
> > > > What I want you to do is:
> > > > 
> > > > struct mvebu_pcie_port {
> > > >        // Tons of stuff
> > > > };
> > > > 
> > > > static struct irq_chip intx_chip = {
> > > > 	.name		= "INTx",
> > > > 	.irq_mask	= mvebu_pcie_intx_irq_mask,
> > > > 	.irq_unmask	= mvebu_pcie_intx_irq_unmask;
> > > > };
> > > > 
> > > > That's it. No more, no less.
> > > > 
> > > > 	M.
> > > > 
> > > 
> > > Hmm, but struct irq_chip contains a dynamic member,
> > >   struct device *parent_device;
> > > Isn't that used? Or are you planning to kill it?
> > 
> > Indeed, and I am definitely planning to kill it. This is the wrong
> > place for this stuff, and I want it gone. There are thankfully very
> > few users of this misfeature.
> 
> Ok, so what about this change?
> 
> @@ -1458,7 +1617,17 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
>  		mvebu_writel(port, cmd, PCIE_CMD_OFF);
>  
>  		/* Mask all interrupt sources. */
> -		mvebu_writel(port, 0, PCIE_MASK_OFF);
> +		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> +
> +		/* Clear all interrupt causes. */
> +		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> +
> +		/* Remove IRQ domains. */
> +		if (port->intx_irq_domain)
> +			irq_domain_remove(port->intx_irq_domain);
> +
> +		if (irq > 0)
> +			irq_set_chained_handler_and_data(irq, NULL, NULL);

You really want this to be done *before* you remove the domain, as
there still could be interrupts in flight at this point.

	M.
diff mbox series

Patch

diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index 1e90ab888075..04bcdd7b7a6d 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -54,9 +54,10 @@ 
 	 PCIE_CONF_ADDR_EN)
 #define PCIE_CONF_DATA_OFF	0x18fc
 #define PCIE_INT_CAUSE_OFF	0x1900
+#define PCIE_INT_UNMASK_OFF	0x1910
+#define  PCIE_INT_INTX(i)		BIT(24+i)
 #define  PCIE_INT_PM_PME		BIT(28)
-#define PCIE_MASK_OFF		0x1910
-#define  PCIE_MASK_ENABLE_INTS          0x0f000000
+#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
 #define PCIE_CTRL_OFF		0x1a00
 #define  PCIE_CTRL_X1_MODE		0x0001
 #define  PCIE_CTRL_RC_MODE		BIT(1)
@@ -110,6 +111,10 @@  struct mvebu_pcie_port {
 	struct mvebu_pcie_window iowin;
 	u32 saved_pcie_stat;
 	struct resource regs;
+	struct irq_domain *intx_irq_domain;
+	struct irq_chip intx_irq_chip;
+	raw_spinlock_t irq_lock;
+	int intx_irq;
 };
 
 static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
@@ -235,7 +240,7 @@  static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
 
 static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
 {
-	u32 ctrl, lnkcap, cmd, dev_rev, mask;
+	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
 
 	/* Setup PCIe controller to Root Complex mode. */
 	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
@@ -288,10 +293,30 @@  static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
 	/* Point PCIe unit MBUS decode windows to DRAM space. */
 	mvebu_pcie_setup_wins(port);
 
-	/* Enable interrupt lines A-D. */
-	mask = mvebu_readl(port, PCIE_MASK_OFF);
-	mask |= PCIE_MASK_ENABLE_INTS;
-	mvebu_writel(port, mask, PCIE_MASK_OFF);
+	/* Mask all interrupt sources. */
+	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
+
+	/* Clear all interrupt causes. */
+	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
+
+	if (port->intx_irq <= 0) {
+		/*
+		 * When neither "summary" interrupt, nor "intx" interrupt was
+		 * specified in DT then unmask all legacy INTx interrupts as in
+		 * this case driver does not provide a way for masking and
+		 * unmasking of individual legacy INTx interrupts. In this case
+		 * all interrupts, including legacy INTx are reported via one
+		 * shared GIC source and therefore kernel cannot distinguish
+		 * which individual legacy INTx was triggered. These interrupts
+		 * are shared, so it should not cause any issue. Just
+		 * performance penalty as every PCIe interrupt handler needs to
+		 * be called when some interrupt is triggered.
+		 */
+		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
+			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
+		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+	}
 }
 
 static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
@@ -924,6 +949,109 @@  static struct pci_ops mvebu_pcie_ops = {
 	.write = mvebu_pcie_wr_conf,
 };
 
+static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
+{
+	struct mvebu_pcie_port *port = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 unmask;
+
+	raw_spin_lock_irqsave(&port->irq_lock, flags);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	unmask &= ~PCIE_INT_INTX(hwirq);
+	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
+{
+	struct mvebu_pcie_port *port = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 unmask;
+
+	raw_spin_lock_irqsave(&port->irq_lock, flags);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	unmask |= PCIE_INT_INTX(hwirq);
+	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
+				   unsigned int virq, irq_hw_number_t hwirq)
+{
+	struct mvebu_pcie_port *port = h->host_data;
+
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &port->intx_irq_chip, handle_level_irq);
+	irq_set_chip_data(virq, port);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
+	.map = mvebu_pcie_intx_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
+{
+	struct device *dev = &port->pcie->pdev->dev;
+	struct device_node *pcie_intc_node;
+
+	raw_spin_lock_init(&port->irq_lock);
+
+	port->intx_irq_chip.name = devm_kasprintf(dev, GFP_KERNEL,
+						  "mvebu-%s-INTx",
+						  port->name);
+	port->intx_irq_chip.irq_mask = mvebu_pcie_intx_irq_mask;
+	port->intx_irq_chip.irq_unmask = mvebu_pcie_intx_irq_unmask;
+
+	pcie_intc_node = of_get_next_child(port->dn, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found for %s\n", port->name);
+		return -ENODEV;
+	}
+
+	port->intx_irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						      &mvebu_pcie_intx_irq_domain_ops,
+						      port);
+	of_node_put(pcie_intc_node);
+	if (!port->intx_irq_domain) {
+		devm_kfree(dev, port->intx_irq_chip.name);
+		dev_err(dev, "Failed to get INTx IRQ domain for %s\n", port->name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mvebu_pcie_irq_handler(struct irq_desc *desc)
+{
+	struct mvebu_pcie_port *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct device *dev = &port->pcie->pdev->dev;
+	u32 cause, unmask, status;
+	int i;
+
+	chained_irq_enter(chip, desc);
+
+	cause = mvebu_readl(port, PCIE_INT_CAUSE_OFF);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	status = cause & unmask;
+
+	/* Process legacy INTx interrupts */
+	for (i = 0; i < PCI_NUM_INTX; i++) {
+		if (!(status & PCIE_INT_INTX(i)))
+			continue;
+
+		if (generic_handle_domain_irq(port->intx_irq_domain, i) == -EINVAL)
+			dev_err_ratelimited(dev, "unexpected INT%c IRQ\n", (char)i+'A');
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
 static int mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	/* Interrupt support on mvebu emulated bridges is not implemented yet */
@@ -1121,6 +1249,16 @@  static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
 		port->io_attr = -1;
 	}
 
+	/*
+	 * Old DT bindings do not contain "intx" interrupt
+	 * so do not fail probing driver when interrupt does not exist.
+	 */
+	port->intx_irq = of_irq_get_byname(child, "intx");
+	if (port->intx_irq == -EPROBE_DEFER) {
+		ret = port->intx_irq;
+		goto err;
+	}
+
 	reset_gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, &flags);
 	if (reset_gpio == -EPROBE_DEFER) {
 		ret = reset_gpio;
@@ -1317,6 +1455,7 @@  static int mvebu_pcie_probe(struct platform_device *pdev)
 
 	for (i = 0; i < pcie->nports; i++) {
 		struct mvebu_pcie_port *port = &pcie->ports[i];
+		int irq = port->intx_irq;
 
 		child = port->dn;
 		if (!child)
@@ -1344,6 +1483,22 @@  static int mvebu_pcie_probe(struct platform_device *pdev)
 			continue;
 		}
 
+		if (irq > 0) {
+			ret = mvebu_pcie_init_irq_domain(port);
+			if (ret) {
+				dev_err(dev, "%s: cannot init irq domain\n",
+					port->name);
+				pci_bridge_emul_cleanup(&port->bridge);
+				devm_iounmap(dev, port->base);
+				port->base = NULL;
+				mvebu_pcie_powerdown(port);
+				continue;
+			}
+			irq_set_chained_handler_and_data(irq,
+							 mvebu_pcie_irq_handler,
+							 port);
+		}
+
 		/*
 		 * PCIe topology exported by mvebu hw is quite complicated. In
 		 * reality has something like N fully independent host bridges
@@ -1448,6 +1603,7 @@  static int mvebu_pcie_remove(struct platform_device *pdev)
 
 	for (i = 0; i < pcie->nports; i++) {
 		struct mvebu_pcie_port *port = &pcie->ports[i];
+		int irq = port->intx_irq;
 
 		if (!port->base)
 			continue;
@@ -1458,7 +1614,17 @@  static int mvebu_pcie_remove(struct platform_device *pdev)
 		mvebu_writel(port, cmd, PCIE_CMD_OFF);
 
 		/* Mask all interrupt sources. */
-		mvebu_writel(port, 0, PCIE_MASK_OFF);
+		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
+
+		/* Clear all interrupt causes. */
+		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
+
+		/* Remove IRQ domains. */
+		if (port->intx_irq_domain)
+			irq_domain_remove(port->intx_irq_domain);
+
+		if (irq > 0)
+			irq_set_chained_handler_and_data(irq, NULL, NULL);
 
 		/* Free config space for emulated root bridge. */
 		pci_bridge_emul_cleanup(&port->bridge);