diff mbox series

[v5,07/14] irqchip/irq-mvebu-sei: add new driver for Marvell SEI

Message ID 20180830073535.10710-8-miquel.raynal@bootlin.com (mailing list archive)
State Superseded, archived
Headers show
Series Add System Error Interrupt support to Armada SoCs | expand

Commit Message

Miquel Raynal Aug. 30, 2018, 7:35 a.m. UTC
This is a cascaded interrupt controller in the AP806 GIC that collapses
SEIs (System Error Interrupt) coming from the AP and the CPs (through
the ICU).

The SEI handles up to 64 interrupts. The first 21 interrupts are wired
from the AP. The next 43 interrupts are from the CPs and are triggered
through MSI messages. To handle this complexity, the driver has to
declare to the upper layer: one IRQ domain for the wired interrupts,
one IRQ domain for the MSIs; and acts as a MSI controller ('parent')
by declaring an MSI domain.

Suggested-by: Haim Boot <hayim@marvell.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/irqchip/Kconfig         |   3 +
 drivers/irqchip/Makefile        |   1 +
 drivers/irqchip/irq-mvebu-sei.c | 475 ++++++++++++++++++++++++++++++++
 3 files changed, 479 insertions(+)
 create mode 100644 drivers/irqchip/irq-mvebu-sei.c

Comments

Marc Zyngier Sept. 20, 2018, 8:40 p.m. UTC | #1
Hi Miquel,

On Thu, 30 Aug 2018 08:35:28 +0100,
Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> 
> This is a cascaded interrupt controller in the AP806 GIC that collapses
> SEIs (System Error Interrupt) coming from the AP and the CPs (through
> the ICU).
> 
> The SEI handles up to 64 interrupts. The first 21 interrupts are wired
> from the AP. The next 43 interrupts are from the CPs and are triggered
> through MSI messages. To handle this complexity, the driver has to
> declare to the upper layer: one IRQ domain for the wired interrupts,
> one IRQ domain for the MSIs; and acts as a MSI controller ('parent')
> by declaring an MSI domain.
> 
> Suggested-by: Haim Boot <hayim@marvell.com>
> Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> ---
>  drivers/irqchip/Kconfig         |   3 +
>  drivers/irqchip/Makefile        |   1 +
>  drivers/irqchip/irq-mvebu-sei.c | 475 ++++++++++++++++++++++++++++++++
>  3 files changed, 479 insertions(+)
>  create mode 100644 drivers/irqchip/irq-mvebu-sei.c
> 
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index 383e7b70221d..96451b581452 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -310,6 +310,9 @@ config MVEBU_ODMI
>  config MVEBU_PIC
>  	bool
>  
> +config MVEBU_SEI
> +        bool
> +
>  config LS_SCFG_MSI
>  	def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE
>  	depends on PCI && PCI_MSI
> diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
> index fbd1ec8070ef..b822199445ff 100644
> --- a/drivers/irqchip/Makefile
> +++ b/drivers/irqchip/Makefile
> @@ -76,6 +76,7 @@ obj-$(CONFIG_MVEBU_GICP)		+= irq-mvebu-gicp.o
>  obj-$(CONFIG_MVEBU_ICU)			+= irq-mvebu-icu.o
>  obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o
>  obj-$(CONFIG_MVEBU_PIC)			+= irq-mvebu-pic.o
> +obj-$(CONFIG_MVEBU_SEI)			+= irq-mvebu-sei.o
>  obj-$(CONFIG_LS_SCFG_MSI)		+= irq-ls-scfg-msi.o
>  obj-$(CONFIG_EZNPS_GIC)			+= irq-eznps.o
>  obj-$(CONFIG_ARCH_ASPEED)		+= irq-aspeed-vic.o irq-aspeed-i2c-ic.o
> diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
> new file mode 100644
> index 000000000000..7dbc57d017b3
> --- /dev/null
> +++ b/drivers/irqchip/irq-mvebu-sei.c
> @@ -0,0 +1,475 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#define pr_fmt(fmt) "mvebu-sei: " fmt
> +
> +#include <linux/irq.h>
> +#include <linux/interrupt.h>
> +#include <linux/irqchip/chained_irq.h>
> +#include <linux/irqdomain.h>
> +#include <linux/kernel.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +#include <linux/of_platform.h>
> +#include <linux/msi.h>
> +#include <linux/platform_device.h>
> +#include <linux/irqchip.h>
> +#include <asm/smp_plat.h>
> +
> +#include <dt-bindings/interrupt-controller/arm-gic.h>
> +
> +/* Cause register */
> +#define GICP_SECR(idx)		(0x0  + ((idx) * 0x4))
> +/* Mask register */
> +#define GICP_SEMR(idx)		(0x20 + ((idx) * 0x4))
> +#define GICP_SET_SEI_OFFSET	0x30
> +
> +#define SEI_IRQ_COUNT_PER_REG	32
> +#define SEI_IRQ_REG_COUNT	2
> +#define SEI_IRQ_COUNT		(SEI_IRQ_COUNT_PER_REG * SEI_IRQ_REG_COUNT)
> +#define SEI_IRQ_REG_IDX(irq_id)	((irq_id) / SEI_IRQ_COUNT_PER_REG)
> +#define SEI_IRQ_REG_BIT(irq_id)	((irq_id) % SEI_IRQ_COUNT_PER_REG)
> +
> +struct mvebu_sei_interrupt_range {
> +	u32 first;
> +	u32 size;
> +};
> +
> +struct mvebu_sei_caps {
> +	struct mvebu_sei_interrupt_range ap_range;
> +	struct mvebu_sei_interrupt_range cp_range;
> +};
> +
> +struct mvebu_sei {
> +	struct device *dev;
> +	void __iomem *base;
> +	struct resource *res;
> +	struct irq_domain *ap_domain;
> +	struct irq_domain *cp_domain;
> +	const struct mvebu_sei_caps *caps;
> +
> +	/* Lock on MSI allocations/releases */
> +	struct mutex cp_msi_lock;
> +	DECLARE_BITMAP(cp_msi_bitmap, SEI_IRQ_COUNT);
> +
> +	/* Lock on IRQ masking register */
> +	struct mutex mask_lock;
> +};
> +
> +static int mvebu_sei_domain_to_sei_irq(struct mvebu_sei *sei,
> +				       struct irq_domain *domain,
> +				       irq_hw_number_t hwirq)
> +{
> +	if (domain == sei->ap_domain)
> +		return sei->caps->ap_range.first + hwirq;
> +	else
> +		return sei->caps->cp_range.first + hwirq;
> +}
> +
> +static void mvebu_sei_reset(struct mvebu_sei *sei)
> +{
> +	u32 reg_idx;
> +
> +	/* Clear IRQ cause registers */
> +	for (reg_idx = 0; reg_idx < SEI_IRQ_REG_COUNT; reg_idx++)
> +		writel_relaxed(0xFFFFFFFF, sei->base + GICP_SECR(reg_idx));
> +}
> +
> +static void mvebu_sei_mask_irq(struct irq_data *d)
> +{
> +	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
> +	u32 sei_irq = mvebu_sei_domain_to_sei_irq(sei, d->domain, d->hwirq);
> +	u32 reg_idx = SEI_IRQ_REG_IDX(sei_irq);
> +	u32 reg;
> +
> +	/* 1 disables the interrupt */
> +	mutex_lock(&sei->mask_lock);
> +	reg = readl_relaxed(sei->base + GICP_SEMR(reg_idx));
> +	reg |= BIT(SEI_IRQ_REG_BIT(sei_irq));
> +	writel_relaxed(reg, sei->base + GICP_SEMR(reg_idx));
> +	mutex_unlock(&sei->mask_lock);
> +}
> +
> +static void mvebu_sei_unmask_irq(struct irq_data *d)
> +{
> +	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
> +	u32 sei_irq = mvebu_sei_domain_to_sei_irq(sei, d->domain, d->hwirq);
> +	u32 reg_idx = SEI_IRQ_REG_IDX(sei_irq);
> +	u32 reg;
> +
> +	/* 0 enables the interrupt */
> +	mutex_lock(&sei->mask_lock);
> +	reg = readl_relaxed(sei->base + GICP_SEMR(reg_idx));
> +	reg &= ~BIT(SEI_IRQ_REG_BIT(sei_irq));
> +	writel_relaxed(reg, sei->base + GICP_SEMR(reg_idx));
> +	mutex_unlock(&sei->mask_lock);
> +}
> +
> +static void mvebu_sei_compose_msi_msg(struct irq_data *data,
> +				      struct msi_msg *msg)
> +{
> +	struct mvebu_sei *sei = data->chip_data;
> +	phys_addr_t set = sei->res->start + GICP_SET_SEI_OFFSET;
> +
> +	msg->data = mvebu_sei_domain_to_sei_irq(sei, data->domain, data->hwirq);
> +	msg->address_lo = lower_32_bits(set);
> +	msg->address_hi = upper_32_bits(set);
> +}
> +
> +static int mvebu_sei_ap_set_type(struct irq_data *data, unsigned int type)
> +{
> +	if (!(type & IRQ_TYPE_LEVEL_HIGH))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static int mvebu_sei_cp_set_type(struct irq_data *data, unsigned int type)
> +{
> +	if (!(type & IRQ_TYPE_EDGE_RISING))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static struct irq_chip mvebu_sei_ap_irq_chip = {
> +	.name			= "AP SEI",
> +	.irq_mask		= mvebu_sei_mask_irq,
> +	.irq_unmask		= mvebu_sei_unmask_irq,
> +	.irq_set_type		= mvebu_sei_ap_set_type,
> +};
> +
> +static struct irq_chip mvebu_sei_cp_irq_chip = {
> +	.name			= "CP SEI",
> +	.irq_mask		= mvebu_sei_mask_irq,
> +	.irq_unmask		= mvebu_sei_unmask_irq,
> +	.irq_eoi                = irq_chip_eoi_parent,
> +	.irq_set_affinity       = irq_chip_set_affinity_parent,
> +	.irq_set_type		= mvebu_sei_cp_set_type,
> +	.irq_compose_msi_msg	= mvebu_sei_compose_msi_msg,
> +};
> +
> +static int mvebu_sei_ap_match(struct irq_domain *d, struct device_node *node,
> +			      enum irq_domain_bus_token bus_token)
> +{
> +	struct mvebu_sei *sei = d->host_data;
> +
> +	if (sei->dev->of_node != node)
> +		return 0;
> +
> +	if (d == sei->ap_domain)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int mvebu_sei_ap_irq_map(struct irq_domain *domain, unsigned int virq,
> +				irq_hw_number_t hwirq)
> +{
> +	struct mvebu_sei *sei = domain->host_data;
> +
> +	irq_set_chip_data(virq, sei);
> +	irq_set_chip_and_handler(virq, &mvebu_sei_ap_irq_chip,
> +				 handle_level_irq);
> +	irq_set_status_flags(virq, IRQ_LEVEL);
> +	irq_set_probe(virq);
> +
> +	return 0;
> +}
> +
> +static const struct irq_domain_ops mvebu_sei_ap_domain_ops = {
> +	.match = mvebu_sei_ap_match,
> +	.xlate = irq_domain_xlate_onecell,
> +	.map = mvebu_sei_ap_irq_map,
> +};
> +
> +static int mvebu_sei_cp_domain_alloc(struct mvebu_sei *sei)
> +{
> +	int hwirq;
> +
> +	mutex_lock(&sei->cp_msi_lock);
> +	hwirq = find_first_zero_bit(sei->cp_msi_bitmap,
> +				    sei->caps->cp_range.size);
> +	set_bit(hwirq, sei->cp_msi_bitmap);
> +	mutex_unlock(&sei->cp_msi_lock);
> +
> +	return hwirq;
> +}
> +
> +static void mvebu_sei_cp_domain_free(struct mvebu_sei *sei, int hwirq)
> +{
> +	mutex_lock(&sei->cp_msi_lock);
> +	clear_bit(hwirq, sei->cp_msi_bitmap);
> +	mutex_unlock(&sei->cp_msi_lock);
> +}
> +
> +static int mvebu_sei_irq_domain_alloc(struct irq_domain *domain,
> +				      unsigned int virq, unsigned int nr_irqs,
> +				      void *args)
> +{
> +	struct mvebu_sei *sei = domain->host_data;
> +	struct irq_fwspec *fwspec = args;
> +	int hwirq;
> +	int ret;
> +
> +	/* The software only supports single allocations for now */
> +	if (nr_irqs != 1)
> +		return -ENOTSUPP;
> +
> +	hwirq = mvebu_sei_cp_domain_alloc(sei);
> +	if (hwirq < 0)
> +		return -ENOSPC;
> +
> +	fwspec->fwnode = domain->parent->fwnode;
> +	fwspec->param_count = 3;
> +	fwspec->param[0] = GIC_SPI;
> +	fwspec->param[1] = 0;

On first approximation, this deserves a good comment about 0
representing INTID 32 at the GIC level. Then, another question is why
this doesn't come from DT. I bet that in the future, this block will
be reused and you'll find more than one is a single chip.

But the real question is why you are constantly calling
irq_alloc_irqs_parent. I remember commenting about this in my previous
round of review, and I still see this.

The whole SEI thing is a chained interrupt controller, a
multiplexer. The output interrupt is known at probe time, and never
change. You also have code to that effect in the probe routine. So
what is this exactly? Dead code?

> +	/*
> +	 * Assume edge rising for now, it will be properly set when
> +	 * ->set_type() is called
> +	 */
> +	fwspec->param[2] = IRQ_TYPE_EDGE_RISING;
> +	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, fwspec);
> +	if (ret)
> +		goto free_irq;
> +
> +	ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
> +					    &mvebu_sei_cp_irq_chip, sei);

Same thing here. I think the whole mvebu_sei_cp_irq_chip thing should
go away.

> +	if (ret)
> +		goto free_irq_parents;
> +
> +	return 0;
> +
> +free_irq_parents:
> +	irq_domain_free_irqs_parent(domain, virq, 1);
> +free_irq:
> +	mvebu_sei_cp_domain_free(sei, hwirq);
> +
> +	return ret;
> +}
> +
> +static void mvebu_sei_irq_domain_free(struct irq_domain *domain,
> +				      unsigned int virq, unsigned int nr_irqs)
> +{
> +	struct mvebu_sei *sei = domain->host_data;
> +	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
> +	u32 irq_count = sei->caps->ap_range.size + sei->caps->cp_range.size;
> +
> +	if (nr_irqs != 1 || d->hwirq >= irq_count) {
> +		dev_err(sei->dev, "Invalid hwirq %lu\n", d->hwirq);
> +		return;
> +	}
> +
> +	mvebu_sei_cp_domain_free(sei, d->hwirq);
> +}
> +
> +static const struct irq_domain_ops mvebu_sei_cp_domain_ops = {
> +	.alloc = mvebu_sei_irq_domain_alloc,
> +	.free = mvebu_sei_irq_domain_free,
> +};
> +
> +static struct irq_chip mvebu_sei_msi_irq_chip = {
> +	.name		= "SEI MSI controller",
> +	.irq_set_type	= mvebu_sei_cp_set_type,
> +};
> +
> +static struct msi_domain_ops mvebu_sei_msi_ops = {
> +};
> +
> +static struct msi_domain_info mvebu_sei_msi_domain_info = {
> +	.flags	= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
> +	.ops	= &mvebu_sei_msi_ops,
> +	.chip	= &mvebu_sei_msi_irq_chip,
> +};
> +
> +static void mvebu_sei_handle_cascade_irq(struct irq_desc *desc)
> +{
> +	struct mvebu_sei *sei = irq_desc_get_handler_data(desc);
> +	struct irq_chip *chip = irq_desc_get_chip(desc);
> +	DECLARE_BITMAP(irqmap, SEI_IRQ_COUNT);
> +	u32 *irqreg = (u32 *)&irqmap;
> +	u32 idx, irqn;
> +
> +	chained_irq_enter(chip, desc);
> +
> +	/* Create the bitmap of the pending interrupts */
> +	for (idx = 0; idx < SEI_IRQ_REG_COUNT; idx++)
> +		irqreg[idx] = readl_relaxed(sei->base + GICP_SECR(idx));
> +
> +	/* Call handler for each pending interrupt */
> +	for_each_set_bit(irqn, irqmap, SEI_IRQ_COUNT) {
> +		struct irq_domain *domain;
> +		u32 virq, hwirq;
> +
> +		/*
> +		 * Finding Linux mapping (virq) needs the right domain
> +		 * and the relative hwirq (which start at 0 in both
> +		 * cases, while irqn is relative to all SEI interrupts).
> +		 */
> +		if (irqn < sei->caps->ap_range.size) {
> +			domain = sei->ap_domain;
> +			hwirq = irqn;
> +		} else {
> +			domain = sei->cp_domain;
> +			hwirq = irqn - sei->caps->ap_range.size;
> +		}
> +
> +		virq = irq_find_mapping(domain, hwirq);
> +		if (!virq) {
> +			dev_warn(sei->dev,
> +				 "Spurious IRQ detected (hwirq %d)\n", hwirq);
> +			continue;
> +		}
> +
> +		/* Call IRQ handler */
> +		generic_handle_irq(virq);
> +	}
> +
> +	/* Clear the pending interrupts by writing 1 to the set bits */
> +	for (idx = 0; idx < SEI_IRQ_REG_COUNT; idx++)
> +		if (irqreg[idx])
> +			writel_relaxed(irqreg[idx], sei->base + GICP_SECR(idx));
> +
> +	chained_irq_exit(chip, desc);
> +}
> +
> +static int mvebu_sei_probe(struct platform_device *pdev)
> +{
> +	struct device_node *node = pdev->dev.of_node, *parent;
> +	struct irq_domain *parent_domain, *plat_domain;
> +	struct mvebu_sei *sei;
> +	u32 parent_irq;
> +	int ret;
> +
> +	sei = devm_kzalloc(&pdev->dev, sizeof(*sei), GFP_KERNEL);
> +	if (!sei)
> +		return -ENOMEM;
> +
> +	sei->dev = &pdev->dev;
> +
> +	mutex_init(&sei->cp_msi_lock);
> +	mutex_init(&sei->mask_lock);
> +
> +	sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	sei->base = devm_ioremap_resource(sei->dev, sei->res);
> +	if (!sei->base) {
> +		dev_err(sei->dev, "Failed to remap SEI resource\n");
> +		return -ENODEV;
> +	}
> +
> +	/* Retrieve the SEI capabilities with the interrupt ranges */
> +	sei->caps = of_device_get_match_data(&pdev->dev);
> +	if (!sei->caps) {
> +		dev_err(sei->dev,
> +			"Could not retrieve controller capabilities\n");
> +		return -EINVAL;
> +	}
> +
> +	mvebu_sei_reset(sei);
> +
> +	/*
> +	 * Reserve the single (top-level) parent SPI IRQ from which all the
> +	 * interrupts handled by this driver will be signaled.
> +	 */
> +	parent_irq = irq_of_parse_and_map(node, 0);
> +	if (parent_irq <= 0) {
> +		dev_err(sei->dev, "Failed to retrieve top-level SPI IRQ\n");
> +		return -ENODEV;
> +	}
> +
> +	/*
> +	 * SEIs can be triggered from the AP through wired interrupts and from
> +	 * the CPs through MSIs.
> +	 */
> +
> +	/* Get a reference to the parent domain */
> +	parent = of_irq_find_parent(node);
> +	if (!parent) {
> +		dev_err(sei->dev, "Failed to find parent IRQ node\n");
> +		ret = -ENODEV;
> +		goto dispose_irq;
> +	}
> +
> +	parent_domain = irq_find_host(parent);
> +	if (!parent_domain) {
> +		dev_err(sei->dev, "Failed to find parent IRQ domain\n");
> +		ret = -ENODEV;
> +		goto dispose_irq;
> +	}
> +
> +	/* Create the 'wired' domain */
> +	sei->ap_domain = irq_domain_create_hierarchy(parent_domain, 0,
> +						     sei->caps->ap_range.size,
> +						     of_node_to_fwnode(node),
> +						     &mvebu_sei_ap_domain_ops,
> +						     sei);
> +	if (!sei->ap_domain) {
> +		dev_err(sei->dev, "Failed to create AP IRQ domain\n");
> +		ret = -ENOMEM;
> +		goto dispose_irq;
> +	}
> +
> +	/* Create the 'MSI' domain */
> +	sei->cp_domain = irq_domain_create_hierarchy(parent_domain, 0,
> +						     sei->caps->cp_range.size,
> +						     of_node_to_fwnode(node),
> +						     &mvebu_sei_cp_domain_ops,
> +						     sei);
> +	if (!sei->cp_domain) {
> +		pr_err("Failed to create CPs IRQ domain\n");
> +		ret = -ENOMEM;
> +		goto remove_ap_domain;
> +	}
> +
> +	plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node),
> +						     &mvebu_sei_msi_domain_info,
> +						     sei->cp_domain);
> +	if (!plat_domain) {
> +		pr_err("Failed to create CPs MSI domain\n");
> +		ret = -ENOMEM;
> +		goto remove_cp_domain;
> +	}
> +
> +	platform_set_drvdata(pdev, sei);
> +
> +	irq_set_chained_handler(parent_irq, mvebu_sei_handle_cascade_irq);
> +	irq_set_handler_data(parent_irq, sei);
> +
> +	return 0;
> +
> +remove_cp_domain:
> +	irq_domain_remove(sei->cp_domain);
> +remove_ap_domain:
> +	irq_domain_remove(sei->ap_domain);
> +dispose_irq:
> +	irq_dispose_mapping(parent_irq);
> +
> +	return ret;
> +}
> +
> +struct mvebu_sei_caps mvebu_sei_ap806_caps = {
> +	.ap_range = {
> +		.first = 0,
> +		.size = 21,
> +	},
> +	.cp_range = {
> +		.first = 21,
> +		.size = 43,
> +	},

I'd appreciate some symbolic constants instead of magic numbers.

> +};
> +
> +static const struct of_device_id mvebu_sei_of_match[] = {
> +	{
> +		.compatible = "marvell,ap806-sei",
> +		.data = &mvebu_sei_ap806_caps,
> +	},
> +	{},
> +};
> +
> +static struct platform_driver mvebu_sei_driver = {
> +	.probe  = mvebu_sei_probe,
> +	.driver = {
> +		.name = "mvebu-sei",
> +		.of_match_table = mvebu_sei_of_match,
> +	},
> +};
> +builtin_platform_driver(mvebu_sei_driver);
> -- 
> 2.17.1
> 

Thanks,

	M.
Miquel Raynal Sept. 24, 2018, 4:01 p.m. UTC | #2
Hi Marc,

> > +
> > +static int mvebu_sei_cp_domain_alloc(struct mvebu_sei *sei)
> > +{
> > +	int hwirq;
> > +
> > +	mutex_lock(&sei->cp_msi_lock);
> > +	hwirq = find_first_zero_bit(sei->cp_msi_bitmap,
> > +				    sei->caps->cp_range.size);
> > +	set_bit(hwirq, sei->cp_msi_bitmap);
> > +	mutex_unlock(&sei->cp_msi_lock);
> > +
> > +	return hwirq;
> > +}
> > +
> > +static void mvebu_sei_cp_domain_free(struct mvebu_sei *sei, int hwirq)
> > +{
> > +	mutex_lock(&sei->cp_msi_lock);
> > +	clear_bit(hwirq, sei->cp_msi_bitmap);
> > +	mutex_unlock(&sei->cp_msi_lock);
> > +}
> > +
> > +static int mvebu_sei_irq_domain_alloc(struct irq_domain *domain,
> > +				      unsigned int virq, unsigned int nr_irqs,
> > +				      void *args)
> > +{
> > +	struct mvebu_sei *sei = domain->host_data;
> > +	struct irq_fwspec *fwspec = args;
> > +	int hwirq;
> > +	int ret;
> > +
> > +	/* The software only supports single allocations for now */
> > +	if (nr_irqs != 1)
> > +		return -ENOTSUPP;
> > +
> > +	hwirq = mvebu_sei_cp_domain_alloc(sei);
> > +	if (hwirq < 0)
> > +		return -ENOSPC;
> > +
> > +	fwspec->fwnode = domain->parent->fwnode;
> > +	fwspec->param_count = 3;
> > +	fwspec->param[0] = GIC_SPI;
> > +	fwspec->param[1] = 0;  
> 
> On first approximation, this deserves a good comment about 0
> representing INTID 32 at the GIC level. Then, another question is why
> this doesn't come from DT. I bet that in the future, this block will
> be reused and you'll find more than one is a single chip.

About the 0, sure, if we maintain this code, I should probably derive
the value from DT.

> 
> But the real question is why you are constantly calling
> irq_alloc_irqs_parent. I remember commenting about this in my previous
> round of review, and I still see this.

I really tried to remove it but I clearly failed. When there is
no irq_alloc_irqs_parent() call in mvebu_sei_irq_domain_alloc() it looks
like something is missing and I get a oops at probe time.

This is the oops:

[    3.112148] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000e8
[    3.120970] Mem abort info:
[    3.123775]   ESR = 0x96000004
[    3.126842]   Exception class = DABT (current EL), IL = 32 bits
[    3.132787]   SET = 0, FnV = 0
[    3.135853]   EA = 0, S1PTW = 0
[    3.139006] Data abort info:
[    3.141897]   ISV = 0, ISS = 0x00000004
[    3.145749]   CM = 0, WnR = 0
[    3.148728] [00000000000000e8] user address but active_mm is swapper
[    3.155110] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[    3.160705] Modules linked in:
[    3.163777] CPU: 0 PID: 35 Comm: kworker/0:1 Not tainted 4.19.0-rc1-00024-g1ff609093efd-dirty #1931
[    3.172861] Hardware name: Marvell Armada 7040 DB board (DT)
[    3.178556] Workqueue: events deferred_probe_work_func
[    3.183719] pstate: 60000085 (nZCv daIf -PAN -UAO)
[    3.188535] pc : irq_set_irqchip_state+0x160/0x1a0
[    3.193348] lr : irq_set_irqchip_state+0x160/0x1a0
[    3.198158] sp : ffff000009d037f0
[    3.201486] x29: ffff000009d037f0 x28: ffff000008e49d20 
[    3.206824] x27: ffff000008e49cb0 x26: 0000000000000000 
[    3.212161] x25: 0000000000000000 x24: ffff000008ad2570 
[    3.217498] x23: ffff8000e8f80c00 x22: ffff0000093e9000 
[    3.222835] x21: 0000000000000033 x20: 0000000000000000 
[    3.228172] x19: ffff8000e8bdf300 x18: ffffffffffffffff 
[    3.233509] x17: 0000000000000000 x16: 0000000000000000 
[    3.238846] x15: ffff0000093e96c8 x14: ffff0000895436bf 
[    3.244182] x13: ffff0000095436cd x12: ffff0000094010c8 
[    3.249519] x11: ffff000009401000 x10: 0000000005f5e0ff 
[    3.254856] x9 : ffff000009d034f0 x8 : 325b206574617473 
[    3.260192] x7 : 5f70696863717269 x6 : ffff0000085cd238 
[    3.265529] x5 : 0000000000000000 x4 : 0000000000000000 
[    3.270866] x3 : ffffffffffffffff x2 : 337acc4b0d752600 
[    3.276202] x1 : 0000000000000000 x0 : 000000000000001c 
[    3.281540] Process kworker/0:1 (pid: 35, stack limit = 0x(____ptrval____))
[    3.288530] Call trace:
[    3.290987]  irq_set_irqchip_state+0x160/0x1a0
[    3.295452]  mvebu_icu_irq_domain_alloc+0x180/0x208
[    3.300351]  __irq_domain_alloc_irqs+0x138/0x2a0
[    3.304988]  irq_create_fwspec_mapping+0x140/0x328
[    3.309799]  irq_create_of_mapping+0x78/0xa0
[    3.314089]  of_irq_get+0x88/0xf8
[    3.317419]  platform_get_irq+0x24/0x178
[    3.321360]  armada_thermal_probe+0x110/0x668
[    3.325735]  platform_drv_probe+0x50/0xb0
[    3.329763]  really_probe+0x1fc/0x290
[    3.333441]  driver_probe_device+0x58/0x100
[    3.337643]  __device_attach_driver+0x9c/0xf8
[    3.342019]  bus_for_each_drv+0x70/0xc8
[    3.345872]  __device_attach+0xe0/0x140
[    3.349725]  device_initial_probe+0x10/0x18
[    3.353926]  bus_probe_device+0x94/0xa0
[    3.357779]  deferred_probe_work_func+0x6c/0xa0
[    3.362331]  process_one_work+0x1dc/0x330
[    3.366359]  worker_thread+0x23c/0x430
[    3.370124]  kthread+0xf8/0x128
[    3.373280]  ret_from_fork+0x10/0x18
[    3.376872] Code: aa1803e1 aa1c03e0 528119a2 97fff760 (f9407680) 
[    3.382992] ---[ end trace f423829cb4b1c06c ]---


The faulty instruction is the if statement there: 
https://elixir.bootlin.com/linux/v4.19-rc5/source/kernel/irq/manage.c#L2243


I added traces to understand what is going on. The do-while loop gets
an irq_data structure, derives the chip pointer, then look at its
->set_irqchip_state() callback. It it does not exist, it tries with its
parent irq_data.

Here are the traces that show why the oops happens when
irq_alloc_irqs_parent() is not called:

With irq_alloc_irqs_parent() (running exactly what I sent in the series):

[    2.993887] irq_set_irqchip_state [2244] irq_data ptr: 0xffff8000e8f80c28 [    3.000703] irq_set_irqchip_state [2246]   chip ptr: 0xffff000009401538 (name: none)
[    3.008480] irq_set_irqchip_state [2249]   parent irq_data ptr ? 0xffff8000e8bdf200

[    3.024045] irq_set_irqchip_state [2244] irq_data ptr: 0xffff8000e8bdf200
[    3.030861] irq_set_irqchip_state [2246]   chip ptr: 0xffff00000941b658 (name: SEI MSI controller)
[    3.039857] irq_set_irqchip_state [2249]   parent irq_data ptr ? 0xffff8000e8bdf280

[    3.055420] irq_set_irqchip_state [2244] irq_data ptr: 0xffff8000e8bdf280
[    3.062236] irq_set_irqchip_state [2246]   chip ptr: 0xffff00000941b3e8 (name: CP SEI)
[    3.070185] irq_set_irqchip_state [2249]   parent irq_data ptr ? 0xffff8000e8bdf300

[    3.085750] irq_set_irqchip_state [2244] irq_data ptr: 0xffff8000e8bdf300
[    3.092566] irq_set_irqchip_state [2246]   chip ptr: 0x0000000000000000 (name: (null))
[    3.100515] irq_set_irqchip_state [2249]   parent irq_data ptr ? 0x0000000000000000

Without irq_alloc_irqs_parent() (same code base, with the call to this function commented out):

[    2.992364] irq_set_irqchip_state [2244] irq_data: 0xffff8000e7cb7228
[    2.999181] irq_set_irqchip_state [2246]   chip: 0xffff000009401538 (name: none)
[    3.006956] irq_set_irqchip_state [2249]   parent irq_data: 0xffff8000e93a9e00

[    3.022520] irq_set_irqchip_state [2244] irq_data: 0xffff8000e93a9e00
[    3.029337] irq_set_irqchip_state [2246]   chip: 0xffff00000941b658 (name: SEI MSI controller)
[    3.038333] irq_set_irqchip_state [2249]   parent irq_data: 0xffff8000e93a9e80

[    3.053896] irq_set_irqchip_state [2244] irq_data: 0xffff8000e93a9e80
[    3.060713] irq_set_irqchip_state [2246]   chip: 0xffff00000941b3e8 (name: CP SEI)
[    3.068662] irq_set_irqchip_state [2249]   parent irq_data: 0xffff8000e93a9f00

[    3.084225] irq_set_irqchip_state [2244] irq_data: 0xffff8000e93a9f00
[    3.091041] irq_set_irqchip_state [2246]   chip: 0xffff0000093ec148 (name: GICv2)
[    3.098903] irq_set_irqchip_state [2249]   parent irq_data: 0x0000000000000000


The difference is that at this stage, the irq_data->chip pointer of the
SEI controller _parent_ (ie. the GIC's chip pointer) is not valid. I
digged a lot in this direction during your vacations to find out what I
missed, and I ended up calling back irq_alloc_irqs_parent().

If you have an idea of how to handle this properly, I am all ears!

> 
> The whole SEI thing is a chained interrupt controller, a
> multiplexer. The output interrupt is known at probe time, and never
> change. You also have code to that effect in the probe routine. So
> what is this exactly? Dead code?
> 
> > +	/*
> > +	 * Assume edge rising for now, it will be properly set when
> > +	 * ->set_type() is called
> > +	 */
> > +	fwspec->param[2] = IRQ_TYPE_EDGE_RISING;
> > +	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, fwspec);
> > +	if (ret)
> > +		goto free_irq;
> > +

[...]

> > +
> > +struct mvebu_sei_caps mvebu_sei_ap806_caps = {
> > +	.ap_range = {
> > +		.first = 0,
> > +		.size = 21,
> > +	},
> > +	.cp_range = {
> > +		.first = 21,
> > +		.size = 43,
> > +	},  
> 
> I'd appreciate some symbolic constants instead of magic numbers.

I can definitely use symbolic constants but these numbers are already
quite meaningful, the constants could be named:

    SEI_AP806_FIRST_INT_IN_AP_RANGE
    SEI_AP806_SIZE_OF_AP_RANGE

> 
> > +};
> > +
> > +static const struct of_device_id mvebu_sei_of_match[] = {
> > +	{
> > +		.compatible = "marvell,ap806-sei",
> > +		.data = &mvebu_sei_ap806_caps,
> > +	},
> > +	{},
> > +};
> > +
> > +static struct platform_driver mvebu_sei_driver = {
> > +	.probe  = mvebu_sei_probe,
> > +	.driver = {
> > +		.name = "mvebu-sei",
> > +		.of_match_table = mvebu_sei_of_match,
> > +	},
> > +};
> > +builtin_platform_driver(mvebu_sei_driver);
> > -- 
> > 2.17.1
> >   
> 
> Thanks,
> 
> 	M.
> 


Thanks,
Miquèl
Marc Zyngier Sept. 28, 2018, 10:25 a.m. UTC | #3
On 24/09/18 17:01, Miquel Raynal wrote:

Hi Miquel,

[...]

> The difference is that at this stage, the irq_data->chip pointer of the
> SEI controller _parent_ (ie. the GIC's chip pointer) is not valid. I
> digged a lot in this direction during your vacations to find out what I
> missed, and I ended up calling back irq_alloc_irqs_parent().
> 
> If you have an idea of how to handle this properly, I am all ears!

The most glaring problem is that you create a hierarchy that encompasses
the GIC, which is just wrong. The hierarchy cannot point to the GIC,
because it end-up as a multiplexer.

This code sequence in the probe function is the root of all evil:

	/* Get a reference to the parent domain */
	parent = of_irq_find_parent(node);
	if (!parent) {
		dev_err(sei->dev, "Failed to find parent IRQ node\n");
		ret = -ENODEV;
		goto dispose_irq;
	}

This is a GIC interrupt, which is the output line for the SEI block.

	parent_domain = irq_find_host(parent);
	if (!parent_domain) {
		dev_err(sei->dev, "Failed to find parent IRQ domain\n");
		ret = -ENODEV;
		goto dispose_irq;
	}

That's the GIC domain.

	/* Create the 'wired' domain */
	sei->ap_domain = irq_domain_create_hierarchy(parent_domain, 0,
						     sei->caps->ap_range.size,
						     of_node_to_fwnode(node),
						     &mvebu_sei_ap_domain_ops,
						     sei);
	if (!sei->ap_domain) {
		dev_err(sei->dev, "Failed to create AP IRQ domain\n");
		ret = -ENOMEM;
		goto dispose_irq;
	}

And here, you're saying "each and every AP SEI interrupt is directly
linked to a unique GIC interrupt". Nothing could be further from the
truth, since all SEI interrupts are funnelled through a *single*
GIC interrupt. So you cannot create it as a hierarchy parented at
the GIC.

	/* Create the 'MSI' domain */
	sei->cp_domain = irq_domain_create_hierarchy(parent_domain, 0,
						     sei->caps->cp_range.size,
						     of_node_to_fwnode(node),
						     &mvebu_sei_cp_domain_ops,
						     sei);


Same thing here.

The issue here is that you're using the GIC domain as the way to root
the two distinct SEI domains, while they should be rooted at an internal,
SEI-specific domain. I'd suggest a topology like this:

                  AP-SEI ---> S
                              E
    Plat-MSI ---> CP-SEI ---> I

CP-SEI and AP-SEI use SEI as a parent. SEI does not have a parent, but is
a chained irqchip.

I'm happy to help you reworking this piece of code if you tell me how to
plug a driver that can use it on an mcbin system.

Thanks,

	M.
Miquel Raynal Sept. 28, 2018, 4:38 p.m. UTC | #4
Hi Marc,

Marc Zyngier <marc.zyngier@arm.com> wrote on Fri, 28 Sep 2018 11:25:32
+0100:

> On 24/09/18 17:01, Miquel Raynal wrote:
> 
> Hi Miquel,
> 
> [...]
> 
> > The difference is that at this stage, the irq_data->chip pointer of the
> > SEI controller _parent_ (ie. the GIC's chip pointer) is not valid. I
> > digged a lot in this direction during your vacations to find out what I
> > missed, and I ended up calling back irq_alloc_irqs_parent().
> > 
> > If you have an idea of how to handle this properly, I am all ears!  
> 
> The most glaring problem is that you create a hierarchy that encompasses
> the GIC, which is just wrong. The hierarchy cannot point to the GIC,
> because it end-up as a multiplexer.
> 
> This code sequence in the probe function is the root of all evil:
> 
> 	/* Get a reference to the parent domain */
> 	parent = of_irq_find_parent(node);
> 	if (!parent) {
> 		dev_err(sei->dev, "Failed to find parent IRQ node\n");
> 		ret = -ENODEV;
> 		goto dispose_irq;
> 	}
> 
> This is a GIC interrupt, which is the output line for the SEI block.
> 
> 	parent_domain = irq_find_host(parent);
> 	if (!parent_domain) {
> 		dev_err(sei->dev, "Failed to find parent IRQ domain\n");
> 		ret = -ENODEV;
> 		goto dispose_irq;
> 	}
> 
> That's the GIC domain.
> 
> 	/* Create the 'wired' domain */
> 	sei->ap_domain = irq_domain_create_hierarchy(parent_domain, 0,
> 						     sei->caps->ap_range.size,
> 						     of_node_to_fwnode(node),
> 						     &mvebu_sei_ap_domain_ops,
> 						     sei);
> 	if (!sei->ap_domain) {
> 		dev_err(sei->dev, "Failed to create AP IRQ domain\n");
> 		ret = -ENOMEM;
> 		goto dispose_irq;
> 	}
> 
> And here, you're saying "each and every AP SEI interrupt is directly
> linked to a unique GIC interrupt". Nothing could be further from the
> truth, since all SEI interrupts are funnelled through a *single*
> GIC interrupt. So you cannot create it as a hierarchy parented at
> the GIC.
> 
> 	/* Create the 'MSI' domain */
> 	sei->cp_domain = irq_domain_create_hierarchy(parent_domain, 0,
> 						     sei->caps->cp_range.size,
> 						     of_node_to_fwnode(node),
> 						     &mvebu_sei_cp_domain_ops,
> 						     sei);
> 
> 
> Same thing here.
> 
> The issue here is that you're using the GIC domain as the way to root
> the two distinct SEI domains, while they should be rooted at an internal,
> SEI-specific domain. I'd suggest a topology like this:
> 
>                   AP-SEI ---> S
>                               E
>     Plat-MSI ---> CP-SEI ---> I
> 
> CP-SEI and AP-SEI use SEI as a parent. SEI does not have a parent, but is
> a chained irqchip.

Thanks you very much for this detailed explanation. The above is what I
intended to do, but maybe what I achieved is more something like:

                   AP-SEI ---> G
                               I
     Plat-MSI ---> CP-SEI ---> C

And now I understand better what is bothering you since the beginning.

> 
> I'm happy to help you reworking this piece of code if you tell me how to
> plug a driver that can use it on an mcbin system.

Next week I'll have another look at the driver, but it could be great
if you could show me the big lines of how you imagine the rework. I
prepared a branch based on 4.19-rc1 with:
* The ICU/SEI series
* The series adding support for thermal interrupts in the
  armada_thermal.c driver (it triggers a wired SEI when the AP is too
  hot or an MSI SEI when it is a CP).
* The needed DT changes.

https://github.com/miquelraynal/linux/tree/marvell/4.19-rc1/icu-sei


Thanks,
Miquèl
Marc Zyngier Sept. 30, 2018, 2:39 p.m. UTC | #5
On Fri, 28 Sep 2018 18:38:06 +0200
Miquel Raynal <miquel.raynal@bootlin.com> wrote:

> Hi Marc,
> 
> Marc Zyngier <marc.zyngier@arm.com> wrote on Fri, 28 Sep 2018 11:25:32
> +0100:
> 
> > On 24/09/18 17:01, Miquel Raynal wrote:
> > 
> > Hi Miquel,
> > 
> > [...]
> >   
> > > The difference is that at this stage, the irq_data->chip pointer of the
> > > SEI controller _parent_ (ie. the GIC's chip pointer) is not valid. I
> > > digged a lot in this direction during your vacations to find out what I
> > > missed, and I ended up calling back irq_alloc_irqs_parent().
> > > 
> > > If you have an idea of how to handle this properly, I am all ears!    
> > 
> > The most glaring problem is that you create a hierarchy that encompasses
> > the GIC, which is just wrong. The hierarchy cannot point to the GIC,
> > because it end-up as a multiplexer.
> > 
> > This code sequence in the probe function is the root of all evil:
> > 
> > 	/* Get a reference to the parent domain */
> > 	parent = of_irq_find_parent(node);
> > 	if (!parent) {
> > 		dev_err(sei->dev, "Failed to find parent IRQ node\n");
> > 		ret = -ENODEV;
> > 		goto dispose_irq;
> > 	}
> > 
> > This is a GIC interrupt, which is the output line for the SEI block.
> > 
> > 	parent_domain = irq_find_host(parent);
> > 	if (!parent_domain) {
> > 		dev_err(sei->dev, "Failed to find parent IRQ domain\n");
> > 		ret = -ENODEV;
> > 		goto dispose_irq;
> > 	}
> > 
> > That's the GIC domain.
> > 
> > 	/* Create the 'wired' domain */
> > 	sei->ap_domain = irq_domain_create_hierarchy(parent_domain, 0,
> > 						     sei->caps->ap_range.size,
> > 						     of_node_to_fwnode(node),
> > 						     &mvebu_sei_ap_domain_ops,
> > 						     sei);
> > 	if (!sei->ap_domain) {
> > 		dev_err(sei->dev, "Failed to create AP IRQ domain\n");
> > 		ret = -ENOMEM;
> > 		goto dispose_irq;
> > 	}
> > 
> > And here, you're saying "each and every AP SEI interrupt is directly
> > linked to a unique GIC interrupt". Nothing could be further from the
> > truth, since all SEI interrupts are funnelled through a *single*
> > GIC interrupt. So you cannot create it as a hierarchy parented at
> > the GIC.
> > 
> > 	/* Create the 'MSI' domain */
> > 	sei->cp_domain = irq_domain_create_hierarchy(parent_domain, 0,
> > 						     sei->caps->cp_range.size,
> > 						     of_node_to_fwnode(node),
> > 						     &mvebu_sei_cp_domain_ops,
> > 						     sei);
> > 
> > 
> > Same thing here.
> > 
> > The issue here is that you're using the GIC domain as the way to root
> > the two distinct SEI domains, while they should be rooted at an internal,
> > SEI-specific domain. I'd suggest a topology like this:
> > 
> >                   AP-SEI ---> S
> >                               E
> >     Plat-MSI ---> CP-SEI ---> I
> > 
> > CP-SEI and AP-SEI use SEI as a parent. SEI does not have a parent, but is
> > a chained irqchip.  
> 
> Thanks you very much for this detailed explanation. The above is what I
> intended to do, but maybe what I achieved is more something like:
> 
>                    AP-SEI ---> G
>                                I
>      Plat-MSI ---> CP-SEI ---> C
> 
> And now I understand better what is bothering you since the beginning.
> 
> > 
> > I'm happy to help you reworking this piece of code if you tell me how to
> > plug a driver that can use it on an mcbin system.  
> 
> Next week I'll have another look at the driver, but it could be great
> if you could show me the big lines of how you imagine the rework. I
> prepared a branch based on 4.19-rc1 with:
> * The ICU/SEI series
> * The series adding support for thermal interrupts in the
>   armada_thermal.c driver (it triggers a wired SEI when the AP is too
>   hot or an MSI SEI when it is a CP).
> * The needed DT changes.
> 
> https://github.com/miquelraynal/linux/tree/marvell/4.19-rc1/icu-sei 

I've played with this a bit too much, and have basically rewritten the
whole SEI driver (see [1] for the resulting patch, which doesn't leave
much unchanged).

It also requires some changes[2] to the ICU driver, which assumes that
the SEI and GICP have similar interrupt flows (news flash, they don't).
I still hate the way this driver is written (no clear separation
between what is essentially two different drivers bolted together), but
that's a different story.

I'm also very worried that ICU-SEI interrupts are handled as edge,
while they are level. The HW doesn't seem to offer any facility to
resample the level, so this looks broken by design. Maybe the Marvell
people on Cc could shed some light on how they expect this to work?

As for the thermal stuff, the DT is wrong (see [3]).

I've tested it by letting the machine overheat, and take thermal
interrupts on both AP and CP. The box shutdowns cleanly in both cases,
so I guess it somehow works.

Thanks,

	M.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=marvell/4.19-rc1/icu-sei&id=63afff77f73c6cc637fceef247e6860b12b07304

[2] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=marvell/4.19-rc1/icu-sei&id=e2a54f7716fb096b06d9780065ff37b671659c2e

[3] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=marvell/4.19-rc1/icu-sei&id=927d5d4d03cc0149c15d55b2a5cdccf6922c554a
Miquel Raynal Oct. 1, 2018, 1:49 p.m. UTC | #6
Hi Marc,

Marc Zyngier <marc.zyngier@arm.com> wrote on Sun, 30 Sep 2018 15:39:30
+0100:

> On Fri, 28 Sep 2018 18:38:06 +0200
> Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> 
> > Hi Marc,
> > 
> > Marc Zyngier <marc.zyngier@arm.com> wrote on Fri, 28 Sep 2018 11:25:32
> > +0100:
> >   
> > > On 24/09/18 17:01, Miquel Raynal wrote:
> > > 
> > > Hi Miquel,
> > > 
> > > [...]
> > >     
> > > > The difference is that at this stage, the irq_data->chip pointer of the
> > > > SEI controller _parent_ (ie. the GIC's chip pointer) is not valid. I
> > > > digged a lot in this direction during your vacations to find out what I
> > > > missed, and I ended up calling back irq_alloc_irqs_parent().
> > > > 
> > > > If you have an idea of how to handle this properly, I am all ears!      
> > > 
> > > The most glaring problem is that you create a hierarchy that encompasses
> > > the GIC, which is just wrong. The hierarchy cannot point to the GIC,
> > > because it end-up as a multiplexer.
> > > 
> > > This code sequence in the probe function is the root of all evil:
> > > 
> > > 	/* Get a reference to the parent domain */
> > > 	parent = of_irq_find_parent(node);
> > > 	if (!parent) {
> > > 		dev_err(sei->dev, "Failed to find parent IRQ node\n");
> > > 		ret = -ENODEV;
> > > 		goto dispose_irq;
> > > 	}
> > > 
> > > This is a GIC interrupt, which is the output line for the SEI block.
> > > 
> > > 	parent_domain = irq_find_host(parent);
> > > 	if (!parent_domain) {
> > > 		dev_err(sei->dev, "Failed to find parent IRQ domain\n");
> > > 		ret = -ENODEV;
> > > 		goto dispose_irq;
> > > 	}
> > > 
> > > That's the GIC domain.
> > > 
> > > 	/* Create the 'wired' domain */
> > > 	sei->ap_domain = irq_domain_create_hierarchy(parent_domain, 0,
> > > 						     sei->caps->ap_range.size,
> > > 						     of_node_to_fwnode(node),
> > > 						     &mvebu_sei_ap_domain_ops,
> > > 						     sei);
> > > 	if (!sei->ap_domain) {
> > > 		dev_err(sei->dev, "Failed to create AP IRQ domain\n");
> > > 		ret = -ENOMEM;
> > > 		goto dispose_irq;
> > > 	}
> > > 
> > > And here, you're saying "each and every AP SEI interrupt is directly
> > > linked to a unique GIC interrupt". Nothing could be further from the
> > > truth, since all SEI interrupts are funnelled through a *single*
> > > GIC interrupt. So you cannot create it as a hierarchy parented at
> > > the GIC.
> > > 
> > > 	/* Create the 'MSI' domain */
> > > 	sei->cp_domain = irq_domain_create_hierarchy(parent_domain, 0,
> > > 						     sei->caps->cp_range.size,
> > > 						     of_node_to_fwnode(node),
> > > 						     &mvebu_sei_cp_domain_ops,
> > > 						     sei);
> > > 
> > > 
> > > Same thing here.
> > > 
> > > The issue here is that you're using the GIC domain as the way to root
> > > the two distinct SEI domains, while they should be rooted at an internal,
> > > SEI-specific domain. I'd suggest a topology like this:
> > > 
> > >                   AP-SEI ---> S
> > >                               E
> > >     Plat-MSI ---> CP-SEI ---> I
> > > 
> > > CP-SEI and AP-SEI use SEI as a parent. SEI does not have a parent, but is
> > > a chained irqchip.    
> > 
> > Thanks you very much for this detailed explanation. The above is what I
> > intended to do, but maybe what I achieved is more something like:
> > 
> >                    AP-SEI ---> G
> >                                I
> >      Plat-MSI ---> CP-SEI ---> C
> > 
> > And now I understand better what is bothering you since the beginning.
> >   
> > > 
> > > I'm happy to help you reworking this piece of code if you tell me how to
> > > plug a driver that can use it on an mcbin system.    
> > 
> > Next week I'll have another look at the driver, but it could be great
> > if you could show me the big lines of how you imagine the rework. I
> > prepared a branch based on 4.19-rc1 with:
> > * The ICU/SEI series
> > * The series adding support for thermal interrupts in the
> >   armada_thermal.c driver (it triggers a wired SEI when the AP is too
> >   hot or an MSI SEI when it is a CP).
> > * The needed DT changes.
> > 
> > https://github.com/miquelraynal/linux/tree/marvell/4.19-rc1/icu-sei   
> 
> I've played with this a bit too much, and have basically rewritten the
> whole SEI driver (see [1] for the resulting patch, which doesn't leave
> much unchanged).

What you expected is now much clearer for me, thank you very much.

> 
> It also requires some changes[2] to the ICU driver, which assumes that
> the SEI and GICP have similar interrupt flows (news flash, they don't).
> I still hate the way this driver is written (no clear separation
> between what is essentially two different drivers bolted together), but
> that's a different story.

Ok, I will integrate this change too.

> 
> I'm also very worried that ICU-SEI interrupts are handled as edge,
> while they are level. The HW doesn't seem to offer any facility to
> resample the level, so this looks broken by design. Maybe the Marvell
> people on Cc could shed some light on how they expect this to work?
> 
> As for the thermal stuff, the DT is wrong (see [3]).

Indeed, wired interrupts are only level-high so we can get rid of the
second parameter. Actually the bindings were right, I messed-up
somewhere when updating them.

> 
> I've tested it by letting the machine overheat, and take thermal
> interrupts on both AP and CP. The box shutdowns cleanly in both cases,
> so I guess it somehow works.

Ok, let me test the v6 and I'll send it.

Thanks,
Miquèl
diff mbox series

Patch

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 383e7b70221d..96451b581452 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -310,6 +310,9 @@  config MVEBU_ODMI
 config MVEBU_PIC
 	bool
 
+config MVEBU_SEI
+        bool
+
 config LS_SCFG_MSI
 	def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE
 	depends on PCI && PCI_MSI
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index fbd1ec8070ef..b822199445ff 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -76,6 +76,7 @@  obj-$(CONFIG_MVEBU_GICP)		+= irq-mvebu-gicp.o
 obj-$(CONFIG_MVEBU_ICU)			+= irq-mvebu-icu.o
 obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o
 obj-$(CONFIG_MVEBU_PIC)			+= irq-mvebu-pic.o
+obj-$(CONFIG_MVEBU_SEI)			+= irq-mvebu-sei.o
 obj-$(CONFIG_LS_SCFG_MSI)		+= irq-ls-scfg-msi.o
 obj-$(CONFIG_EZNPS_GIC)			+= irq-eznps.o
 obj-$(CONFIG_ARCH_ASPEED)		+= irq-aspeed-vic.o irq-aspeed-i2c-ic.o
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
new file mode 100644
index 000000000000..7dbc57d017b3
--- /dev/null
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -0,0 +1,475 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "mvebu-sei: " fmt
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/msi.h>
+#include <linux/platform_device.h>
+#include <linux/irqchip.h>
+#include <asm/smp_plat.h>
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/* Cause register */
+#define GICP_SECR(idx)		(0x0  + ((idx) * 0x4))
+/* Mask register */
+#define GICP_SEMR(idx)		(0x20 + ((idx) * 0x4))
+#define GICP_SET_SEI_OFFSET	0x30
+
+#define SEI_IRQ_COUNT_PER_REG	32
+#define SEI_IRQ_REG_COUNT	2
+#define SEI_IRQ_COUNT		(SEI_IRQ_COUNT_PER_REG * SEI_IRQ_REG_COUNT)
+#define SEI_IRQ_REG_IDX(irq_id)	((irq_id) / SEI_IRQ_COUNT_PER_REG)
+#define SEI_IRQ_REG_BIT(irq_id)	((irq_id) % SEI_IRQ_COUNT_PER_REG)
+
+struct mvebu_sei_interrupt_range {
+	u32 first;
+	u32 size;
+};
+
+struct mvebu_sei_caps {
+	struct mvebu_sei_interrupt_range ap_range;
+	struct mvebu_sei_interrupt_range cp_range;
+};
+
+struct mvebu_sei {
+	struct device *dev;
+	void __iomem *base;
+	struct resource *res;
+	struct irq_domain *ap_domain;
+	struct irq_domain *cp_domain;
+	const struct mvebu_sei_caps *caps;
+
+	/* Lock on MSI allocations/releases */
+	struct mutex cp_msi_lock;
+	DECLARE_BITMAP(cp_msi_bitmap, SEI_IRQ_COUNT);
+
+	/* Lock on IRQ masking register */
+	struct mutex mask_lock;
+};
+
+static int mvebu_sei_domain_to_sei_irq(struct mvebu_sei *sei,
+				       struct irq_domain *domain,
+				       irq_hw_number_t hwirq)
+{
+	if (domain == sei->ap_domain)
+		return sei->caps->ap_range.first + hwirq;
+	else
+		return sei->caps->cp_range.first + hwirq;
+}
+
+static void mvebu_sei_reset(struct mvebu_sei *sei)
+{
+	u32 reg_idx;
+
+	/* Clear IRQ cause registers */
+	for (reg_idx = 0; reg_idx < SEI_IRQ_REG_COUNT; reg_idx++)
+		writel_relaxed(0xFFFFFFFF, sei->base + GICP_SECR(reg_idx));
+}
+
+static void mvebu_sei_mask_irq(struct irq_data *d)
+{
+	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
+	u32 sei_irq = mvebu_sei_domain_to_sei_irq(sei, d->domain, d->hwirq);
+	u32 reg_idx = SEI_IRQ_REG_IDX(sei_irq);
+	u32 reg;
+
+	/* 1 disables the interrupt */
+	mutex_lock(&sei->mask_lock);
+	reg = readl_relaxed(sei->base + GICP_SEMR(reg_idx));
+	reg |= BIT(SEI_IRQ_REG_BIT(sei_irq));
+	writel_relaxed(reg, sei->base + GICP_SEMR(reg_idx));
+	mutex_unlock(&sei->mask_lock);
+}
+
+static void mvebu_sei_unmask_irq(struct irq_data *d)
+{
+	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
+	u32 sei_irq = mvebu_sei_domain_to_sei_irq(sei, d->domain, d->hwirq);
+	u32 reg_idx = SEI_IRQ_REG_IDX(sei_irq);
+	u32 reg;
+
+	/* 0 enables the interrupt */
+	mutex_lock(&sei->mask_lock);
+	reg = readl_relaxed(sei->base + GICP_SEMR(reg_idx));
+	reg &= ~BIT(SEI_IRQ_REG_BIT(sei_irq));
+	writel_relaxed(reg, sei->base + GICP_SEMR(reg_idx));
+	mutex_unlock(&sei->mask_lock);
+}
+
+static void mvebu_sei_compose_msi_msg(struct irq_data *data,
+				      struct msi_msg *msg)
+{
+	struct mvebu_sei *sei = data->chip_data;
+	phys_addr_t set = sei->res->start + GICP_SET_SEI_OFFSET;
+
+	msg->data = mvebu_sei_domain_to_sei_irq(sei, data->domain, data->hwirq);
+	msg->address_lo = lower_32_bits(set);
+	msg->address_hi = upper_32_bits(set);
+}
+
+static int mvebu_sei_ap_set_type(struct irq_data *data, unsigned int type)
+{
+	if (!(type & IRQ_TYPE_LEVEL_HIGH))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int mvebu_sei_cp_set_type(struct irq_data *data, unsigned int type)
+{
+	if (!(type & IRQ_TYPE_EDGE_RISING))
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct irq_chip mvebu_sei_ap_irq_chip = {
+	.name			= "AP SEI",
+	.irq_mask		= mvebu_sei_mask_irq,
+	.irq_unmask		= mvebu_sei_unmask_irq,
+	.irq_set_type		= mvebu_sei_ap_set_type,
+};
+
+static struct irq_chip mvebu_sei_cp_irq_chip = {
+	.name			= "CP SEI",
+	.irq_mask		= mvebu_sei_mask_irq,
+	.irq_unmask		= mvebu_sei_unmask_irq,
+	.irq_eoi                = irq_chip_eoi_parent,
+	.irq_set_affinity       = irq_chip_set_affinity_parent,
+	.irq_set_type		= mvebu_sei_cp_set_type,
+	.irq_compose_msi_msg	= mvebu_sei_compose_msi_msg,
+};
+
+static int mvebu_sei_ap_match(struct irq_domain *d, struct device_node *node,
+			      enum irq_domain_bus_token bus_token)
+{
+	struct mvebu_sei *sei = d->host_data;
+
+	if (sei->dev->of_node != node)
+		return 0;
+
+	if (d == sei->ap_domain)
+		return 1;
+
+	return 0;
+}
+
+static int mvebu_sei_ap_irq_map(struct irq_domain *domain, unsigned int virq,
+				irq_hw_number_t hwirq)
+{
+	struct mvebu_sei *sei = domain->host_data;
+
+	irq_set_chip_data(virq, sei);
+	irq_set_chip_and_handler(virq, &mvebu_sei_ap_irq_chip,
+				 handle_level_irq);
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_probe(virq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mvebu_sei_ap_domain_ops = {
+	.match = mvebu_sei_ap_match,
+	.xlate = irq_domain_xlate_onecell,
+	.map = mvebu_sei_ap_irq_map,
+};
+
+static int mvebu_sei_cp_domain_alloc(struct mvebu_sei *sei)
+{
+	int hwirq;
+
+	mutex_lock(&sei->cp_msi_lock);
+	hwirq = find_first_zero_bit(sei->cp_msi_bitmap,
+				    sei->caps->cp_range.size);
+	set_bit(hwirq, sei->cp_msi_bitmap);
+	mutex_unlock(&sei->cp_msi_lock);
+
+	return hwirq;
+}
+
+static void mvebu_sei_cp_domain_free(struct mvebu_sei *sei, int hwirq)
+{
+	mutex_lock(&sei->cp_msi_lock);
+	clear_bit(hwirq, sei->cp_msi_bitmap);
+	mutex_unlock(&sei->cp_msi_lock);
+}
+
+static int mvebu_sei_irq_domain_alloc(struct irq_domain *domain,
+				      unsigned int virq, unsigned int nr_irqs,
+				      void *args)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_fwspec *fwspec = args;
+	int hwirq;
+	int ret;
+
+	/* The software only supports single allocations for now */
+	if (nr_irqs != 1)
+		return -ENOTSUPP;
+
+	hwirq = mvebu_sei_cp_domain_alloc(sei);
+	if (hwirq < 0)
+		return -ENOSPC;
+
+	fwspec->fwnode = domain->parent->fwnode;
+	fwspec->param_count = 3;
+	fwspec->param[0] = GIC_SPI;
+	fwspec->param[1] = 0;
+	/*
+	 * Assume edge rising for now, it will be properly set when
+	 * ->set_type() is called
+	 */
+	fwspec->param[2] = IRQ_TYPE_EDGE_RISING;
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, fwspec);
+	if (ret)
+		goto free_irq;
+
+	ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+					    &mvebu_sei_cp_irq_chip, sei);
+	if (ret)
+		goto free_irq_parents;
+
+	return 0;
+
+free_irq_parents:
+	irq_domain_free_irqs_parent(domain, virq, 1);
+free_irq:
+	mvebu_sei_cp_domain_free(sei, hwirq);
+
+	return ret;
+}
+
+static void mvebu_sei_irq_domain_free(struct irq_domain *domain,
+				      unsigned int virq, unsigned int nr_irqs)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	u32 irq_count = sei->caps->ap_range.size + sei->caps->cp_range.size;
+
+	if (nr_irqs != 1 || d->hwirq >= irq_count) {
+		dev_err(sei->dev, "Invalid hwirq %lu\n", d->hwirq);
+		return;
+	}
+
+	mvebu_sei_cp_domain_free(sei, d->hwirq);
+}
+
+static const struct irq_domain_ops mvebu_sei_cp_domain_ops = {
+	.alloc = mvebu_sei_irq_domain_alloc,
+	.free = mvebu_sei_irq_domain_free,
+};
+
+static struct irq_chip mvebu_sei_msi_irq_chip = {
+	.name		= "SEI MSI controller",
+	.irq_set_type	= mvebu_sei_cp_set_type,
+};
+
+static struct msi_domain_ops mvebu_sei_msi_ops = {
+};
+
+static struct msi_domain_info mvebu_sei_msi_domain_info = {
+	.flags	= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
+	.ops	= &mvebu_sei_msi_ops,
+	.chip	= &mvebu_sei_msi_irq_chip,
+};
+
+static void mvebu_sei_handle_cascade_irq(struct irq_desc *desc)
+{
+	struct mvebu_sei *sei = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	DECLARE_BITMAP(irqmap, SEI_IRQ_COUNT);
+	u32 *irqreg = (u32 *)&irqmap;
+	u32 idx, irqn;
+
+	chained_irq_enter(chip, desc);
+
+	/* Create the bitmap of the pending interrupts */
+	for (idx = 0; idx < SEI_IRQ_REG_COUNT; idx++)
+		irqreg[idx] = readl_relaxed(sei->base + GICP_SECR(idx));
+
+	/* Call handler for each pending interrupt */
+	for_each_set_bit(irqn, irqmap, SEI_IRQ_COUNT) {
+		struct irq_domain *domain;
+		u32 virq, hwirq;
+
+		/*
+		 * Finding Linux mapping (virq) needs the right domain
+		 * and the relative hwirq (which start at 0 in both
+		 * cases, while irqn is relative to all SEI interrupts).
+		 */
+		if (irqn < sei->caps->ap_range.size) {
+			domain = sei->ap_domain;
+			hwirq = irqn;
+		} else {
+			domain = sei->cp_domain;
+			hwirq = irqn - sei->caps->ap_range.size;
+		}
+
+		virq = irq_find_mapping(domain, hwirq);
+		if (!virq) {
+			dev_warn(sei->dev,
+				 "Spurious IRQ detected (hwirq %d)\n", hwirq);
+			continue;
+		}
+
+		/* Call IRQ handler */
+		generic_handle_irq(virq);
+	}
+
+	/* Clear the pending interrupts by writing 1 to the set bits */
+	for (idx = 0; idx < SEI_IRQ_REG_COUNT; idx++)
+		if (irqreg[idx])
+			writel_relaxed(irqreg[idx], sei->base + GICP_SECR(idx));
+
+	chained_irq_exit(chip, desc);
+}
+
+static int mvebu_sei_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node, *parent;
+	struct irq_domain *parent_domain, *plat_domain;
+	struct mvebu_sei *sei;
+	u32 parent_irq;
+	int ret;
+
+	sei = devm_kzalloc(&pdev->dev, sizeof(*sei), GFP_KERNEL);
+	if (!sei)
+		return -ENOMEM;
+
+	sei->dev = &pdev->dev;
+
+	mutex_init(&sei->cp_msi_lock);
+	mutex_init(&sei->mask_lock);
+
+	sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sei->base = devm_ioremap_resource(sei->dev, sei->res);
+	if (!sei->base) {
+		dev_err(sei->dev, "Failed to remap SEI resource\n");
+		return -ENODEV;
+	}
+
+	/* Retrieve the SEI capabilities with the interrupt ranges */
+	sei->caps = of_device_get_match_data(&pdev->dev);
+	if (!sei->caps) {
+		dev_err(sei->dev,
+			"Could not retrieve controller capabilities\n");
+		return -EINVAL;
+	}
+
+	mvebu_sei_reset(sei);
+
+	/*
+	 * Reserve the single (top-level) parent SPI IRQ from which all the
+	 * interrupts handled by this driver will be signaled.
+	 */
+	parent_irq = irq_of_parse_and_map(node, 0);
+	if (parent_irq <= 0) {
+		dev_err(sei->dev, "Failed to retrieve top-level SPI IRQ\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * SEIs can be triggered from the AP through wired interrupts and from
+	 * the CPs through MSIs.
+	 */
+
+	/* Get a reference to the parent domain */
+	parent = of_irq_find_parent(node);
+	if (!parent) {
+		dev_err(sei->dev, "Failed to find parent IRQ node\n");
+		ret = -ENODEV;
+		goto dispose_irq;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		dev_err(sei->dev, "Failed to find parent IRQ domain\n");
+		ret = -ENODEV;
+		goto dispose_irq;
+	}
+
+	/* Create the 'wired' domain */
+	sei->ap_domain = irq_domain_create_hierarchy(parent_domain, 0,
+						     sei->caps->ap_range.size,
+						     of_node_to_fwnode(node),
+						     &mvebu_sei_ap_domain_ops,
+						     sei);
+	if (!sei->ap_domain) {
+		dev_err(sei->dev, "Failed to create AP IRQ domain\n");
+		ret = -ENOMEM;
+		goto dispose_irq;
+	}
+
+	/* Create the 'MSI' domain */
+	sei->cp_domain = irq_domain_create_hierarchy(parent_domain, 0,
+						     sei->caps->cp_range.size,
+						     of_node_to_fwnode(node),
+						     &mvebu_sei_cp_domain_ops,
+						     sei);
+	if (!sei->cp_domain) {
+		pr_err("Failed to create CPs IRQ domain\n");
+		ret = -ENOMEM;
+		goto remove_ap_domain;
+	}
+
+	plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node),
+						     &mvebu_sei_msi_domain_info,
+						     sei->cp_domain);
+	if (!plat_domain) {
+		pr_err("Failed to create CPs MSI domain\n");
+		ret = -ENOMEM;
+		goto remove_cp_domain;
+	}
+
+	platform_set_drvdata(pdev, sei);
+
+	irq_set_chained_handler(parent_irq, mvebu_sei_handle_cascade_irq);
+	irq_set_handler_data(parent_irq, sei);
+
+	return 0;
+
+remove_cp_domain:
+	irq_domain_remove(sei->cp_domain);
+remove_ap_domain:
+	irq_domain_remove(sei->ap_domain);
+dispose_irq:
+	irq_dispose_mapping(parent_irq);
+
+	return ret;
+}
+
+struct mvebu_sei_caps mvebu_sei_ap806_caps = {
+	.ap_range = {
+		.first = 0,
+		.size = 21,
+	},
+	.cp_range = {
+		.first = 21,
+		.size = 43,
+	},
+};
+
+static const struct of_device_id mvebu_sei_of_match[] = {
+	{
+		.compatible = "marvell,ap806-sei",
+		.data = &mvebu_sei_ap806_caps,
+	},
+	{},
+};
+
+static struct platform_driver mvebu_sei_driver = {
+	.probe  = mvebu_sei_probe,
+	.driver = {
+		.name = "mvebu-sei",
+		.of_match_table = mvebu_sei_of_match,
+	},
+};
+builtin_platform_driver(mvebu_sei_driver);