diff mbox series

[v2,15/25] irqchip/apple-aic: Add support for the Apple Interrupt Controller

Message ID 20210215121713.57687-16-marcan@marcan.st (mailing list archive)
State New, archived
Headers show
Series Apple M1 SoC platform bring-up | expand

Commit Message

Hector Martin Feb. 15, 2021, 12:17 p.m. UTC
This is the root interrupt controller used on Apple ARM SoCs such as the
M1. This irqchip driver performs multiple functions:

* Discriminates between IRQs and FIQs

* Drives the AIC peripheral itself (which handles IRQs)

* Dispatches FIQs to downstream hard-wired clients (currently the ARM
  timer).

This patch introduces basic UP irqchip support, without SMP/IPI support.

Signed-off-by: Hector Martin <marcan@marcan.st>
---
 MAINTAINERS                     |   2 +
 drivers/irqchip/Kconfig         |  10 +
 drivers/irqchip/Makefile        |   1 +
 drivers/irqchip/irq-apple-aic.c | 647 ++++++++++++++++++++++++++++++++
 include/linux/cpuhotplug.h      |   1 +
 5 files changed, 661 insertions(+)
 create mode 100644 drivers/irqchip/irq-apple-aic.c

Comments

Marc Zyngier Feb. 15, 2021, 6:09 p.m. UTC | #1
On Mon, 15 Feb 2021 12:17:03 +0000,
Hector Martin <marcan@marcan.st> wrote:
> 
> This is the root interrupt controller used on Apple ARM SoCs such as the
> M1. This irqchip driver performs multiple functions:
> 
> * Discriminates between IRQs and FIQs
> 
> * Drives the AIC peripheral itself (which handles IRQs)
> 
> * Dispatches FIQs to downstream hard-wired clients (currently the ARM
>   timer).
> 
> This patch introduces basic UP irqchip support, without SMP/IPI support.

This last comment seems outdated now.

> Signed-off-by: Hector Martin <marcan@marcan.st>
> ---
>  MAINTAINERS                     |   2 +
>  drivers/irqchip/Kconfig         |  10 +
>  drivers/irqchip/Makefile        |   1 +
>  drivers/irqchip/irq-apple-aic.c | 647 ++++++++++++++++++++++++++++++++
>  include/linux/cpuhotplug.h      |   1 +
>  5 files changed, 661 insertions(+)
>  create mode 100644 drivers/irqchip/irq-apple-aic.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 9fe723033e63..a8f258fbb5f1 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1636,6 +1636,8 @@ T:	git https://github.com/AsahiLinux/linux.git
>  F:	Documentation/devicetree/bindings/arm/apple.yaml
>  F:	Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
>  F:	arch/arm64/include/asm/sysreg_apple.h
> +F:	drivers/irqchip/irq-apple-aic.c
> +F:	include/dt-bindings/interrupt-controller/apple-aic.h
>  
>  ARM/ARTPEC MACHINE SUPPORT
>  M:	Jesper Nilsson <jesper.nilsson@axis.com>
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index b147f22a78f4..fca080640c1a 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -590,4 +590,14 @@ config MST_IRQ
>  	help
>  	  Support MStar Interrupt Controller.
>  
> +config APPLE_AIC
> +	bool "Apple Interrupt Controller (AIC)"
> +	depends on ARM64
> +	default ARCH_APPLE
> +	select IRQ_DOMAIN
> +	select IRQ_DOMAIN_HIERARCHY

arm64 selects GENERIC_IRQ_IPI, which selects IRQ_DOMAIN_HIERARCHY,
which selects IRQ_DOMAIN. So these two lines are superfluous.

> +	help
> +	  Support for the Apple Interrupt Controller found on Apple Silicon SoCs,
> +	  such as the M1.
> +
>  endmenu
> diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
> index 0ac93bfaec61..0e2ba7c2dce7 100644
> --- a/drivers/irqchip/Makefile
> +++ b/drivers/irqchip/Makefile
> @@ -113,3 +113,4 @@ obj-$(CONFIG_LOONGSON_PCH_PIC)		+= irq-loongson-pch-pic.o
>  obj-$(CONFIG_LOONGSON_PCH_MSI)		+= irq-loongson-pch-msi.o
>  obj-$(CONFIG_MST_IRQ)			+= irq-mst-intc.o
>  obj-$(CONFIG_SL28CPLD_INTC)		+= irq-sl28cpld.o
> +obj-$(CONFIG_APPLE_AIC)			+= irq-apple-aic.o
> diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
> new file mode 100644
> index 000000000000..604dbb8fd898
> --- /dev/null
> +++ b/drivers/irqchip/irq-apple-aic.c
> @@ -0,0 +1,647 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright The Asahi Linux Contributors
> + *
> + * Based on irq-lpc32xx:
> + *   Copyright 2015-2016 Vladimir Zapolskiy <vz@mleia.com>
> + * Based on irq-bcm2836:
> + *   Copyright 2015 Broadcom
> + */
> +
> +/*
> + * AIC is a fairly simple interrupt controller with the following features:
> + *
> + * - 896 level-triggered hardware IRQs
> + *   - Single mask bit per IRQ
> + *   - Per-IRQ affinity setting
> + *   - Automatic masking on event delivery (auto-ack)
> + *   - Software triggering (ORed with hw line)
> + * - 2 per-CPU IPIs (meant as "self" and "other", but they are interchangeable if not symmetric)
> + * - Automatic prioritization (single event/ack register per CPU, lower IRQs = higher priority)
> + * - Automatic masking on ack
> + * - Default "this CPU" register view and explicit per-CPU views
> + *
> + * In addition, this driver also handles FIQs, as these are routed to the same IRQ vector. These
> + * are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and performance counters (TODO).
> + *

nit: A bit of comment formatting could be helpful.

> + * Implementation notes:
> + *
> + * - This driver creates two IRQ domains, one for HW IRQs and internal FIQs, and one for IPIs
> + * - Since Linux needs more than 2 IPIs, we implement a software IRQ controller and funnel all IPIs
> + *   into one per-CPU IPI (the second "self" IPI is unused).
> + * - FIQ hwirq numbers are assigned after true hwirqs, and are per-cpu
> + * - DT bindings use 3-cell form (like GIC):
> + *   - <0 nr flags> - hwirq #nr
> + *   - <1 nr flags> - FIQ #nr
> + *     - nr=0  Physical HV timer
> + *     - nr=1  Virtual HV timer
> + *     - nr=2  Physical guest timer
> + *     - nr=3  Virtual guest timer
> + *
> + */
> +
> +#define pr_fmt(fmt) "%s: " fmt, __func__
> +
> +#include <linux/bits.h>
> +#include <linux/bitfield.h>
> +#include <linux/cpuhotplug.h>
> +#include <linux/io.h>
> +#include <linux/irqchip.h>
> +#include <linux/irqchip/arm-gic-v3.h>

I'd rather you move the ICH_HCR_* definitions to sysreg.h rather than
including the GICv3 stuff. They are only there for historical reasons
(such as supporting KVM on 32bit systems), none of which apply anymore.

> +#include <linux/irqdomain.h>
> +#include <linux/of_address.h>
> +#include <linux/slab.h>
> +#include <asm/exception.h>
> +#include <asm/sysreg.h>
> +#include <asm/sysreg_apple.h>
> +
> +#include <dt-bindings/interrupt-controller/apple-aic.h>
> +
> +#define AIC_INFO		0x0004
> +#define AIC_INFO_NR_HW		GENMASK(15, 0)
> +
> +#define AIC_CONFIG		0x0010
> +
> +#define AIC_WHOAMI		0x2000
> +#define AIC_EVENT		0x2004
> +#define AIC_EVENT_TYPE		GENMASK(31, 16)
> +#define AIC_EVENT_NUM		GENMASK(15, 0)
> +
> +#define AIC_EVENT_TYPE_HW	1
> +#define AIC_EVENT_TYPE_IPI	4
> +#define AIC_EVENT_IPI_OTHER	1
> +#define AIC_EVENT_IPI_SELF	2
> +
> +#define AIC_IPI_SEND		0x2008
> +#define AIC_IPI_ACK		0x200c
> +#define AIC_IPI_MASK_SET	0x2024
> +#define AIC_IPI_MASK_CLR	0x2028
> +
> +#define AIC_IPI_SEND_CPU(cpu)	BIT(cpu)
> +
> +#define AIC_IPI_OTHER		BIT(0)
> +#define AIC_IPI_SELF		BIT(31)
> +
> +#define AIC_TARGET_CPU		0x3000
> +#define AIC_SW_SET		0x4000
> +#define AIC_SW_CLR		0x4080
> +#define AIC_MASK_SET		0x4100
> +#define AIC_MASK_CLR		0x4180
> +
> +#define AIC_CPU_IPI_SET(cpu)	(0x5008 + ((cpu) << 7))
> +#define AIC_CPU_IPI_CLR(cpu)	(0x500c + ((cpu) << 7))
> +#define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7))
> +#define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7))
> +
> +#define MASK_REG(x)		(4 * ((x) >> 5))
> +#define MASK_BIT(x)		BIT((x) & 0x1f)
> +
> +#define AIC_NR_FIQ		4
> +#define AIC_NR_SWIPI		32
> +
> +/*
> + * Max 31 bits in IPI SEND register (top bit is self).
> + * >=32-core chips will need code changes anyway.
> + */
> +#define AIC_MAX_CPUS		31
> +
> +struct aic_irq_chip {
> +	void __iomem *base;
> +	struct irq_domain *hw_domain;
> +	struct irq_domain *ipi_domain;
> +	int nr_hw;
> +	int ipi_hwirq;
> +};
> +
> +static atomic_t aic_vipi_flag[AIC_MAX_CPUS];
> +static atomic_t aic_vipi_mask[AIC_MAX_CPUS];
> +
> +static struct aic_irq_chip *aic_irqc;
> +
> +static void aic_handle_ipi(struct pt_regs *regs);
> +
> +static u32 aic_ic_read(struct aic_irq_chip *ic, u32 reg)
> +{
> +	return readl_relaxed(ic->base + reg);
> +}
> +
> +static void aic_ic_write(struct aic_irq_chip *ic, u32 reg, u32 val)
> +{
> +	writel_relaxed(val, ic->base + reg);
> +}
> +
> +/*
> + * IRQ irqchip
> + */
> +
> +static void aic_irq_mask(struct irq_data *d)
> +{
> +	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
> +
> +	aic_ic_write(ic, AIC_MASK_SET + MASK_REG(d->hwirq),
> +		     MASK_BIT(d->hwirq));
> +}
> +
> +static void aic_irq_unmask(struct irq_data *d)
> +{
> +	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
> +
> +	aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(d->hwirq),
> +		     MASK_BIT(d->hwirq));
> +}
> +
> +static void aic_irq_eoi(struct irq_data *d)
> +{
> +	/*
> +	 * Reading the interrupt reason automatically acknowledges and masks
> +	 * the IRQ, so we just unmask it here if needed.
> +	 */
> +	if (!irqd_irq_disabled(d) && !irqd_irq_masked(d))
> +		aic_irq_unmask(d);
> +}
> +
> +static void aic_handle_irq(struct pt_regs *regs)
> +{
> +	struct aic_irq_chip *ic = aic_irqc;
> +	u32 event, type, irq;
> +
> +	do {
> +		/*
> +		 * We cannot use a relaxed read here, as DMA needs to be
> +		 * ordered with respect to the IRQ firing.
> +		 */
> +		event = readl(ic->base + AIC_EVENT);
> +		type = FIELD_GET(AIC_EVENT_TYPE, event);
> +		irq = FIELD_GET(AIC_EVENT_NUM, event);
> +
> +		if (type == AIC_EVENT_TYPE_HW)
> +			handle_domain_irq(aic_irqc->hw_domain, irq, regs);
> +		else if (type == AIC_EVENT_TYPE_IPI && irq == 1)
> +			aic_handle_ipi(regs);
> +		else if (event != 0)
> +			pr_err("Unknown IRQ event %d, %d\n", type, irq);
> +	} while (event);
> +
> +	/*
> +	 * vGIC maintenance interrupts end up here too, so we need to check
> +	 * for them separately. Just report and disable vGIC for now, until
> +	 * we implement this properly.
> +	 */
> +	if ((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) &&
> +		read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {
> +		pr_err("vGIC IRQ fired, disabling.\n");
> +		sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);
> +	}
> +}
> +
> +static int aic_irq_set_affinity(struct irq_data *d,
> +				const struct cpumask *mask_val, bool force)
> +{
> +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> +	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
> +	int cpu;
> +
> +	if (hwirq > ic->nr_hw)
> +		return -EINVAL;
> +
> +	if (force)
> +		cpu = cpumask_first(mask_val);
> +	else
> +		cpu = cpumask_any_and(mask_val, cpu_online_mask);
> +
> +	aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));
> +	irq_data_update_effective_affinity(d, cpumask_of(cpu));

It is fine to pick a single CPU out of the whole affinity set, but you
should tell the kernel that this is the case (irqd_set_single_target()).

> +
> +	return IRQ_SET_MASK_OK;
> +}
> +
> +static struct irq_chip aic_chip = {
> +	.name = "AIC",
> +	.irq_mask = aic_irq_mask,
> +	.irq_unmask = aic_irq_unmask,
> +	.irq_eoi = aic_irq_eoi,
> +	.irq_set_affinity = aic_irq_set_affinity,
> +};
> +
> +/*
> + * FIQ irqchip
> + */
> +
> +static void aic_fiq_mask(struct irq_data *d)
> +{
> +	/* Only the guest timers have real mask bits, unfortunately. */
> +	switch (d->hwirq) {
> +	case AIC_TMR_GUEST_PHYS:
> +		sysreg_clear_set_s(SYS_APL_VM_TMR_MASK,
> +					VM_TMR_MASK_P, 0);
> +		break;
> +	case AIC_TMR_GUEST_VIRT:
> +		sysreg_clear_set_s(SYS_APL_VM_TMR_MASK,
> +					VM_TMR_MASK_V, 0);
> +		break;
> +	}
> +}
> +
> +static void aic_fiq_unmask(struct irq_data *d)
> +{
> +	switch (d->hwirq) {
> +	case AIC_TMR_GUEST_PHYS:
> +		sysreg_clear_set_s(SYS_APL_VM_TMR_MASK,
> +					0, VM_TMR_MASK_P);
> +		break;
> +	case AIC_TMR_GUEST_VIRT:
> +		sysreg_clear_set_s(SYS_APL_VM_TMR_MASK,
> +					0, VM_TMR_MASK_V);
> +		break;
> +	}
> +}
> +
> +static void aic_fiq_eoi(struct irq_data *d)
> +{
> +	/* We mask to ack (where we can), so we need to unmask at EOI. */
> +	if (!irqd_irq_disabled(d) && !irqd_irq_masked(d))
> +		aic_fiq_unmask(d);
> +}
> +
> +#define TIMER_FIRING(x)                                                        \
> +	(((x) & (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_MASK |            \
> +		 ARCH_TIMER_CTRL_IT_STAT)) ==                                  \
> +	 (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
> +
> +static void aic_handle_fiq(struct pt_regs *regs)
> +{
> +	/*
> +	 * It would be really if we had a system register that lets us get
> +	 * the FIQ source state without having to peek down into sources...
> +	 * but such a register does not seem to exist.
> +	 *
> +	 * So, we have these potential sources to test for:
> +	 *  - Fast IPIs (not yet used)
> +	 *  - The 4 timers (CNTP, CNTV for each of HV and guest)
> +	 *  - Per-core PMCs (not yet supported)
> +	 *  - Per-cluster uncore PMCs (not yet supported)
> +	 *
> +	 * Since not dealing with any of these results in a FIQ storm,
> +	 * we check for everything here, even things we don't support yet.
> +	 */
> +
> +	if (read_sysreg_s(SYS_APL_IPI_SR) & IPI_SR_PENDING) {
> +		pr_warn("Fast IPI fired. Acking.\n");
> +		write_sysreg_s(IPI_SR_PENDING, SYS_APL_IPI_SR);
> +	}
> +
> +	if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
> +		handle_domain_irq(aic_irqc->hw_domain,
> +				  aic_irqc->nr_hw + AIC_TMR_HV_PHYS, regs);
> +
> +	if (TIMER_FIRING(read_sysreg(cntv_ctl_el0)))
> +		handle_domain_irq(aic_irqc->hw_domain,
> +				  aic_irqc->nr_hw + AIC_TMR_HV_VIRT, regs);
> +
> +	if (TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02)))
> +		handle_domain_irq(aic_irqc->hw_domain,
> +				  aic_irqc->nr_hw + AIC_TMR_GUEST_PHYS, regs);
> +
> +	if (TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02)))
> +		handle_domain_irq(aic_irqc->hw_domain,
> +				  aic_irqc->nr_hw + AIC_TMR_GUEST_VIRT, regs);
> +
> +	if ((read_sysreg_s(SYS_APL_PMCR0) & (PMCR0_IMODE | PMCR0_IACT))
> +			== (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
> +		/*
> +		 * Not supported yet, let's figure out how to handle this when
> +		 * we implement these proprietary performance counters. For now,
> +		 * just mask it and move on.
> +		 */
> +		pr_warn("PMC FIQ fired. Masking.\n");
> +		sysreg_clear_set_s(SYS_APL_PMCR0, PMCR0_IMODE | PMCR0_IACT,
> +				   FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
> +	}
> +
> +	if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_APL_UPMCR0)) == UPMCR0_IMODE_FIQ &&
> +			(read_sysreg_s(SYS_APL_UPMSR) & UPMSR_IACT)) {
> +		/* Same story with uncore PMCs */
> +		pr_warn("Uncore PMC FIQ fired. Masking.\n");
> +		sysreg_clear_set_s(SYS_APL_UPMCR0, UPMCR0_IMODE,
> +				   FIELD_PREP(UPMCR0_IMODE, UPMCR0_IMODE_OFF));
> +	}
> +}
> +
> +static struct irq_chip fiq_chip = {
> +	.name = "AIC-FIQ",
> +	.irq_mask = aic_fiq_mask,
> +	.irq_unmask = aic_fiq_unmask,
> +	.irq_ack = aic_fiq_mask,
> +	.irq_eoi = aic_fiq_eoi,
> +};
> +
> +/*
> + * Main IRQ domain
> + */
> +
> +static void __exception_irq_entry aic_handle_irq_or_fiq(struct pt_regs *regs)
> +{
> +	u64 isr = read_sysreg(isr_el1);
> +
> +	if (isr & PSR_F_BIT)
> +		aic_handle_fiq(regs);
> +
> +	if (isr & PSR_I_BIT)
> +		aic_handle_irq(regs);
> +}
> +
> +static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
> +			      irq_hw_number_t hw)
> +{
> +	struct aic_irq_chip *ic = id->host_data;
> +
> +	irq_set_chip_data(irq, ic);
> +	if (hw < ic->nr_hw) {
> +		irq_set_chip_and_handler(irq, &aic_chip, handle_fasteoi_irq);
> +	} else {
> +		irq_set_percpu_devid(irq);
> +		irq_set_chip_and_handler(irq, &fiq_chip,
> +					 handle_percpu_devid_irq);
> +	}
> +
> +	irq_set_status_flags(irq, IRQ_LEVEL);

I'm definitely not keen on this override, and the trigger information
should be the one coming from the DT, which is already set for you.
It'd probably be useful to provide an irq_set_type() callback that
returns an error when fed an unsupported trigger.

> +	irq_set_noprobe(irq);

This seems to be cargo-culted, and I don't believe this is necessary.

> +
> +	return 0;
> +}
> +
> +static void aic_irq_domain_unmap(struct irq_domain *id, unsigned int irq)
> +{
> +	irq_set_chip_and_handler(irq, NULL, NULL);
> +}
> +
> +static int aic_irq_domain_xlate(struct irq_domain *id,
> +				struct device_node *ctrlr, const u32 *intspec,
> +				unsigned int intsize,
> +				irq_hw_number_t *out_hwirq,
> +				unsigned int *out_type)
> +{
> +	struct aic_irq_chip *ic = id->host_data;
> +
> +	if (intsize != 3)
> +		return -EINVAL;
> +
> +	if (intspec[0] == AIC_IRQ && intspec[1] < ic->nr_hw)
> +		*out_hwirq = intspec[1];
> +	else if (intspec[0] == AIC_FIQ && intspec[1] < AIC_NR_FIQ)
> +		*out_hwirq = ic->nr_hw + intspec[1];
> +	else
> +		return -EINVAL;
> +
> +	*out_type = intspec[2] & IRQ_TYPE_SENSE_MASK;
> +
> +	return 0;
> +}
> +
> +static const struct irq_domain_ops aic_irq_domain_ops = {
> +	.map = aic_irq_domain_map,
> +	.unmap = aic_irq_domain_unmap,
> +	.xlate = aic_irq_domain_xlate,
> +};

You are mixing two APIs: the older OF-specific one, and the newer one
that uses fwnode_handle for hierarchical support. That's OK for older
drivers that were forcefully converted to using generic IPIs, but as
this is a brand new driver, I'd rather it consistently used the new
API. See a proposed rework at [1] (compile tested only).

> +
> +/*
> + * IPI irqchip
> + */
> +
> +static void aic_ipi_mask(struct irq_data *d)
> +{
> +	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
> +	u32 irq_bit = BIT(irqd_to_hwirq(d));
> +	int this_cpu = smp_processor_id();
> +
> +	atomic_and(~irq_bit, &aic_vipi_mask[this_cpu]);

atomic_andnot()?

> +
> +	if (!atomic_read(&aic_vipi_mask[this_cpu]))
> +		aic_ic_write(ic, AIC_IPI_MASK_SET, AIC_IPI_OTHER);

This is odd. It means that you still perform the MMIO write if the bit
was already clear. I think this could be written as:

	u32 val;
	val = atomic_fetch_andnot(irq_bit, &aic_vipi_mask[this_cpu]);
	if (val && !(val & ~irq_bit))
		aic_ic_write();

> +}
> +
> +static void aic_ipi_unmask(struct irq_data *d)
> +{
> +	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
> +	u32 irq_bit = BIT(irqd_to_hwirq(d));
> +	int this_cpu = smp_processor_id();
> +
> +	atomic_or(irq_bit, &aic_vipi_mask[this_cpu]);
> +
> +	aic_ic_write(ic, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);

	val  = atomic_fetch_or(irq_bit, &aic_vipi_mask[this_cpu]);
	if (!val)
		aic_ic_write();

> +}
> +
> +static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
> +{
> +	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
> +	u32 irq_bit = BIT(irqd_to_hwirq(d));
> +	u32 send = 0;
> +	int cpu;
> +
> +	/*
> +	 * Ensure that stores to normal memory are visible to the
> +	 * other CPUs before issuing the IPI. This needs to happen
> +	 * before setting any vIPI flag bits, since that can race the
> +	 * atomic_xchg below.
> +	 */
> +	wmb();
> +
> +	for_each_cpu(cpu, mask) {
> +		if (atomic_read(&aic_vipi_mask[cpu]) & irq_bit) {
> +			atomic_or(irq_bit, &aic_vipi_flag[cpu]);
> +			send |= AIC_IPI_SEND_CPU(cpu);

That's really odd. A masked IPI should be made pending, and delivered
on unmask. I think this all works because we never mask individual
IPIs, as this would otherwise drop interrupts on the floor.

> +		}
> +	}
> +
> +	if (send) {
> +		/*
> +		 * Ensure that the vIPI flag writes complete before issuing
> +		 * the physical IPI.
> +		 */
> +		wmb();
> +		aic_ic_write(ic, AIC_IPI_SEND, send);
> +	}
> +}
> +
> +static struct irq_chip ipi_chip = {
> +	.name = "AIC-IPI",
> +	.irq_mask = aic_ipi_mask,
> +	.irq_unmask = aic_ipi_unmask,
> +	.ipi_send_mask = aic_ipi_send_mask,
> +};
> +
> +/*
> + * IPI IRQ domain
> + */
> +
> +static void aic_handle_ipi(struct pt_regs *regs)
> +{
> +	int this_cpu = smp_processor_id();
> +	int i;
> +	unsigned long firing;
> +
> +	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
> +
> +	/*
> +	 * Ensure that we've received and acked the IPI before we load the vIPI
> +	 * flags. This pairs with the second wmb() above.
> +	 */
> +	mb();

I don't get your ordering here.

If you are trying to order against something that has happened on
another CPU (which is pretty likely in the case of an IPI), why isn't
this a smp_mb_before_atomic() (and conversely smp_mb_after_atomic() in
aic_ipi_send_mask())?

Although this looks to me like a good case for _acquire/_release
semantics.

> +
> +	firing = atomic_xchg(&aic_vipi_flag[this_cpu], 0);
> +
> +	/*
> +	 * Ensure that we've exchanged the vIPI flags before running any IPI
> +	 * handler code. This pairs with the first wmb() above.
> +	 */
> +	rmb();
> +
> +	for_each_set_bit(i, &firing, AIC_NR_SWIPI) {
> +		handle_domain_irq(aic_irqc->ipi_domain, i, regs);
> +	}
> +
> +	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
> +}
> +
> +static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq,
> +			 unsigned int nr_irqs, void *args)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr_irqs; i++) {
> +		irq_set_percpu_devid(virq + i);
> +		irq_domain_set_info(d, virq + i, i, &ipi_chip, d->host_data,
> +				    handle_percpu_devid_irq, NULL, NULL);
> +	}
> +
> +	return 0;
> +}
> +
> +static void aic_ipi_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
> +{
> +	/* Not freeing IPIs */
> +}
> +
> +static const struct irq_domain_ops aic_ipi_domain_ops = {
> +	.alloc = aic_ipi_alloc,
> +	.free = aic_ipi_free,
> +};
> +
> +static int aic_init_smp(struct aic_irq_chip *irqc, struct device_node *node)
> +{
> +	int base_ipi;
> +
> +	irqc->ipi_domain = irq_domain_create_linear(irqc->hw_domain->fwnode, AIC_NR_SWIPI,
> +						    &aic_ipi_domain_ops, irqc);
> +	if (WARN_ON(!irqc->ipi_domain))
> +		return -ENODEV;
> +
> +	irqc->ipi_domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE;
> +	irq_domain_update_bus_token(irqc->ipi_domain, DOMAIN_BUS_IPI);
> +
> +	base_ipi = __irq_domain_alloc_irqs(irqc->ipi_domain, -1, AIC_NR_SWIPI,
> +					   NUMA_NO_NODE, NULL, false, NULL);
> +
> +	if (WARN_ON(!base_ipi)) {
> +		irq_domain_remove(irqc->ipi_domain);
> +		return -ENODEV;
> +	}
> +
> +	set_smp_ipi_range(base_ipi, AIC_NR_SWIPI);
> +
> +	return 0;
> +}
> +
> +static int aic_init_cpu(unsigned int cpu)
> +{
> +	/* Mask all hard-wired per-CPU IRQ/FIQ sources */
> +
> +	/* vGIC maintenance IRQ */
> +	sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);
> +
> +	/* Pending Fast IPI FIQs */
> +	write_sysreg_s(IPI_SR_PENDING, SYS_APL_IPI_SR);
> +
> +	/* Timer FIQs */
> +	sysreg_clear_set(cntp_ctl_el0, 0, ARCH_TIMER_CTRL_IT_MASK);
> +	sysreg_clear_set(cntv_ctl_el0, 0, ARCH_TIMER_CTRL_IT_MASK);
> +	sysreg_clear_set_s(SYS_CNTP_CTL_EL02, 0, ARCH_TIMER_CTRL_IT_MASK);
> +	sysreg_clear_set_s(SYS_CNTV_CTL_EL02, 0, ARCH_TIMER_CTRL_IT_MASK);
> +
> +	/* PMC FIQ */
> +	sysreg_clear_set_s(SYS_APL_PMCR0, PMCR0_IMODE | PMCR0_IACT,
> +			   FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
> +
> +	/* Uncore PMC FIQ */
> +	sysreg_clear_set_s(SYS_APL_UPMCR0, UPMCR0_IMODE,
> +			   FIELD_PREP(UPMCR0_IMODE, UPMCR0_IMODE_OFF));
> +
> +	/*
> +	 * Make sure the kernel's idea of logical CPU order is the same as AIC's
> +	 * If we ever end up with a mismatch here, we will have to introduce
> +	 * a mapping table similar to what other irqchip drivers do.
> +	 */
> +	WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());

This is unlikely to work as soon as you get kexec up and running. You
may not have to worry about this for some time...

> +
> +	return 0;
> +
> +}
> +
> +static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
> +{
> +	int i;
> +	void __iomem *regs;
> +	u32 info;
> +	struct aic_irq_chip *irqc;
> +
> +	regs = of_iomap(node, 0);
> +	if (WARN_ON(!regs))
> +		return -EIO;
> +
> +	irqc = kzalloc(sizeof(*irqc), GFP_KERNEL);
> +	if (!irqc)
> +		return -ENOMEM;
> +
> +	aic_irqc = irqc;
> +	irqc->base = regs;
> +
> +	info = aic_ic_read(irqc, AIC_INFO);
> +	irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info);
> +
> +	irqc->hw_domain = irq_domain_add_linear(node, irqc->nr_hw + AIC_NR_FIQ,
> +						&aic_irq_domain_ops, irqc);
> +	if (WARN_ON(!irqc->hw_domain)) {
> +		iounmap(irqc->base);
> +		kfree(irqc);
> +		return -ENODEV;
> +	}
> +
> +	irq_domain_update_bus_token(irqc->hw_domain, DOMAIN_BUS_WIRED);
> +
> +	if (aic_init_smp(irqc, node)) {
> +		irq_domain_remove(irqc->hw_domain);
> +		iounmap(irqc->base);
> +		kfree(irqc);
> +		return -ENODEV;
> +	}
> +
> +	set_handle_irq(aic_handle_irq_or_fiq);
> +
> +	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
> +		aic_ic_write(irqc, AIC_MASK_SET + i * 4, ~0);
> +	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
> +		aic_ic_write(irqc, AIC_SW_CLR + i * 4, ~0);
> +	for (i = 0; i < irqc->nr_hw; i++)
> +		aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1);
> +
> +	cpuhp_setup_state(CPUHP_AP_IRQ_APPLE_AIC_STARTING,
> +			  "irqchip/apple-aic/ipi:starting",
> +			  aic_init_cpu, NULL);
> +
> +	pr_info("AIC: initialized with %d IRQs, %d FIQs, %d vIPIs\n",
> +		irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
> +
> +	return 0;
> +}
> +
> +IRQCHIP_DECLARE(apple_m1_aic, "apple,aic", aic_of_ic_init);
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index 0042ef362511..7ea1fc537f12 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -100,6 +100,7 @@ enum cpuhp_state {
>  	CPUHP_AP_CPU_PM_STARTING,
>  	CPUHP_AP_IRQ_GIC_STARTING,
>  	CPUHP_AP_IRQ_HIP04_STARTING,
> +	CPUHP_AP_IRQ_APPLE_AIC_STARTING,
>  	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
>  	CPUHP_AP_IRQ_BCM2836_STARTING,
>  	CPUHP_AP_IRQ_MIPS_GIC_STARTING,

Thanks,

	M.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=hack/m1-aic&id=17e2fdb4f551f98a2ff483f3ac2501648bfba33e
Hector Martin Feb. 22, 2021, 7:35 p.m. UTC | #2
On 16/02/2021 03.09, Marc Zyngier wrote:
> On Mon, 15 Feb 2021 12:17:03 +0000,
> Hector Martin <marcan@marcan.st> wrote:
>> This patch introduces basic UP irqchip support, without SMP/IPI support.
> 
> This last comment seems outdated now.

Heh, I forgot to reword this one. Thanks :)

>> +config APPLE_AIC
>> +	bool "Apple Interrupt Controller (AIC)"
>> +	depends on ARM64
>> +	default ARCH_APPLE
>> +	select IRQ_DOMAIN
>> +	select IRQ_DOMAIN_HIERARCHY
> 
> arm64 selects GENERIC_IRQ_IPI, which selects IRQ_DOMAIN_HIERARCHY,
> which selects IRQ_DOMAIN. So these two lines are superfluous.

Ack, removing these for v3.

>> + * In addition, this driver also handles FIQs, as these are routed to the same IRQ vector. These
>> + * are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and performance counters (TODO).
>> + *
> 
> nit: A bit of comment formatting could be helpful.

Wrapped this to 80 columns for v3.

>> +#include <linux/bits.h>
>> +#include <linux/bitfield.h>
>> +#include <linux/cpuhotplug.h>
>> +#include <linux/io.h>
>> +#include <linux/irqchip.h>
>> +#include <linux/irqchip/arm-gic-v3.h>
> 
> I'd rather you move the ICH_HCR_* definitions to sysreg.h rather than
> including the GICv3 stuff. They are only there for historical reasons
> (such as supporting KVM on 32bit systems), none of which apply anymore.

Just ICH_HCR, or should I bring all of the ICH_ and ICC_ defines along 
with it?

>> +	aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));
>> +	irq_data_update_effective_affinity(d, cpumask_of(cpu));
> 
> It is fine to pick a single CPU out of the whole affinity set, but you
> should tell the kernel that this is the case (irqd_set_single_target()).

>> +
>> +	irq_set_status_flags(irq, IRQ_LEVEL);
> 
> I'm definitely not keen on this override, and the trigger information
> should be the one coming from the DT, which is already set for you.
> It'd probably be useful to provide an irq_set_type() callback that
> returns an error when fed an unsupported trigger.
> 

>> +	irq_set_noprobe(irq);
> 
> This seems to be cargo-culted, and I don't believe this is necessary.

>> +static const struct irq_domain_ops aic_irq_domain_ops = {
>> +	.map = aic_irq_domain_map,
>> +	.unmap = aic_irq_domain_unmap,
>> +	.xlate = aic_irq_domain_xlate,
>> +};
> 
> You are mixing two APIs: the older OF-specific one, and the newer one
> that uses fwnode_handle for hierarchical support. That's OK for older
> drivers that were forcefully converted to using generic IPIs, but as
> this is a brand new driver, I'd rather it consistently used the new
> API. See a proposed rework at [1] (compile tested only).

Applying your fixups for these, thanks! :)

>> +	atomic_and(~irq_bit, &aic_vipi_mask[this_cpu]);
> 
> atomic_andnot()?
> 
>> +
>> +	if (!atomic_read(&aic_vipi_mask[this_cpu]))
>> +		aic_ic_write(ic, AIC_IPI_MASK_SET, AIC_IPI_OTHER);
> 
> This is odd. It means that you still perform the MMIO write if the bit
> was already clear. I think this could be written as:
> 
> 	u32 val;
> 	val = atomic_fetch_andnot(irq_bit, &aic_vipi_mask[this_cpu]);
> 	if (val && !(val & ~irq_bit))
> 		aic_ic_write();

> 
> 	val  = atomic_fetch_or(irq_bit, &aic_vipi_mask[this_cpu]);
> 	if (!val)
> 		aic_ic_write();

This makes more sense to avoid the redundant MMIO writes. I need to get 
more familiar with all the available atomic ops... lots of useful stuff 
in there I didn't know about.

>> +	for_each_cpu(cpu, mask) {
>> +		if (atomic_read(&aic_vipi_mask[cpu]) & irq_bit) {
>> +			atomic_or(irq_bit, &aic_vipi_flag[cpu]);
>> +			send |= AIC_IPI_SEND_CPU(cpu);
> 
> That's really odd. A masked IPI should be made pending, and delivered
> on unmask. I think this all works because we never mask individual
> IPIs, as this would otherwise drop interrupts on the floor.

I wasn't really sure whether IPIs are supposed to end up pending like 
that; indeed if that's how it's supposed to work, then I also need logic 
at mask/unmask time to fire off any pending IPIs. I'll do it like that 
for v3.

Now I wonder how other drivers do it... I'm guessing this never gets 
tested, since the IPI code only exercises a fraction of the irq features...

>> +static void aic_handle_ipi(struct pt_regs *regs)
>> +{
>> +	int this_cpu = smp_processor_id();
>> +	int i;
>> +	unsigned long firing;
>> +
>> +	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
>> +
>> +	/*
>> +	 * Ensure that we've received and acked the IPI before we load the vIPI
>> +	 * flags. This pairs with the second wmb() above.
>> +	 */
>> +	mb();
> 
> I don't get your ordering here.
> 
> If you are trying to order against something that has happened on
> another CPU (which is pretty likely in the case of an IPI), why isn't
> this a smp_mb_before_atomic() (and conversely smp_mb_after_atomic() in
> aic_ipi_send_mask())?
> 
> Although this looks to me like a good case for _acquire/_release
> semantics.

This is trying to order the atomic ops with the IPI IRQ itself, in 
particular the ACK in the preceding line. If they execute in reverse 
order (or more precisely if the ACK takes effect after the xchg), this 
happens and we lose an IPI:

CPU1              CPU2
set vIPI #0
fire IPI
                   IPI IRQ
                   read EVENT
                   / xchg vIPI
set vIPI #1       X
fire IPI          |
                   \ ACK IPI

The converse race can also happen in the CPU1 path, of course, if the 
IPI ends up fired before the vIPI is set, hence the barrier there.

What I'm not sure is how the smp_ ops order with regard to 
writel_relaxed. It seems like mb() is dsb(sy) and smp_mb() is dmb(ish) 
and the atomic versions just default to aliasing smp_mb()). An 
inner-sharable dmb doesn't sound like it would safely satisfy this 
requirement, as MMIO out to AIC is involved (and we don't know if it's 
in the inner sharable domain or not for these purposes). Since the MMIO 
is nGnRnE, I would expect that a dsb(sy) would satisfy this requirement, 
as then the write op really shouldn't complete until it has taken effect 
in AIC.

>> +	/*
>> +	 * Make sure the kernel's idea of logical CPU order is the same as AIC's
>> +	 * If we ever end up with a mismatch here, we will have to introduce
>> +	 * a mapping table similar to what other irqchip drivers do.
>> +	 */
>> +	WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
> 
> This is unlikely to work as soon as you get kexec up and running. You
> may not have to worry about this for some time...

Ah, can kexec randomly shuffle CPUs around?

The solution here is obvious, but at this point I'm more keen on punting 
this to a future patch instead of introducing more complexity into the 
initial series; gotta leave behind some bugs to fix later :)
Marc Zyngier Feb. 23, 2021, 5:37 p.m. UTC | #3
Hi Hector,

On Mon, 22 Feb 2021 19:35:03 +0000,
Hector Martin <marcan@marcan.st> wrote:
> 
> On 16/02/2021 03.09, Marc Zyngier wrote:
> > On Mon, 15 Feb 2021 12:17:03 +0000,
> > Hector Martin <marcan@marcan.st> wrote:
> >> This patch introduces basic UP irqchip support, without SMP/IPI support.
> > 
> > This last comment seems outdated now.
> 
> Heh, I forgot to reword this one. Thanks :)
> 
> >> +config APPLE_AIC
> >> +	bool "Apple Interrupt Controller (AIC)"
> >> +	depends on ARM64
> >> +	default ARCH_APPLE
> >> +	select IRQ_DOMAIN
> >> +	select IRQ_DOMAIN_HIERARCHY
> > 
> > arm64 selects GENERIC_IRQ_IPI, which selects IRQ_DOMAIN_HIERARCHY,
> > which selects IRQ_DOMAIN. So these two lines are superfluous.
> 
> Ack, removing these for v3.
> 
> >> + * In addition, this driver also handles FIQs, as these are routed to the same IRQ vector. These
> >> + * are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and performance counters (TODO).
> >> + *
> > 
> > nit: A bit of comment formatting could be helpful.
> 
> Wrapped this to 80 columns for v3.
> 
> >> +#include <linux/bits.h>
> >> +#include <linux/bitfield.h>
> >> +#include <linux/cpuhotplug.h>
> >> +#include <linux/io.h>
> >> +#include <linux/irqchip.h>
> >> +#include <linux/irqchip/arm-gic-v3.h>
> > 
> > I'd rather you move the ICH_HCR_* definitions to sysreg.h rather than
> > including the GICv3 stuff. They are only there for historical reasons
> > (such as supporting KVM on 32bit systems), none of which apply anymore.
> 
> Just ICH_HCR, or should I bring all of the ICH_ and ICC_ defines along
> with it?

Just ICH_HCR for now would be fine.

> 
> >> +	aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));
> >> +	irq_data_update_effective_affinity(d, cpumask_of(cpu));
> > 
> > It is fine to pick a single CPU out of the whole affinity set, but you
> > should tell the kernel that this is the case (irqd_set_single_target()).
> 
> >> +
> >> +	irq_set_status_flags(irq, IRQ_LEVEL);
> > 
> > I'm definitely not keen on this override, and the trigger information
> > should be the one coming from the DT, which is already set for you.
> > It'd probably be useful to provide an irq_set_type() callback that
> > returns an error when fed an unsupported trigger.
> > 
> 
> >> +	irq_set_noprobe(irq);
> > 
> > This seems to be cargo-culted, and I don't believe this is necessary.
> 
> >> +static const struct irq_domain_ops aic_irq_domain_ops = {
> >> +	.map = aic_irq_domain_map,
> >> +	.unmap = aic_irq_domain_unmap,
> >> +	.xlate = aic_irq_domain_xlate,
> >> +};
> > 
> > You are mixing two APIs: the older OF-specific one, and the newer one
> > that uses fwnode_handle for hierarchical support. That's OK for older
> > drivers that were forcefully converted to using generic IPIs, but as
> > this is a brand new driver, I'd rather it consistently used the new
> > API. See a proposed rework at [1] (compile tested only).
> 
> Applying your fixups for these, thanks! :)

My pleasure!

> 
> >> +	atomic_and(~irq_bit, &aic_vipi_mask[this_cpu]);
> > 
> > atomic_andnot()?
> > 
> >> +
> >> +	if (!atomic_read(&aic_vipi_mask[this_cpu]))
> >> +		aic_ic_write(ic, AIC_IPI_MASK_SET, AIC_IPI_OTHER);
> > 
> > This is odd. It means that you still perform the MMIO write if the bit
> > was already clear. I think this could be written as:
> > 
> > 	u32 val;
> > 	val = atomic_fetch_andnot(irq_bit, &aic_vipi_mask[this_cpu]);
> > 	if (val && !(val & ~irq_bit))
> > 		aic_ic_write();
> 
> > 
> > 	val  = atomic_fetch_or(irq_bit, &aic_vipi_mask[this_cpu]);
> > 	if (!val)
> > 		aic_ic_write();
> 
> This makes more sense to avoid the redundant MMIO writes. I need to
> get more familiar with all the available atomic ops... lots of useful
> stuff in there I didn't know about.
> 
> >> +	for_each_cpu(cpu, mask) {
> >> +		if (atomic_read(&aic_vipi_mask[cpu]) & irq_bit) {
> >> +			atomic_or(irq_bit, &aic_vipi_flag[cpu]);
> >> +			send |= AIC_IPI_SEND_CPU(cpu);
> > 
> > That's really odd. A masked IPI should be made pending, and delivered
> > on unmask. I think this all works because we never mask individual
> > IPIs, as this would otherwise drop interrupts on the floor.
> 
> I wasn't really sure whether IPIs are supposed to end up pending like
> that; indeed if that's how it's supposed to work, then I also need
> logic at mask/unmask time to fire off any pending IPIs. I'll do it
> like that for v3.

Yes, unmask() needs to release pending IPIs.

> Now I wonder how other drivers do it... I'm guessing this never gets
> tested, since the IPI code only exercises a fraction of the irq
> features...

In most cases, the HW does it for you, fortunately. A masked IPI that
is made pending stays pending until unmasked. On the GIC, the SGIs
(which is the interrupt type we use for IPIs) are just dealt with like
any other interrupt, and are subject to the same life cycle. I think
this is the first SW-based IPI we have on arm64.

> 
> >> +static void aic_handle_ipi(struct pt_regs *regs)
> >> +{
> >> +	int this_cpu = smp_processor_id();
> >> +	int i;
> >> +	unsigned long firing;
> >> +
> >> +	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
> >> +
> >> +	/*
> >> +	 * Ensure that we've received and acked the IPI before we load the vIPI
> >> +	 * flags. This pairs with the second wmb() above.
> >> +	 */
> >> +	mb();
> > 
> > I don't get your ordering here.
> > 
> > If you are trying to order against something that has happened on
> > another CPU (which is pretty likely in the case of an IPI), why isn't
> > this a smp_mb_before_atomic() (and conversely smp_mb_after_atomic() in
> > aic_ipi_send_mask())?
> > 
> > Although this looks to me like a good case for _acquire/_release
> > semantics.
> 
> This is trying to order the atomic ops with the IPI IRQ itself, in
> particular the ACK in the preceding line. If they execute in reverse
> order (or more precisely if the ACK takes effect after the xchg), this
> happens and we lose an IPI:
> 
> CPU1              CPU2
> set vIPI #0
> fire IPI
>                   IPI IRQ
>                   read EVENT
>                   / xchg vIPI
> set vIPI #1       X
> fire IPI          |
>                   \ ACK IPI
> 
> The converse race can also happen in the CPU1 path, of course, if the
> IPI ends up fired before the vIPI is set, hence the barrier there.

I'm going to be brave and suggest something. Will can chime in and
explain why I'm totally wrong! :D

I *think* this scenario should be solved with:

- sender:
	// virtually signal the remote CPU, making sure this RMW
	// is ordered after all the previous writes
	atomic_or_release(irq_bit, &vipi[target]);
	// Signal the IPI
	writel(...);

  although I think the _release is superfluous given that the writel()
  is a pretty heavy hammer already. You may need to rejig this to
  account for the loop

- receiver
        // Ack the interrupt, all further memory accesses are ordered
        // after it
	irq = readl();
	// Order all further loads after this
	ipis = atomic_xchg_acquire(&vipi[me], 0);
	// handle all interrupts described in ipis, which may include
	// more than a single source
	handle_ipis(ipis);

  Here, we may end-up with multiple IPIs (#0 and #1 in your
  example). It doesn't really matter, and the only ill effect is a
  potential spurious IPI if we have consumed more than a single one.

> What I'm not sure is how the smp_ ops order with regard to
> writel_relaxed. It seems like mb() is dsb(sy) and smp_mb() is dmb(ish)
> and the atomic versions just default to aliasing smp_mb()). An
> inner-sharable dmb doesn't sound like it would safely satisfy this
> requirement, as MMIO out to AIC is involved (and we don't know if it's
> in the inner sharable domain or not for these purposes). Since the
> MMIO is nGnRnE, I would expect that a dsb(sy) would satisfy this
> requirement, as then the write op really shouldn't complete until it
> has taken effect in AIC.

In the example above, I used a non-relaxed write, as it guarantees
that it is ordered after any DMA agent that can access the data
ordered before (and I assume that if DMA can see it, another CPU can
as well).

I'm not convinced that the DSB(SY) solves anything here, *unless*
there are some other rules specific to the AIC. DSB doesn't guarantee
that the device has actually changed state as a consequence of the
MMIO access. If I remember well, you actually need a read from the
same device to ensure the state has been changed. But after all, a
state change isn't what you're after, only ordering.

> 
> >> +	/*
> >> +	 * Make sure the kernel's idea of logical CPU order is the same as AIC's
> >> +	 * If we ever end up with a mismatch here, we will have to introduce
> >> +	 * a mapping table similar to what other irqchip drivers do.
> >> +	 */
> >> +	WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
> > 
> > This is unlikely to work as soon as you get kexec up and running. You
> > may not have to worry about this for some time...
> 
> Ah, can kexec randomly shuffle CPUs around?

Not completely randomly ;-). kexec can execute on any CPU, and all the
others will be shut down. When that CPU enters the secondary kernel,
it will be CPU0, no matter what it was in the previous instance.

> The solution here is obvious, but at this point I'm more keen on
> punting this to a future patch instead of introducing more complexity
> into the initial series; gotta leave behind some bugs to fix later
> :)

I think that's fine for now. kexec is a long way away (we need to
solve the firmware story first), so keeping it on the back burner
seems like a sensible approach.

Thanks,

	M.
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 9fe723033e63..a8f258fbb5f1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,8 @@  T:	git https://github.com/AsahiLinux/linux.git
 F:	Documentation/devicetree/bindings/arm/apple.yaml
 F:	Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
 F:	arch/arm64/include/asm/sysreg_apple.h
+F:	drivers/irqchip/irq-apple-aic.c
+F:	include/dt-bindings/interrupt-controller/apple-aic.h
 
 ARM/ARTPEC MACHINE SUPPORT
 M:	Jesper Nilsson <jesper.nilsson@axis.com>
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index b147f22a78f4..fca080640c1a 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -590,4 +590,14 @@  config MST_IRQ
 	help
 	  Support MStar Interrupt Controller.
 
+config APPLE_AIC
+	bool "Apple Interrupt Controller (AIC)"
+	depends on ARM64
+	default ARCH_APPLE
+	select IRQ_DOMAIN
+	select IRQ_DOMAIN_HIERARCHY
+	help
+	  Support for the Apple Interrupt Controller found on Apple Silicon SoCs,
+	  such as the M1.
+
 endmenu
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 0ac93bfaec61..0e2ba7c2dce7 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -113,3 +113,4 @@  obj-$(CONFIG_LOONGSON_PCH_PIC)		+= irq-loongson-pch-pic.o
 obj-$(CONFIG_LOONGSON_PCH_MSI)		+= irq-loongson-pch-msi.o
 obj-$(CONFIG_MST_IRQ)			+= irq-mst-intc.o
 obj-$(CONFIG_SL28CPLD_INTC)		+= irq-sl28cpld.o
+obj-$(CONFIG_APPLE_AIC)			+= irq-apple-aic.o
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
new file mode 100644
index 000000000000..604dbb8fd898
--- /dev/null
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -0,0 +1,647 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright The Asahi Linux Contributors
+ *
+ * Based on irq-lpc32xx:
+ *   Copyright 2015-2016 Vladimir Zapolskiy <vz@mleia.com>
+ * Based on irq-bcm2836:
+ *   Copyright 2015 Broadcom
+ */
+
+/*
+ * AIC is a fairly simple interrupt controller with the following features:
+ *
+ * - 896 level-triggered hardware IRQs
+ *   - Single mask bit per IRQ
+ *   - Per-IRQ affinity setting
+ *   - Automatic masking on event delivery (auto-ack)
+ *   - Software triggering (ORed with hw line)
+ * - 2 per-CPU IPIs (meant as "self" and "other", but they are interchangeable if not symmetric)
+ * - Automatic prioritization (single event/ack register per CPU, lower IRQs = higher priority)
+ * - Automatic masking on ack
+ * - Default "this CPU" register view and explicit per-CPU views
+ *
+ * In addition, this driver also handles FIQs, as these are routed to the same IRQ vector. These
+ * are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and performance counters (TODO).
+ *
+ * Implementation notes:
+ *
+ * - This driver creates two IRQ domains, one for HW IRQs and internal FIQs, and one for IPIs
+ * - Since Linux needs more than 2 IPIs, we implement a software IRQ controller and funnel all IPIs
+ *   into one per-CPU IPI (the second "self" IPI is unused).
+ * - FIQ hwirq numbers are assigned after true hwirqs, and are per-cpu
+ * - DT bindings use 3-cell form (like GIC):
+ *   - <0 nr flags> - hwirq #nr
+ *   - <1 nr flags> - FIQ #nr
+ *     - nr=0  Physical HV timer
+ *     - nr=1  Virtual HV timer
+ *     - nr=2  Physical guest timer
+ *     - nr=3  Virtual guest timer
+ *
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/cpuhotplug.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <asm/exception.h>
+#include <asm/sysreg.h>
+#include <asm/sysreg_apple.h>
+
+#include <dt-bindings/interrupt-controller/apple-aic.h>
+
+#define AIC_INFO		0x0004
+#define AIC_INFO_NR_HW		GENMASK(15, 0)
+
+#define AIC_CONFIG		0x0010
+
+#define AIC_WHOAMI		0x2000
+#define AIC_EVENT		0x2004
+#define AIC_EVENT_TYPE		GENMASK(31, 16)
+#define AIC_EVENT_NUM		GENMASK(15, 0)
+
+#define AIC_EVENT_TYPE_HW	1
+#define AIC_EVENT_TYPE_IPI	4
+#define AIC_EVENT_IPI_OTHER	1
+#define AIC_EVENT_IPI_SELF	2
+
+#define AIC_IPI_SEND		0x2008
+#define AIC_IPI_ACK		0x200c
+#define AIC_IPI_MASK_SET	0x2024
+#define AIC_IPI_MASK_CLR	0x2028
+
+#define AIC_IPI_SEND_CPU(cpu)	BIT(cpu)
+
+#define AIC_IPI_OTHER		BIT(0)
+#define AIC_IPI_SELF		BIT(31)
+
+#define AIC_TARGET_CPU		0x3000
+#define AIC_SW_SET		0x4000
+#define AIC_SW_CLR		0x4080
+#define AIC_MASK_SET		0x4100
+#define AIC_MASK_CLR		0x4180
+
+#define AIC_CPU_IPI_SET(cpu)	(0x5008 + ((cpu) << 7))
+#define AIC_CPU_IPI_CLR(cpu)	(0x500c + ((cpu) << 7))
+#define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7))
+#define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7))
+
+#define MASK_REG(x)		(4 * ((x) >> 5))
+#define MASK_BIT(x)		BIT((x) & 0x1f)
+
+#define AIC_NR_FIQ		4
+#define AIC_NR_SWIPI		32
+
+/*
+ * Max 31 bits in IPI SEND register (top bit is self).
+ * >=32-core chips will need code changes anyway.
+ */
+#define AIC_MAX_CPUS		31
+
+struct aic_irq_chip {
+	void __iomem *base;
+	struct irq_domain *hw_domain;
+	struct irq_domain *ipi_domain;
+	int nr_hw;
+	int ipi_hwirq;
+};
+
+static atomic_t aic_vipi_flag[AIC_MAX_CPUS];
+static atomic_t aic_vipi_mask[AIC_MAX_CPUS];
+
+static struct aic_irq_chip *aic_irqc;
+
+static void aic_handle_ipi(struct pt_regs *regs);
+
+static u32 aic_ic_read(struct aic_irq_chip *ic, u32 reg)
+{
+	return readl_relaxed(ic->base + reg);
+}
+
+static void aic_ic_write(struct aic_irq_chip *ic, u32 reg, u32 val)
+{
+	writel_relaxed(val, ic->base + reg);
+}
+
+/*
+ * IRQ irqchip
+ */
+
+static void aic_irq_mask(struct irq_data *d)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+
+	aic_ic_write(ic, AIC_MASK_SET + MASK_REG(d->hwirq),
+		     MASK_BIT(d->hwirq));
+}
+
+static void aic_irq_unmask(struct irq_data *d)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+
+	aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(d->hwirq),
+		     MASK_BIT(d->hwirq));
+}
+
+static void aic_irq_eoi(struct irq_data *d)
+{
+	/*
+	 * Reading the interrupt reason automatically acknowledges and masks
+	 * the IRQ, so we just unmask it here if needed.
+	 */
+	if (!irqd_irq_disabled(d) && !irqd_irq_masked(d))
+		aic_irq_unmask(d);
+}
+
+static void aic_handle_irq(struct pt_regs *regs)
+{
+	struct aic_irq_chip *ic = aic_irqc;
+	u32 event, type, irq;
+
+	do {
+		/*
+		 * We cannot use a relaxed read here, as DMA needs to be
+		 * ordered with respect to the IRQ firing.
+		 */
+		event = readl(ic->base + AIC_EVENT);
+		type = FIELD_GET(AIC_EVENT_TYPE, event);
+		irq = FIELD_GET(AIC_EVENT_NUM, event);
+
+		if (type == AIC_EVENT_TYPE_HW)
+			handle_domain_irq(aic_irqc->hw_domain, irq, regs);
+		else if (type == AIC_EVENT_TYPE_IPI && irq == 1)
+			aic_handle_ipi(regs);
+		else if (event != 0)
+			pr_err("Unknown IRQ event %d, %d\n", type, irq);
+	} while (event);
+
+	/*
+	 * vGIC maintenance interrupts end up here too, so we need to check
+	 * for them separately. Just report and disable vGIC for now, until
+	 * we implement this properly.
+	 */
+	if ((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) &&
+		read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {
+		pr_err("vGIC IRQ fired, disabling.\n");
+		sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);
+	}
+}
+
+static int aic_irq_set_affinity(struct irq_data *d,
+				const struct cpumask *mask_val, bool force)
+{
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+	int cpu;
+
+	if (hwirq > ic->nr_hw)
+		return -EINVAL;
+
+	if (force)
+		cpu = cpumask_first(mask_val);
+	else
+		cpu = cpumask_any_and(mask_val, cpu_online_mask);
+
+	aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));
+	irq_data_update_effective_affinity(d, cpumask_of(cpu));
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip aic_chip = {
+	.name = "AIC",
+	.irq_mask = aic_irq_mask,
+	.irq_unmask = aic_irq_unmask,
+	.irq_eoi = aic_irq_eoi,
+	.irq_set_affinity = aic_irq_set_affinity,
+};
+
+/*
+ * FIQ irqchip
+ */
+
+static void aic_fiq_mask(struct irq_data *d)
+{
+	/* Only the guest timers have real mask bits, unfortunately. */
+	switch (d->hwirq) {
+	case AIC_TMR_GUEST_PHYS:
+		sysreg_clear_set_s(SYS_APL_VM_TMR_MASK,
+					VM_TMR_MASK_P, 0);
+		break;
+	case AIC_TMR_GUEST_VIRT:
+		sysreg_clear_set_s(SYS_APL_VM_TMR_MASK,
+					VM_TMR_MASK_V, 0);
+		break;
+	}
+}
+
+static void aic_fiq_unmask(struct irq_data *d)
+{
+	switch (d->hwirq) {
+	case AIC_TMR_GUEST_PHYS:
+		sysreg_clear_set_s(SYS_APL_VM_TMR_MASK,
+					0, VM_TMR_MASK_P);
+		break;
+	case AIC_TMR_GUEST_VIRT:
+		sysreg_clear_set_s(SYS_APL_VM_TMR_MASK,
+					0, VM_TMR_MASK_V);
+		break;
+	}
+}
+
+static void aic_fiq_eoi(struct irq_data *d)
+{
+	/* We mask to ack (where we can), so we need to unmask at EOI. */
+	if (!irqd_irq_disabled(d) && !irqd_irq_masked(d))
+		aic_fiq_unmask(d);
+}
+
+#define TIMER_FIRING(x)                                                        \
+	(((x) & (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_MASK |            \
+		 ARCH_TIMER_CTRL_IT_STAT)) ==                                  \
+	 (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
+
+static void aic_handle_fiq(struct pt_regs *regs)
+{
+	/*
+	 * It would be really if we had a system register that lets us get
+	 * the FIQ source state without having to peek down into sources...
+	 * but such a register does not seem to exist.
+	 *
+	 * So, we have these potential sources to test for:
+	 *  - Fast IPIs (not yet used)
+	 *  - The 4 timers (CNTP, CNTV for each of HV and guest)
+	 *  - Per-core PMCs (not yet supported)
+	 *  - Per-cluster uncore PMCs (not yet supported)
+	 *
+	 * Since not dealing with any of these results in a FIQ storm,
+	 * we check for everything here, even things we don't support yet.
+	 */
+
+	if (read_sysreg_s(SYS_APL_IPI_SR) & IPI_SR_PENDING) {
+		pr_warn("Fast IPI fired. Acking.\n");
+		write_sysreg_s(IPI_SR_PENDING, SYS_APL_IPI_SR);
+	}
+
+	if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
+		handle_domain_irq(aic_irqc->hw_domain,
+				  aic_irqc->nr_hw + AIC_TMR_HV_PHYS, regs);
+
+	if (TIMER_FIRING(read_sysreg(cntv_ctl_el0)))
+		handle_domain_irq(aic_irqc->hw_domain,
+				  aic_irqc->nr_hw + AIC_TMR_HV_VIRT, regs);
+
+	if (TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02)))
+		handle_domain_irq(aic_irqc->hw_domain,
+				  aic_irqc->nr_hw + AIC_TMR_GUEST_PHYS, regs);
+
+	if (TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02)))
+		handle_domain_irq(aic_irqc->hw_domain,
+				  aic_irqc->nr_hw + AIC_TMR_GUEST_VIRT, regs);
+
+	if ((read_sysreg_s(SYS_APL_PMCR0) & (PMCR0_IMODE | PMCR0_IACT))
+			== (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
+		/*
+		 * Not supported yet, let's figure out how to handle this when
+		 * we implement these proprietary performance counters. For now,
+		 * just mask it and move on.
+		 */
+		pr_warn("PMC FIQ fired. Masking.\n");
+		sysreg_clear_set_s(SYS_APL_PMCR0, PMCR0_IMODE | PMCR0_IACT,
+				   FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
+	}
+
+	if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_APL_UPMCR0)) == UPMCR0_IMODE_FIQ &&
+			(read_sysreg_s(SYS_APL_UPMSR) & UPMSR_IACT)) {
+		/* Same story with uncore PMCs */
+		pr_warn("Uncore PMC FIQ fired. Masking.\n");
+		sysreg_clear_set_s(SYS_APL_UPMCR0, UPMCR0_IMODE,
+				   FIELD_PREP(UPMCR0_IMODE, UPMCR0_IMODE_OFF));
+	}
+}
+
+static struct irq_chip fiq_chip = {
+	.name = "AIC-FIQ",
+	.irq_mask = aic_fiq_mask,
+	.irq_unmask = aic_fiq_unmask,
+	.irq_ack = aic_fiq_mask,
+	.irq_eoi = aic_fiq_eoi,
+};
+
+/*
+ * Main IRQ domain
+ */
+
+static void __exception_irq_entry aic_handle_irq_or_fiq(struct pt_regs *regs)
+{
+	u64 isr = read_sysreg(isr_el1);
+
+	if (isr & PSR_F_BIT)
+		aic_handle_fiq(regs);
+
+	if (isr & PSR_I_BIT)
+		aic_handle_irq(regs);
+}
+
+static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
+			      irq_hw_number_t hw)
+{
+	struct aic_irq_chip *ic = id->host_data;
+
+	irq_set_chip_data(irq, ic);
+	if (hw < ic->nr_hw) {
+		irq_set_chip_and_handler(irq, &aic_chip, handle_fasteoi_irq);
+	} else {
+		irq_set_percpu_devid(irq);
+		irq_set_chip_and_handler(irq, &fiq_chip,
+					 handle_percpu_devid_irq);
+	}
+
+	irq_set_status_flags(irq, IRQ_LEVEL);
+	irq_set_noprobe(irq);
+
+	return 0;
+}
+
+static void aic_irq_domain_unmap(struct irq_domain *id, unsigned int irq)
+{
+	irq_set_chip_and_handler(irq, NULL, NULL);
+}
+
+static int aic_irq_domain_xlate(struct irq_domain *id,
+				struct device_node *ctrlr, const u32 *intspec,
+				unsigned int intsize,
+				irq_hw_number_t *out_hwirq,
+				unsigned int *out_type)
+{
+	struct aic_irq_chip *ic = id->host_data;
+
+	if (intsize != 3)
+		return -EINVAL;
+
+	if (intspec[0] == AIC_IRQ && intspec[1] < ic->nr_hw)
+		*out_hwirq = intspec[1];
+	else if (intspec[0] == AIC_FIQ && intspec[1] < AIC_NR_FIQ)
+		*out_hwirq = ic->nr_hw + intspec[1];
+	else
+		return -EINVAL;
+
+	*out_type = intspec[2] & IRQ_TYPE_SENSE_MASK;
+
+	return 0;
+}
+
+static const struct irq_domain_ops aic_irq_domain_ops = {
+	.map = aic_irq_domain_map,
+	.unmap = aic_irq_domain_unmap,
+	.xlate = aic_irq_domain_xlate,
+};
+
+/*
+ * IPI irqchip
+ */
+
+static void aic_ipi_mask(struct irq_data *d)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+	u32 irq_bit = BIT(irqd_to_hwirq(d));
+	int this_cpu = smp_processor_id();
+
+	atomic_and(~irq_bit, &aic_vipi_mask[this_cpu]);
+
+	if (!atomic_read(&aic_vipi_mask[this_cpu]))
+		aic_ic_write(ic, AIC_IPI_MASK_SET, AIC_IPI_OTHER);
+}
+
+static void aic_ipi_unmask(struct irq_data *d)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+	u32 irq_bit = BIT(irqd_to_hwirq(d));
+	int this_cpu = smp_processor_id();
+
+	atomic_or(irq_bit, &aic_vipi_mask[this_cpu]);
+
+	aic_ic_write(ic, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+}
+
+static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+	u32 irq_bit = BIT(irqd_to_hwirq(d));
+	u32 send = 0;
+	int cpu;
+
+	/*
+	 * Ensure that stores to normal memory are visible to the
+	 * other CPUs before issuing the IPI. This needs to happen
+	 * before setting any vIPI flag bits, since that can race the
+	 * atomic_xchg below.
+	 */
+	wmb();
+
+	for_each_cpu(cpu, mask) {
+		if (atomic_read(&aic_vipi_mask[cpu]) & irq_bit) {
+			atomic_or(irq_bit, &aic_vipi_flag[cpu]);
+			send |= AIC_IPI_SEND_CPU(cpu);
+		}
+	}
+
+	if (send) {
+		/*
+		 * Ensure that the vIPI flag writes complete before issuing
+		 * the physical IPI.
+		 */
+		wmb();
+		aic_ic_write(ic, AIC_IPI_SEND, send);
+	}
+}
+
+static struct irq_chip ipi_chip = {
+	.name = "AIC-IPI",
+	.irq_mask = aic_ipi_mask,
+	.irq_unmask = aic_ipi_unmask,
+	.ipi_send_mask = aic_ipi_send_mask,
+};
+
+/*
+ * IPI IRQ domain
+ */
+
+static void aic_handle_ipi(struct pt_regs *regs)
+{
+	int this_cpu = smp_processor_id();
+	int i;
+	unsigned long firing;
+
+	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
+
+	/*
+	 * Ensure that we've received and acked the IPI before we load the vIPI
+	 * flags. This pairs with the second wmb() above.
+	 */
+	mb();
+
+	firing = atomic_xchg(&aic_vipi_flag[this_cpu], 0);
+
+	/*
+	 * Ensure that we've exchanged the vIPI flags before running any IPI
+	 * handler code. This pairs with the first wmb() above.
+	 */
+	rmb();
+
+	for_each_set_bit(i, &firing, AIC_NR_SWIPI) {
+		handle_domain_irq(aic_irqc->ipi_domain, i, regs);
+	}
+
+	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+}
+
+static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq,
+			 unsigned int nr_irqs, void *args)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_set_percpu_devid(virq + i);
+		irq_domain_set_info(d, virq + i, i, &ipi_chip, d->host_data,
+				    handle_percpu_devid_irq, NULL, NULL);
+	}
+
+	return 0;
+}
+
+static void aic_ipi_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
+{
+	/* Not freeing IPIs */
+}
+
+static const struct irq_domain_ops aic_ipi_domain_ops = {
+	.alloc = aic_ipi_alloc,
+	.free = aic_ipi_free,
+};
+
+static int aic_init_smp(struct aic_irq_chip *irqc, struct device_node *node)
+{
+	int base_ipi;
+
+	irqc->ipi_domain = irq_domain_create_linear(irqc->hw_domain->fwnode, AIC_NR_SWIPI,
+						    &aic_ipi_domain_ops, irqc);
+	if (WARN_ON(!irqc->ipi_domain))
+		return -ENODEV;
+
+	irqc->ipi_domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE;
+	irq_domain_update_bus_token(irqc->ipi_domain, DOMAIN_BUS_IPI);
+
+	base_ipi = __irq_domain_alloc_irqs(irqc->ipi_domain, -1, AIC_NR_SWIPI,
+					   NUMA_NO_NODE, NULL, false, NULL);
+
+	if (WARN_ON(!base_ipi)) {
+		irq_domain_remove(irqc->ipi_domain);
+		return -ENODEV;
+	}
+
+	set_smp_ipi_range(base_ipi, AIC_NR_SWIPI);
+
+	return 0;
+}
+
+static int aic_init_cpu(unsigned int cpu)
+{
+	/* Mask all hard-wired per-CPU IRQ/FIQ sources */
+
+	/* vGIC maintenance IRQ */
+	sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);
+
+	/* Pending Fast IPI FIQs */
+	write_sysreg_s(IPI_SR_PENDING, SYS_APL_IPI_SR);
+
+	/* Timer FIQs */
+	sysreg_clear_set(cntp_ctl_el0, 0, ARCH_TIMER_CTRL_IT_MASK);
+	sysreg_clear_set(cntv_ctl_el0, 0, ARCH_TIMER_CTRL_IT_MASK);
+	sysreg_clear_set_s(SYS_CNTP_CTL_EL02, 0, ARCH_TIMER_CTRL_IT_MASK);
+	sysreg_clear_set_s(SYS_CNTV_CTL_EL02, 0, ARCH_TIMER_CTRL_IT_MASK);
+
+	/* PMC FIQ */
+	sysreg_clear_set_s(SYS_APL_PMCR0, PMCR0_IMODE | PMCR0_IACT,
+			   FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
+
+	/* Uncore PMC FIQ */
+	sysreg_clear_set_s(SYS_APL_UPMCR0, UPMCR0_IMODE,
+			   FIELD_PREP(UPMCR0_IMODE, UPMCR0_IMODE_OFF));
+
+	/*
+	 * Make sure the kernel's idea of logical CPU order is the same as AIC's
+	 * If we ever end up with a mismatch here, we will have to introduce
+	 * a mapping table similar to what other irqchip drivers do.
+	 */
+	WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
+
+	return 0;
+
+}
+
+static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
+{
+	int i;
+	void __iomem *regs;
+	u32 info;
+	struct aic_irq_chip *irqc;
+
+	regs = of_iomap(node, 0);
+	if (WARN_ON(!regs))
+		return -EIO;
+
+	irqc = kzalloc(sizeof(*irqc), GFP_KERNEL);
+	if (!irqc)
+		return -ENOMEM;
+
+	aic_irqc = irqc;
+	irqc->base = regs;
+
+	info = aic_ic_read(irqc, AIC_INFO);
+	irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info);
+
+	irqc->hw_domain = irq_domain_add_linear(node, irqc->nr_hw + AIC_NR_FIQ,
+						&aic_irq_domain_ops, irqc);
+	if (WARN_ON(!irqc->hw_domain)) {
+		iounmap(irqc->base);
+		kfree(irqc);
+		return -ENODEV;
+	}
+
+	irq_domain_update_bus_token(irqc->hw_domain, DOMAIN_BUS_WIRED);
+
+	if (aic_init_smp(irqc, node)) {
+		irq_domain_remove(irqc->hw_domain);
+		iounmap(irqc->base);
+		kfree(irqc);
+		return -ENODEV;
+	}
+
+	set_handle_irq(aic_handle_irq_or_fiq);
+
+	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
+		aic_ic_write(irqc, AIC_MASK_SET + i * 4, ~0);
+	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
+		aic_ic_write(irqc, AIC_SW_CLR + i * 4, ~0);
+	for (i = 0; i < irqc->nr_hw; i++)
+		aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1);
+
+	cpuhp_setup_state(CPUHP_AP_IRQ_APPLE_AIC_STARTING,
+			  "irqchip/apple-aic/ipi:starting",
+			  aic_init_cpu, NULL);
+
+	pr_info("AIC: initialized with %d IRQs, %d FIQs, %d vIPIs\n",
+		irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
+
+	return 0;
+}
+
+IRQCHIP_DECLARE(apple_m1_aic, "apple,aic", aic_of_ic_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0042ef362511..7ea1fc537f12 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -100,6 +100,7 @@  enum cpuhp_state {
 	CPUHP_AP_CPU_PM_STARTING,
 	CPUHP_AP_IRQ_GIC_STARTING,
 	CPUHP_AP_IRQ_HIP04_STARTING,
+	CPUHP_AP_IRQ_APPLE_AIC_STARTING,
 	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_IRQ_MIPS_GIC_STARTING,