diff mbox series

[08/18] arm64: cpufeature: Add a feature for FIQ support

Message ID 20210204203951.52105-9-marcan@marcan.st (mailing list archive)
State Changes Requested
Headers show
Series Apple M1 SoC platform bring-up | expand

Commit Message

Hector Martin Feb. 4, 2021, 8:39 p.m. UTC
Apple ARM SoCs (A11 and newer) have some interrupt sources hard-wired to
the FIQ line. Introduce a cpufeature that can be used to enable FIQ
unmasking and handling via alternatives.

This is currently enabled for all Apple CPUs. If/when support is
implemented for older (pre-A11) iPhone/iPad SoCs which do not need FIQs,
or if newer SoCs are released without the FIQ requirement, we can
revisit the condition.

Signed-off-by: Hector Martin <marcan@marcan.st>
---
 arch/arm64/Kconfig                  | 10 +++++++++
 arch/arm64/include/asm/cpucaps.h    |  3 ++-
 arch/arm64/include/asm/cpufeature.h |  6 ++++++
 arch/arm64/include/asm/cputype.h    |  1 +
 arch/arm64/kernel/cpufeature.c      | 32 +++++++++++++++++++++++++++++
 5 files changed, 51 insertions(+), 1 deletion(-)

Comments

Marc Zyngier Feb. 6, 2021, 1:58 p.m. UTC | #1
Hi Hector,

On Thu, 04 Feb 2021 20:39:41 +0000,
Hector Martin <marcan@marcan.st> wrote:
> 
> Apple ARM SoCs (A11 and newer) have some interrupt sources hard-wired to
> the FIQ line. Introduce a cpufeature that can be used to enable FIQ
> unmasking and handling via alternatives.
> 
> This is currently enabled for all Apple CPUs. If/when support is
> implemented for older (pre-A11) iPhone/iPad SoCs which do not need FIQs,
> or if newer SoCs are released without the FIQ requirement, we can
> revisit the condition.
> 
> Signed-off-by: Hector Martin <marcan@marcan.st>
> ---
>  arch/arm64/Kconfig                  | 10 +++++++++
>  arch/arm64/include/asm/cpucaps.h    |  3 ++-
>  arch/arm64/include/asm/cpufeature.h |  6 ++++++
>  arch/arm64/include/asm/cputype.h    |  1 +
>  arch/arm64/kernel/cpufeature.c      | 32 +++++++++++++++++++++++++++++
>  5 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index f39568b28ec1..11cfdc07404f 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1756,6 +1756,16 @@ config ARM64_DEBUG_PRIORITY_MASKING
>  	  If unsure, say N
>  endif
>  
> +config ARM64_FIQ_SUPPORT
> +	bool "Support for FIQ interrupts"
> +	help
> +	  Adds support for handling FIQ interrupts as normal IRQs.
> +	  This is required on Apple platforms where some IRQ sources are
> +	  hardwired to the FIQ interrupt line.
> +
> +	  FIQs are only enabled at runtime on platforms that require them
> +	  via the CPU feature framework.
> +

This definitely should be selected by CONFIG_ARCH_APPLE. Otherwise,
you can easily end up with a non-working system.

>  config RELOCATABLE
>  	bool "Build a relocatable kernel image" if EXPERT
>  	select ARCH_HAS_RELR
> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> index b77d997b173b..c36d926ad801 100644
> --- a/arch/arm64/include/asm/cpucaps.h
> +++ b/arch/arm64/include/asm/cpucaps.h
> @@ -66,7 +66,8 @@
>  #define ARM64_WORKAROUND_1508412		58
>  #define ARM64_HAS_LDAPR				59
>  #define ARM64_KVM_PROTECTED_MODE		60
> +#define ARM64_NEEDS_FIQ				61
>  
> -#define ARM64_NCAPS				61
> +#define ARM64_NCAPS				62
>  
>  #endif /* __ASM_CPUCAPS_H */
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 9a555809b89c..3a00cfb347c9 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -716,6 +716,12 @@ static __always_inline bool system_uses_irq_prio_masking(void)
>  	       cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
>  }
>  
> +static __always_inline bool system_uses_fiqs(void)

nit: fiq, not fiqs.

> +{
> +	return IS_ENABLED(CONFIG_ARM64_FIQ_SUPPORT) &&
> +	       cpus_have_const_cap(ARM64_NEEDS_FIQ);
> +}
> +
>  static inline bool system_supports_mte(void)
>  {
>  	return IS_ENABLED(CONFIG_ARM64_MTE) &&
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index ef5b040dee44..2084a0340d16 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -59,6 +59,7 @@
>  #define ARM_CPU_IMP_NVIDIA		0x4E
>  #define ARM_CPU_IMP_FUJITSU		0x46
>  #define ARM_CPU_IMP_HISI		0x48
> +#define ARM_CPU_IMP_APPLE		0x61
>  
>  #define ARM_CPU_PART_AEM_V8		0xD0F
>  #define ARM_CPU_PART_FOUNDATION		0xD00
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index e99eddec0a46..0863cf7cf807 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1237,6 +1237,29 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
>  	return ctr & BIT(CTR_IDC_SHIFT);
>  }
>  
> +static void cpu_sync_irq_to_fiq(struct arm64_cpu_capabilities const *cap)
> +{
> +	u64 daif = read_sysreg(daif);
> +
> +	/*
> +	 * By this point in the boot process IRQs are likely masked and FIQs
> +	 * aren't, so we need to sync things to avoid spurious early FIQs.
> +	 */
> +
> +	if (daif & PSR_I_BIT)
> +		daif |= PSR_F_BIT;
> +	else
> +		daif &= ~PSR_F_BIT;
> +
> +	write_sysreg(daif, daif);

Could this happen too late? If, as explained above, we can get a FIQ
until we mask it here, what prevents something (a timer?) from kicking
and creating havoc just before the sync?

If the answer is "nothing", then it probably means that the default
behaviour should be to treat PSTATE.I and PSTATE.F as containing the
same value at all times, and not just as an afterthought when we
detect that we're on a CPU type or another.

This could expand into enabling Group-0 interrupts with GICv3 on
systems that have a single security state (such as virtual machines),
though I don't really see a good use case for it.

> +}
> +
> +static bool needs_fiq(const struct arm64_cpu_capabilities *entry, int __unused)
> +{
> +	/* All supported Apple cores need this */
> +	return read_cpuid_implementor() == ARM_CPU_IMP_APPLE;
> +}
> +
>  static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused)
>  {
>  	/*
> @@ -2154,6 +2177,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.matches = has_cpuid_feature,
>  		.min_field_value = 1,
>  	},
> +#ifdef CONFIG_ARM64_FIQ_SUPPORT
> +	{
> +		.desc = "FIQs",
> +		.capability = ARM64_NEEDS_FIQ,
> +		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
> +		.matches = needs_fiq,
> +		.cpu_enable = cpu_sync_irq_to_fiq,
> +	},
> +#endif
>  	{},
>  };

Thanks,

	M.
Hector Martin Feb. 7, 2021, 8:28 a.m. UTC | #2
On 06/02/2021 22.58, Marc Zyngier wrote:
> Hector Martin <marcan@marcan.st> wrote:
>> +static void cpu_sync_irq_to_fiq(struct arm64_cpu_capabilities const *cap)
>> +{
>> +	u64 daif = read_sysreg(daif);
>> +
>> +	/*
>> +	 * By this point in the boot process IRQs are likely masked and FIQs
>> +	 * aren't, so we need to sync things to avoid spurious early FIQs.
>> +	 */
>> +
>> +	if (daif & PSR_I_BIT)
>> +		daif |= PSR_F_BIT;
>> +	else
>> +		daif &= ~PSR_F_BIT;
>> +
>> +	write_sysreg(daif, daif);
> 
> Could this happen too late? If, as explained above, we can get a FIQ
> until we mask it here, what prevents something (a timer?) from kicking
> and creating havoc just before the sync?

Nothing, other than timers not being enabled this early (hopefully the 
bootloader doesn't leave a rogue timer running for us...)

> If the answer is "nothing", then it probably means that the default
> behaviour should be to treat PSTATE.I and PSTATE.F as containing the
> same value at all times, and not just as an afterthought when we
> detect that we're on a CPU type or another.

I thought of this too. Originally I thought PSTATE.F was always set on 
other systems, and thus unmasking FIQs could cause problems if there is 
a pending rogue FIQ for some reason. However, while writing this patch I 
realized that as part of normal process state changes we already unmask 
FIQs anyway (see DAIF_PROCCTX).

Thus, in fact, this patch actually changes things (when the cpufeature 
is set) to mask FIQs in some cases where they currently aren't, and 
conversely to unmask them in some cases where they currently are. But 
the fact that FIQ masking is somewhat inconsistent to begin with 
suggests that we should be able to mess with it without causing breakage 
for other systems.

So I guess in this case it would be legitimate to just make I==F on 
every system, and if something breaks it should be fixed by making 
whatever is causing a rogue FIQ not do that, right?

That would leave the vector switcheroo as the only thing the cpufeature 
does, which would certainly simplify a lot of the patch.

> This could expand into enabling Group-0 interrupts with GICv3 on
> systems that have a single security state (such as virtual machines),
> though I don't really see a good use case for it.

I could see having a separate vector path opening up the door for 
performance hacks for very specific use cases that want really low 
latency for *one* thing (e.g. the mess the Raspberry Pi folks do to work 
around that braindead USB controller's performance issues), though I 
imagine there would have to be very compelling reasons to develop a 
framework to do this sanely upstream.

Incidentally, I have a personal interest in real-time performance 
(especially audio); once the dust settles and we have a workable kernel 
for normal use I do hope to spend some time taking a deep dive into 
latencies and finding RT-unfriendly code, but that's pretty far off 
right now. Maybe PREEMPT_RT will even be merged by then :-) (I hope that 
without SMM to screw things up on these machines they might make very 
nice RT-capable boxes...)
Marc Zyngier Feb. 8, 2021, 11:29 a.m. UTC | #3
On Sun, 07 Feb 2021 08:28:35 +0000,
Hector Martin 'marcan' <marcan@marcan.st> wrote:
> 
> On 06/02/2021 22.58, Marc Zyngier wrote:
> > Hector Martin <marcan@marcan.st> wrote:
> >> +static void cpu_sync_irq_to_fiq(struct arm64_cpu_capabilities const *cap)
> >> +{
> >> +	u64 daif = read_sysreg(daif);
> >> +
> >> +	/*
> >> +	 * By this point in the boot process IRQs are likely masked and FIQs
> >> +	 * aren't, so we need to sync things to avoid spurious early FIQs.
> >> +	 */
> >> +
> >> +	if (daif & PSR_I_BIT)
> >> +		daif |= PSR_F_BIT;
> >> +	else
> >> +		daif &= ~PSR_F_BIT;
> >> +
> >> +	write_sysreg(daif, daif);
> > 
> > Could this happen too late? If, as explained above, we can get a FIQ
> > until we mask it here, what prevents something (a timer?) from kicking
> > and creating havoc just before the sync?
> 
> Nothing, other than timers not being enabled this early (hopefully the
> bootloader doesn't leave a rogue timer running for us...)

I'm not sure we want to trust the FW on that particular front (no
offence intended...;-).

> 
> > If the answer is "nothing", then it probably means that the default
> > behaviour should be to treat PSTATE.I and PSTATE.F as containing the
> > same value at all times, and not just as an afterthought when we
> > detect that we're on a CPU type or another.
> 
> I thought of this too. Originally I thought PSTATE.F was always set on
> other systems, and thus unmasking FIQs could cause problems if there
> is a pending rogue FIQ for some reason. However, while writing this
> patch I realized that as part of normal process state changes we
> already unmask FIQs anyway (see DAIF_PROCCTX).
> 
> Thus, in fact, this patch actually changes things (when the cpufeature
> is set) to mask FIQs in some cases where they currently aren't, and
> conversely to unmask them in some cases where they currently are. But
> the fact that FIQ masking is somewhat inconsistent to begin with
> suggests that we should be able to mess with it without causing
> breakage for other systems.
> 
> So I guess in this case it would be legitimate to just make I==F on
> every system, and if something breaks it should be fixed by making
> whatever is causing a rogue FIQ not do that, right?

That is my current take on this patch. Nothing in the arm64 kernel
expects a FIQ today, so *when* a FIQ fires is pretty much irrelevant,
as long as we handle it properly (panic). Keeping the two bits in sync
is trivial, and shouldn't carry material overhead.

> That would leave the vector switcheroo as the only thing the
> cpufeature does, which would certainly simplify a lot of the patch.
> 
> > This could expand into enabling Group-0 interrupts with GICv3 on
> > systems that have a single security state (such as virtual machines),
> > though I don't really see a good use case for it.
> 
> I could see having a separate vector path opening up the door for
> performance hacks for very specific use cases that want really low
> latency for *one* thing (e.g. the mess the Raspberry Pi folks do to
> work around that braindead USB controller's performance issues),
> though I imagine there would have to be very compelling reasons to
> develop a framework to do this sanely upstream.

In general, this only works for single security state systems (systems
without EL3 and VMs), as FIQs are usually routed to the secure side
otherwise. If tied to a GIC, Group-0 interrupts aren't configurable
from NS either.

> Incidentally, I have a personal interest in real-time performance
> (especially audio); once the dust settles and we have a workable
> kernel for normal use I do hope to spend some time taking a deep dive
> into latencies and finding RT-unfriendly code, but that's pretty far
> off right now. Maybe PREEMPT_RT will even be merged by then :-) (I
> hope that without SMM to screw things up on these machines they might
> make very nice RT-capable boxes...)

Aside from the lack of programmable priority, the lack of convenient
masking for per-CPU interrupts is a bit of an issue...

Thanks,

	M.
Hector Martin Feb. 8, 2021, 3:51 p.m. UTC | #4
On 08/02/2021 20.29, Marc Zyngier wrote:
> I'm not sure we want to trust the FW on that particular front (no
> offence intended...;-).

Hey, I don't even *use* the timers IRQs; if they are unmasked it's 
iBoot's fault! :-)

> That is my current take on this patch. Nothing in the arm64 kernel
> expects a FIQ today, so *when* a FIQ fires is pretty much irrelevant,
> as long as we handle it properly (panic). Keeping the two bits in sync
> is trivial, and shouldn't carry material overhead.

Sounds good then, and again that simplifies a ton of stuff. Will go for 
that in v2.

> Aside from the lack of programmable priority, the lack of convenient
> masking for per-CPU interrupts is a bit of an issue...

Yeah... we'll see how that goes.
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f39568b28ec1..11cfdc07404f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1756,6 +1756,16 @@  config ARM64_DEBUG_PRIORITY_MASKING
 	  If unsure, say N
 endif
 
+config ARM64_FIQ_SUPPORT
+	bool "Support for FIQ interrupts"
+	help
+	  Adds support for handling FIQ interrupts as normal IRQs.
+	  This is required on Apple platforms where some IRQ sources are
+	  hardwired to the FIQ interrupt line.
+
+	  FIQs are only enabled at runtime on platforms that require them
+	  via the CPU feature framework.
+
 config RELOCATABLE
 	bool "Build a relocatable kernel image" if EXPERT
 	select ARCH_HAS_RELR
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b77d997b173b..c36d926ad801 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -66,7 +66,8 @@ 
 #define ARM64_WORKAROUND_1508412		58
 #define ARM64_HAS_LDAPR				59
 #define ARM64_KVM_PROTECTED_MODE		60
+#define ARM64_NEEDS_FIQ				61
 
-#define ARM64_NCAPS				61
+#define ARM64_NCAPS				62
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9a555809b89c..3a00cfb347c9 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -716,6 +716,12 @@  static __always_inline bool system_uses_irq_prio_masking(void)
 	       cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
 }
 
+static __always_inline bool system_uses_fiqs(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_FIQ_SUPPORT) &&
+	       cpus_have_const_cap(ARM64_NEEDS_FIQ);
+}
+
 static inline bool system_supports_mte(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_MTE) &&
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index ef5b040dee44..2084a0340d16 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -59,6 +59,7 @@ 
 #define ARM_CPU_IMP_NVIDIA		0x4E
 #define ARM_CPU_IMP_FUJITSU		0x46
 #define ARM_CPU_IMP_HISI		0x48
+#define ARM_CPU_IMP_APPLE		0x61
 
 #define ARM_CPU_PART_AEM_V8		0xD0F
 #define ARM_CPU_PART_FOUNDATION		0xD00
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e99eddec0a46..0863cf7cf807 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1237,6 +1237,29 @@  static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
 	return ctr & BIT(CTR_IDC_SHIFT);
 }
 
+static void cpu_sync_irq_to_fiq(struct arm64_cpu_capabilities const *cap)
+{
+	u64 daif = read_sysreg(daif);
+
+	/*
> +	 * By this point in the boot process IRQs are likely masked and FIQs
+	 * aren't, so we need to sync things to avoid spurious early FIQs.
+	 */
+
+	if (daif & PSR_I_BIT)
+		daif |= PSR_F_BIT;
+	else
+		daif &= ~PSR_F_BIT;
+
+	write_sysreg(daif, daif);
+}
+
+static bool needs_fiq(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+	/* All supported Apple cores need this */
+	return read_cpuid_implementor() == ARM_CPU_IMP_APPLE;
+}
+
 static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused)
 {
 	/*
@@ -2154,6 +2177,15 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.min_field_value = 1,
 	},
+#ifdef CONFIG_ARM64_FIQ_SUPPORT
+	{
+		.desc = "FIQs",
+		.capability = ARM64_NEEDS_FIQ,
+		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+		.matches = needs_fiq,
+		.cpu_enable = cpu_sync_irq_to_fiq,
+	},
+#endif
 	{},
 };