
[v3,5/5] arm64: irqflags: use alternative branches for pseudo-NMI logic

Message ID 20230130145429.903791-6-mark.rutland@arm.com (mailing list archive)
State New, archived
Series arm64: pseudo-nmi: elide code when CONFIG_ARM64_PSEUDO_NMI=n

Commit Message

Mark Rutland Jan. 30, 2023, 2:54 p.m. UTC
Due to the way we use alternatives in the irqflags code, even when
CONFIG_ARM64_PSEUDO_NMI=n, we generate unused alternative code for
pseudo-NMI management. This patch reworks the irqflags code to remove
the redundant code when CONFIG_ARM64_PSEUDO_NMI=n, which benefits the
more common case, and will permit further rework of our DAIF management
(e.g. in preparation for ARMv8.8-A's NMI feature).

Prior to this patch, a defconfig kernel has hundreds of redundant
instructions to access ICC_PMR_EL1 (which should only need to be
manipulated in setup code), which this patch removes:

| [mark@lakrids:~/src/linux]% usekorg 12.1.0 aarch64-linux-objdump -d vmlinux-before-defconfig | grep icc_pmr_el1 | wc -l
| 885
| [mark@lakrids:~/src/linux]% usekorg 12.1.0 aarch64-linux-objdump -d vmlinux-after-defconfig | grep icc_pmr_el1 | wc -l
| 5

Those instructions alone account for more than 3KiB of kernel text, and
will be associated with additional alt_instr entries, padding and
branches, etc.

These redundant instructions exist because we use alternative sequences
to choose between DAIF / PMR management in irqflags.h, and even when
CONFIG_ARM64_PSEUDO_NMI=n, those alternative sequences will generate the
code for PMR management, along with alt_instr entries. We use
alternatives here as this was necessary to ensure that we never
encounter a mismatched local_irq_save() ... local_irq_restore() sequence
in the middle of patching, which could happen if we used static
keys to choose between DAIF and PMR management.

Since commit:

  21fb26bfb01ffe0d ("arm64: alternatives: add alternative_has_feature_*()")

... we have a mechanism to use alternatives similarly to static keys,
allowing us to write the bulk of the logic in C code while also being
able to rely on all sites being patched in one go, and avoiding a
mismatched local_irq_save() ... local_irq_restore() sequence
during patching.
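
As an illustration of the approach (a trimmed copy of the helper added
by this patch), each irqflags operation becomes a plain C branch on a
helper which compiles down to a single patched branch:

| static __always_inline bool __irqflags_uses_pmr(void)
| {
| 	/* Patched at boot: false unless pseudo-NMI is built in and in use */
| 	return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
| 	       alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
| }

When CONFIG_ARM64_PSEUDO_NMI=n the IS_ENABLED() check is a compile-time
constant zero, so the compiler can discard the PMR-only paths entirely
and no alternative is emitted.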

This patch rewrites arm64's local_irq_*() functions to use alternative
branches. This allows for the pseudo-NMI code to be entirely elided when
CONFIG_ARM64_PSEUDO_NMI=n, making a defconfig Image 64KiB smaller, and
not affecting the size of an Image with CONFIG_ARM64_PSEUDO_NMI=y:

| [mark@lakrids:~/src/linux]% ls -al vmlinux-*
| -rwxr-xr-x 1 mark mark 137473432 Jan 18 11:11 vmlinux-after-defconfig
| -rwxr-xr-x 1 mark mark 137918776 Jan 18 11:15 vmlinux-after-pnmi
| -rwxr-xr-x 1 mark mark 137380152 Jan 18 11:03 vmlinux-before-defconfig
| -rwxr-xr-x 1 mark mark 137523704 Jan 18 11:08 vmlinux-before-pnmi
| [mark@lakrids:~/src/linux]% ls -al Image-*
| -rw-r--r-- 1 mark mark 38646272 Jan 18 11:11 Image-after-defconfig
| -rw-r--r-- 1 mark mark 38777344 Jan 18 11:14 Image-after-pnmi
| -rw-r--r-- 1 mark mark 38711808 Jan 18 11:03 Image-before-defconfig
| -rw-r--r-- 1 mark mark 38777344 Jan 18 11:08 Image-before-pnmi

Some sensitive code depends on being run with interrupts enabled or with
interrupts disabled, and so when enabling or disabling interrupts we
must ensure that the compiler does not move such code around the actual
enable/disable. Before this patch, that was ensured by the combined asm
volatile blocks having memory clobbers (and any sensitive code either
being asm volatile, or touching memory). This patch consistently uses
explicit barrier() operations before and after the enable/disable, which
allows us to use the usual sysreg accessors (which are asm volatile) to
manipulate the interrupt masks. The use of pmr_sync() is pulled within
this critical section for consistency.
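
For reference, with this patch the PMR enable path (with the optional
debug check elided) ends up as:

| static __always_inline void __pmr_local_irq_enable(void)
| {
| 	/* ICC_PMR_EL1 sanity check elided */
| 	barrier();
| 	write_sysreg_s(GIC_PRIO_IRQON, SYS_ICC_PMR_EL1);
| 	pmr_sync();
| 	barrier();
| }

... with the barrier()s providing the compiler ordering that the memory
clobber used to, and pmr_sync() now sitting inside the barriers.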

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/irqflags.h | 191 +++++++++++++++++++++---------
 1 file changed, 132 insertions(+), 59 deletions(-)

Patch

diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index f51653fb90e4..e0f5f6b73edd 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -21,43 +21,77 @@ 
  * exceptions should be unmasked.
  */
 
-/*
- * CPU interrupt mask handling.
- */
-static inline void arch_local_irq_enable(void)
+static __always_inline bool __irqflags_uses_pmr(void)
 {
-	if (system_has_prio_mask_debugging()) {
-		u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+	return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
+	       alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
+}
 
+static __always_inline void __daif_local_irq_enable(void)
+{
+	barrier();
+	asm volatile("msr daifclr, #3");
+	barrier();
+}
+
+static __always_inline void __pmr_local_irq_enable(void)
+{
+	if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+		u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
 		WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
 	}
 
-	asm volatile(ALTERNATIVE(
-		"msr	daifclr, #3		// arch_local_irq_enable",
-		__msr_s(SYS_ICC_PMR_EL1, "%0"),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		:
-		: "r" ((unsigned long) GIC_PRIO_IRQON)
-		: "memory");
-
+	barrier();
+	write_sysreg_s(GIC_PRIO_IRQON, SYS_ICC_PMR_EL1);
 	pmr_sync();
+	barrier();
 }
 
-static inline void arch_local_irq_disable(void)
+static inline void arch_local_irq_enable(void)
 {
-	if (system_has_prio_mask_debugging()) {
-		u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+	if (__irqflags_uses_pmr()) {
+		__pmr_local_irq_enable();
+	} else {
+		__daif_local_irq_enable();
+	}
+}
 
+static __always_inline void __daif_local_irq_disable(void)
+{
+	barrier();
+	asm volatile("msr daifset, #3");
+	barrier();
+}
+
+static __always_inline void __pmr_local_irq_disable(void)
+{
+	if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+		u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
 		WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
 	}
 
-	asm volatile(ALTERNATIVE(
-		"msr	daifset, #3		// arch_local_irq_disable",
-		__msr_s(SYS_ICC_PMR_EL1, "%0"),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		:
-		: "r" ((unsigned long) GIC_PRIO_IRQOFF)
-		: "memory");
+	barrier();
+	write_sysreg_s(GIC_PRIO_IRQOFF, SYS_ICC_PMR_EL1);
+	barrier();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	if (__irqflags_uses_pmr()) {
+		__pmr_local_irq_disable();
+	} else {
+		__daif_local_irq_disable();
+	}
+}
+
+static __always_inline unsigned long __daif_local_save_flags(void)
+{
+	return read_sysreg(daif);
+}
+
+static __always_inline unsigned long __pmr_local_save_flags(void)
+{
+	return read_sysreg_s(SYS_ICC_PMR_EL1);
 }
 
 /*
@@ -65,69 +99,108 @@  static inline void arch_local_irq_disable(void)
  */
 static inline unsigned long arch_local_save_flags(void)
 {
-	unsigned long flags;
+	if (__irqflags_uses_pmr()) {
+		return __pmr_local_save_flags();
+	} else {
+		return __daif_local_save_flags();
+	}
+}
 
-	asm volatile(ALTERNATIVE(
-		"mrs	%0, daif",
-		__mrs_s("%0", SYS_ICC_PMR_EL1),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		: "=&r" (flags)
-		:
-		: "memory");
+static __always_inline bool __daif_irqs_disabled_flags(unsigned long flags)
+{
+	return flags & PSR_I_BIT;
+}
 
-	return flags;
+static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags)
+{
+	return flags != GIC_PRIO_IRQON;
 }
 
-static inline int arch_irqs_disabled_flags(unsigned long flags)
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
 {
-	int res;
+	if (__irqflags_uses_pmr()) {
+		return __pmr_irqs_disabled_flags(flags);
+	} else {
+		return __daif_irqs_disabled_flags(flags);
+	}
+}
 
-	asm volatile(ALTERNATIVE(
-		"and	%w0, %w1, #" __stringify(PSR_I_BIT),
-		"eor	%w0, %w1, #" __stringify(GIC_PRIO_IRQON),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		: "=&r" (res)
-		: "r" ((int) flags)
-		: "memory");
+static __always_inline bool __daif_irqs_disabled(void)
+{
+	return __daif_irqs_disabled_flags(__daif_local_save_flags());
+}
 
-	return res;
+static __always_inline bool __pmr_irqs_disabled(void)
+{
+	return __pmr_irqs_disabled_flags(__pmr_local_save_flags());
 }
 
-static inline int arch_irqs_disabled(void)
+static inline bool arch_irqs_disabled(void)
 {
-	return arch_irqs_disabled_flags(arch_local_save_flags());
+	if (__irqflags_uses_pmr()) {
+		return __pmr_irqs_disabled();
+	} else {
+		return __daif_irqs_disabled();
+	}
 }
 
-static inline unsigned long arch_local_irq_save(void)
+static __always_inline unsigned long __daif_local_irq_save(void)
 {
-	unsigned long flags;
+	unsigned long flags = __daif_local_save_flags();
+
+	__daif_local_irq_disable();
+
+	return flags;
+}
 
-	flags = arch_local_save_flags();
+static __always_inline unsigned long __pmr_local_irq_save(void)
+{
+	unsigned long flags = __pmr_local_save_flags();
 
 	/*
 	 * There are too many states with IRQs disabled, just keep the current
 	 * state if interrupts are already disabled/masked.
 	 */
-	if (!arch_irqs_disabled_flags(flags))
-		arch_local_irq_disable();
+	if (!__pmr_irqs_disabled_flags(flags))
+		__pmr_local_irq_disable();
 
 	return flags;
 }
 
+static inline unsigned long arch_local_irq_save(void)
+{
+	if (__irqflags_uses_pmr()) {
+		return __pmr_local_irq_save();
+	} else {
+		return __daif_local_irq_save();
+	}
+}
+
+static __always_inline void __daif_local_irq_restore(unsigned long flags)
+{
+	barrier();
+	write_sysreg(flags, daif);
+	barrier();
+}
+
+static __always_inline void __pmr_local_irq_restore(unsigned long flags)
+{
+	barrier();
+	write_sysreg_s(flags, SYS_ICC_PMR_EL1);
+	pmr_sync();
+	barrier();
+}
+
 /*
  * restore saved IRQ state
  */
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-	asm volatile(ALTERNATIVE(
-		"msr	daif, %0",
-		__msr_s(SYS_ICC_PMR_EL1, "%0"),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		:
-		: "r" (flags)
-		: "memory");
-
-	pmr_sync();
+	if (__irqflags_uses_pmr()) {
+		__pmr_local_irq_restore(flags);
+	} else {
+		__daif_local_irq_restore(flags);
+	}
 }
 
 #endif /* __ASM_IRQFLAGS_H */