
[v2,5/5] arm64: irqflags: use alternative branches for pseudo-NMI logic

Message ID 20230125163826.496739-6-mark.rutland@arm.com (mailing list archive)
State New, archived
Series arm64: pseudo-nmi: elide code when CONFIG_ARM64_PSEUDO_NMI=n

Commit Message

Mark Rutland Jan. 25, 2023, 4:38 p.m. UTC
Due to the way we use alternatives in the irqflags code, even when
CONFIG_ARM64_PSEUDO_NMI=n, we generate unused alternative code for
pseudo-NMI management. This patch reworks the irqflags code to remove
the redundant code when CONFIG_ARM64_PSEUDO_NMI=n, which benefits the
more common case, and will permit further rework of our DAIF management
(e.g. in preparation for ARMv8.8-A's NMI feature).

Prior to this patch a defconfig kernel has hundreds of redundant
instructions to access ICC_PMR_EL1 (which should only need to be
manipulated in setup code), which this patch removes:

| [mark@lakrids:~/src/linux]% usekorg 12.1.0 aarch64-linux-objdump -d vmlinux-before-defconfig | grep icc_pmr_el1 | wc -l
| 885
| [mark@lakrids:~/src/linux]% usekorg 12.1.0 aarch64-linux-objdump -d vmlinux-after-defconfig | grep icc_pmr_el1 | wc -l
| 5

Those instructions alone (880 removed, at 4 bytes each) account for more
than 3KiB of kernel text, and will be associated with additional
alt_instr entries, padding, branches, etc.

These redundant instructions exist because we use alternative sequences
to choose between DAIF / PMR management in irqflags.h, and even when
CONFIG_ARM64_PSEUDO_NMI=n, those alternative sequences will generate the
code for PMR management, along with alt_instr entries. We use
alternatives here as this was necessary to ensure that we never
encounter a mismatched local_irq_save() ... local_irq_restore() sequence
in the middle of patching, which could happen if we used static keys to
choose between DAIF and PMR management.
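
For reference, the pre-patch code makes this choice with a per-operation
inline-asm ALTERNATIVE; the arch_local_irq_enable() sequence removed by
the diff below is representative:

  asm volatile(ALTERNATIVE(
  	"msr	daifclr, #3		// arch_local_irq_enable",
  	__msr_s(SYS_ICC_PMR_EL1, "%0"),
  	ARM64_HAS_GIC_PRIO_MASKING)
  	:
  	: "r" ((unsigned long) GIC_PRIO_IRQON)
  	: "memory");

Both the DAIF and PMR encodings are emitted into the kernel image, along
with an alt_instr entry recording which to use, regardless of whether
CONFIG_ARM64_PSEUDO_NMI is enabled.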

Since commit:

  21fb26bfb01ffe0d ("arm64: alternatives: add alternative_has_feature_*()")

... we have a mechanism to use alternatives similarly to static keys,
allowing us to write the bulk of the logic in C code while also being
able to rely on all sites being patched in one go, and avoiding a
mismatched local_irq_save() ... local_irq_restore() sequence
during patching.
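
Concretely, the patch folds the feature check into one helper and writes
each operation as a plain C branch (excerpted from the diff below):

  static __always_inline bool __irqflags_uses_pmr(void)
  {
  	return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
  	       alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
  }

  static inline void arch_local_irq_enable(void)
  {
  	if (__irqflags_uses_pmr()) {
  		__pmr_local_irq_enable();
  	} else {
  		__daif_local_irq_enable();
  	}
  }

When CONFIG_ARM64_PSEUDO_NMI=n the IS_ENABLED() check is constant false
and the compiler discards the PMR path entirely; when it is enabled, the
feature check is a single alternative-patched branch, so all callers
still flip between DAIF and PMR management in one go during patching.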

This patch rewrites arm64's local_irq_*() functions to use alternative
branches. This allows for the pseudo-NMI code to be entirely elided when
CONFIG_ARM64_PSEUDO_NMI=n, making a defconfig Image 64KiB smaller, and
not affecting the size of an Image with CONFIG_ARM64_PSEUDO_NMI=y:

| [mark@lakrids:~/src/linux]% ls -al vmlinux-*
| -rwxr-xr-x 1 mark mark 137473432 Jan 18 11:11 vmlinux-after-defconfig
| -rwxr-xr-x 1 mark mark 137918776 Jan 18 11:15 vmlinux-after-pnmi
| -rwxr-xr-x 1 mark mark 137380152 Jan 18 11:03 vmlinux-before-defconfig
| -rwxr-xr-x 1 mark mark 137523704 Jan 18 11:08 vmlinux-before-pnmi
| [mark@lakrids:~/src/linux]% ls -al Image-*
| -rw-r--r-- 1 mark mark 38646272 Jan 18 11:11 Image-after-defconfig
| -rw-r--r-- 1 mark mark 38777344 Jan 18 11:14 Image-after-pnmi
| -rw-r--r-- 1 mark mark 38711808 Jan 18 11:03 Image-before-defconfig
| -rw-r--r-- 1 mark mark 38777344 Jan 18 11:08 Image-before-pnmi

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/irqflags.h | 183 ++++++++++++++++++++----------
 1 file changed, 124 insertions(+), 59 deletions(-)

Comments

Marc Zyngier Jan. 26, 2023, 8:49 a.m. UTC | #1
On Wed, 25 Jan 2023 16:38:26 +0000,
Mark Rutland <mark.rutland@arm.com> wrote:

[...]

> +static __always_inline void __daif_local_irq_restore(unsigned long flags)
> +{
> +	barrier();
> +	write_sysreg(flags, daif);
> +	barrier();
> +}
> +
> +static __always_inline void __pmr_local_irq_restore(unsigned long flags)
> +{
> +	barrier();
> +	write_sysreg_s(flags, SYS_ICC_PMR_EL1);
> +	pmr_sync();
> +	barrier();
> +}

It would be good to at least mention why we need these compile-time
barriers which are not that explicit in the existing code. I guess
they are equivalent to the "memory" clobber in the asm sequences that
this replaces, but they don't seem to be used consistently throughout
this patch.

Thanks,

	M.
Mark Rutland Jan. 26, 2023, 10:31 a.m. UTC | #2
On Thu, Jan 26, 2023 at 08:49:38AM +0000, Marc Zyngier wrote:
> On Wed, 25 Jan 2023 16:38:26 +0000,
> Mark Rutland <mark.rutland@arm.com> wrote:
> 
> [...]
> 
> > +static __always_inline void __daif_local_irq_restore(unsigned long flags)
> > +{
> > +	barrier();
> > +	write_sysreg(flags, daif);
> > +	barrier();
> > +}
> > +
> > +static __always_inline void __pmr_local_irq_restore(unsigned long flags)
> > +{
> > +	barrier();
> > +	write_sysreg_s(flags, SYS_ICC_PMR_EL1);
> > +	pmr_sync();
> > +	barrier();
> > +}
> 
> It would be good to at least mention why we need these compile-time
> barriers which are not that explicit in the existing code. I guess
> they are equivalent to the "memory" clobber in the asm sequences that
> this replaces, 

Yes; that was the idea.

> but they don't seem to be used consistently throughout this patch.

Yes; looking with fresh eyes, the missing barriers around PMR manipulation are
buggy. Thanks for pointing that out!

I'll go and add the missing barriers, and update the commit message
accordingly.

I'll also use barrier() around the DAIF helpers rather than a memory clobber so
that the equivalent DAIF and PMR helpers more clearly correspond to one
another.

Thanks,
Mark.
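
For context on the barrier() discussion above: barrier() is a
compiler-only barrier, whose generic kernel definition is roughly an
empty asm with a "memory" clobber:

  /* roughly the generic definition in include/linux/compiler.h */
  #define barrier()	__asm__ __volatile__("" : : : "memory")

Bracketing the DAIF/PMR sysreg write with barrier() therefore gives the
same compile-time ordering as the "memory" clobber on the ALTERNATIVE
asm it replaces (the compiler cannot reorder or cache memory accesses
across the write), without emitting any CPU-level barrier.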

Patch

diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index f51653fb90e43..4175ffb1a64b9 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -21,43 +21,69 @@ 
  * exceptions should be unmasked.
  */
 
-/*
- * CPU interrupt mask handling.
- */
-static inline void arch_local_irq_enable(void)
+static __always_inline bool __irqflags_uses_pmr(void)
 {
-	if (system_has_prio_mask_debugging()) {
-		u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+	return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
+	       alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
+}
 
+static __always_inline void __daif_local_irq_enable(void)
+{
+	asm volatile("msr daifclr, #3" ::: "memory");
+}
+
+static __always_inline void __pmr_local_irq_enable(void)
+{
+	if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+		u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
 		WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
 	}
 
-	asm volatile(ALTERNATIVE(
-		"msr	daifclr, #3		// arch_local_irq_enable",
-		__msr_s(SYS_ICC_PMR_EL1, "%0"),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		:
-		: "r" ((unsigned long) GIC_PRIO_IRQON)
-		: "memory");
-
+	write_sysreg_s(GIC_PRIO_IRQON, SYS_ICC_PMR_EL1);
 	pmr_sync();
 }
 
-static inline void arch_local_irq_disable(void)
+static inline void arch_local_irq_enable(void)
 {
-	if (system_has_prio_mask_debugging()) {
-		u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+	if (__irqflags_uses_pmr()) {
+		__pmr_local_irq_enable();
+	} else {
+		__daif_local_irq_enable();
+	}
+}
 
+static __always_inline void __daif_local_irq_disable(void)
+{
+	asm volatile("msr daifset, #3" ::: "memory");
+}
+
+static __always_inline void __pmr_local_irq_disable(void)
+{
+	if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+		u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
 		WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
 	}
 
-	asm volatile(ALTERNATIVE(
-		"msr	daifset, #3		// arch_local_irq_disable",
-		__msr_s(SYS_ICC_PMR_EL1, "%0"),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		:
-		: "r" ((unsigned long) GIC_PRIO_IRQOFF)
-		: "memory");
+	write_sysreg_s(GIC_PRIO_IRQOFF, SYS_ICC_PMR_EL1);
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	if (__irqflags_uses_pmr()) {
+		__pmr_local_irq_disable();
+	} else {
+		__daif_local_irq_disable();
+	}
+}
+
+static __always_inline unsigned long __daif_local_save_flags(void)
+{
+	return read_sysreg(daif);
+}
+
+static __always_inline unsigned long __pmr_local_save_flags(void)
+{
+	return read_sysreg_s(SYS_ICC_PMR_EL1);
 }
 
 /*
@@ -65,69 +91,108 @@  static inline void arch_local_irq_disable(void)
  */
 static inline unsigned long arch_local_save_flags(void)
 {
-	unsigned long flags;
+	if (__irqflags_uses_pmr()) {
+		return __pmr_local_save_flags();
+	} else {
+		return __daif_local_save_flags();
+	}
+}
 
-	asm volatile(ALTERNATIVE(
-		"mrs	%0, daif",
-		__mrs_s("%0", SYS_ICC_PMR_EL1),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		: "=&r" (flags)
-		:
-		: "memory");
+static __always_inline bool __daif_irqs_disabled_flags(unsigned long flags)
+{
+	return flags & PSR_I_BIT;
+}
 
-	return flags;
+static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags)
+{
+	return flags != GIC_PRIO_IRQON;
 }
 
-static inline int arch_irqs_disabled_flags(unsigned long flags)
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
 {
-	int res;
+	if (__irqflags_uses_pmr()) {
+		return __pmr_irqs_disabled_flags(flags);
+	} else {
+		return __daif_irqs_disabled_flags(flags);
+	}
+}
 
-	asm volatile(ALTERNATIVE(
-		"and	%w0, %w1, #" __stringify(PSR_I_BIT),
-		"eor	%w0, %w1, #" __stringify(GIC_PRIO_IRQON),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		: "=&r" (res)
-		: "r" ((int) flags)
-		: "memory");
+static __always_inline bool __daif_irqs_disabled(void)
+{
+	return __daif_irqs_disabled_flags(__daif_local_save_flags());
+}
 
-	return res;
+static __always_inline bool __pmr_irqs_disabled(void)
+{
+	return __pmr_irqs_disabled_flags(__pmr_local_save_flags());
 }
 
-static inline int arch_irqs_disabled(void)
+static inline bool arch_irqs_disabled(void)
 {
-	return arch_irqs_disabled_flags(arch_local_save_flags());
+	if (__irqflags_uses_pmr()) {
+		return __pmr_irqs_disabled();
+	} else {
+		return __daif_irqs_disabled();
+	}
 }
 
-static inline unsigned long arch_local_irq_save(void)
+static __always_inline unsigned long __daif_local_irq_save(void)
 {
-	unsigned long flags;
+	unsigned long flags = __daif_local_save_flags();
+
+	__daif_local_irq_disable();
+
+	return flags;
+}
 
-	flags = arch_local_save_flags();
+static __always_inline unsigned long __pmr_local_irq_save(void)
+{
+	unsigned long flags = __pmr_local_save_flags();
 
 	/*
 	 * There are too many states with IRQs disabled, just keep the current
 	 * state if interrupts are already disabled/masked.
 	 */
-	if (!arch_irqs_disabled_flags(flags))
-		arch_local_irq_disable();
+	if (!__pmr_irqs_disabled_flags(flags))
+		__pmr_local_irq_disable();
 
 	return flags;
 }
 
+static inline unsigned long arch_local_irq_save(void)
+{
+	if (__irqflags_uses_pmr()) {
+		return __pmr_local_irq_save();
+	} else {
+		return __daif_local_irq_save();
+	}
+}
+
+static __always_inline void __daif_local_irq_restore(unsigned long flags)
+{
+	barrier();
+	write_sysreg(flags, daif);
+	barrier();
+}
+
+static __always_inline void __pmr_local_irq_restore(unsigned long flags)
+{
+	barrier();
+	write_sysreg_s(flags, SYS_ICC_PMR_EL1);
+	pmr_sync();
+	barrier();
+}
+
 /*
  * restore saved IRQ state
  */
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-	asm volatile(ALTERNATIVE(
-		"msr	daif, %0",
-		__msr_s(SYS_ICC_PMR_EL1, "%0"),
-		ARM64_HAS_GIC_PRIO_MASKING)
-		:
-		: "r" (flags)
-		: "memory");
-
-	pmr_sync();
+	if (__irqflags_uses_pmr()) {
+		__pmr_local_irq_restore(flags);
+	} else {
+		__daif_local_irq_restore(flags);
+	}
 }
 
 #endif /* __ASM_IRQFLAGS_H */