@@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature)
/*
* Check if the target PC is within an alternative block.
*/
-static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
+static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
{
unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt);
return !(pc >= replptr && pc <= (replptr + alt->alt_len));
@@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
-static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
+static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
{
u32 insn;
@@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
return insn;
}
-static void patch_alternative(struct alt_instr *alt,
+static noinstr void patch_alternative(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst)
{
__le32 *replptr;
The alternatives code must be `noinstr` such that it does not patch itself, as the cache invalidation is only performed after all the alternatives have been applied. Mark patch_alternative() as `noinstr`. Mark branch_insn_requires_update() and get_alt_insn() with `__always_inline` since they are both only called through patch_alternative(). Booting a kernel in QEMU TCG with KCSAN=y and ARM64_USE_LSE_ATOMICS=y caused a boot hang: [ 0.241121] CPU: All CPU(s) started at EL2 The alternatives code was patching the atomics in __tsan_read4() from LL/SC atomics to LSE atomics. The following fragment is using LL/SC atomics in the .text section: | <__tsan_unaligned_read4+304>: ldxr x6, [x2] | <__tsan_unaligned_read4+308>: add x6, x6, x5 | <__tsan_unaligned_read4+312>: stxr w7, x6, [x2] | <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304> This LL/SC atomic sequence was to be replaced with LSE atomics. However since the alternatives code was instrumentable, __tsan_read4() was being called after only the first instruction was replaced, which led to the following code in memory: | <__tsan_unaligned_read4+304>: ldadd x5, x6, [x2] | <__tsan_unaligned_read4+308>: add x6, x6, x5 | <__tsan_unaligned_read4+312>: stxr w7, x6, [x2] | <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304> This caused an infinite loop as the `stxr` instruction never completed successfully, so `w7` was always 0. Signed-off-by: Joey Gouly <joey.gouly@arm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> --- arch/arm64/kernel/alternative.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)