@@ -10,4 +10,8 @@ int aarch64_insn_write(void *addr, u32 insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+typedef int (*patch_machine_func_t)(void *); /* callback run by patch_machine*() */
+int patch_machine_cpuslocked(patch_machine_func_t func, void *arg); /* takes no hotplug lock itself */
+int patch_machine(patch_machine_func_t func, void *arg); /* takes cpus_read_lock() around the call */
+
#endif /* __ASM_PATCHING_H */
@@ -14,8 +14,8 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
+#include <asm/patching.h>
#include <asm/sections.h>
-#include <linux/stop_machine.h>
#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
@@ -189,43 +189,17 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu
}
}
-/*
- * Apply alternatives, ensuring that no CPUs are concurrently executing code
- * being patched.
- *
- * We might be patching the stop_machine state machine or READ_ONCE(), so
- * we implement a simple polling protocol.
- */
-static int __apply_alternatives_multi_stop(void *unused)
+static int __apply_alternatives_stopped(void *unused) /* patch_machine() callback */
{
- /* Volatile, as we may be patching the guts of READ_ONCE() */
- static volatile int all_alternatives_applied;
- static atomic_t stopped_cpus = ATOMIC_INIT(0);
struct alt_region region = {
.begin = (struct alt_instr *)__alt_instructions,
.end = (struct alt_instr *)__alt_instructions_end,
};
+ DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE);
- /* We always have a CPU 0 at this point (__init) */
- if (smp_processor_id()) {
- arch_atomic_inc(&stopped_cpus);
- while (!all_alternatives_applied)
- cpu_relax();
- isb();
- } else {
- DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE);
-
- while (arch_atomic_read(&stopped_cpus) != num_online_cpus() - 1)
- cpu_relax();
-
- bitmap_complement(remaining_capabilities, boot_capabilities,
- ARM64_NPATCHABLE);
-
- BUG_ON(all_alternatives_applied);
- __apply_alternatives(&region, false, remaining_capabilities);
- /* Barriers provided by the cache flushing */
- all_alternatives_applied = 1;
- }
+ bitmap_complement(remaining_capabilities, boot_capabilities,
+ ARM64_NPATCHABLE); /* every capability not set in boot_capabilities */
+ __apply_alternatives(&region, false, remaining_capabilities);
return 0;
}
@@ -233,7 +207,7 @@ static int __apply_alternatives_multi_stop(void *unused)
void __init apply_alternatives_all(void)
{
/* better not try code patching on a live SMP system */
- stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
+ patch_machine(__apply_alternatives_stopped, NULL); /* return value is not checked */
}
/*
@@ -105,31 +105,88 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
return ret;
}
+struct patch_machine_info {
+ patch_machine_func_t func; /* patching callback, run on @cpu only */
+ void *arg; /* passed to @func unchanged */
+ int cpu; /* the one CPU that runs @func */
+ atomic_t active; /* other CPUs yet to check in; starts at num_online_cpus() - 1 */
+ volatile int done; /* set to 1 once @func has returned; other CPUs spin on it */
+};
+
+/*
+ * stop_machine() callback: run pmi->func on pmi->cpu while every other CPU
+ * spins, so that no CPU concurrently executes code being patched.
+ *
+ * The other CPUs check in via pmi->active before patching starts and are
+ * held until pmi->done is set. Only the patching CPU returns func's result.
+ *
+ * The patching function is responsible for any barriers necessary to make new
+ * instructions visible to other CPUs. The other CPUs will issue an ISB upon
+ * being woken to ensure they use the new instructions.
+ */
+static int noinstr do_patch_machine(void *arg)
+{
+ struct patch_machine_info *pmi = arg;
+ int cpu = raw_smp_processor_id(); /* raw: same accessor that sampled pmi->cpu */
+ int ret = 0;
+
+ if (pmi->cpu == cpu) {
+ while (arch_atomic_read(&pmi->active)) /* wait for all other CPUs to check in */
+ cpu_relax();
+ ret = pmi->func(pmi->arg);
+ pmi->done = 1; /* release the spinning CPUs */
+ } else {
+ arch_atomic_dec(&pmi->active); /* check in */
+ while (!pmi->done)
+ cpu_relax();
+ isb();
+ }
+
+ return ret; /* stays 0 on CPUs that did not run pmi->func */
+}
+
+/*
+ * Run @func(@arg) on one CPU (sampled below) with all other online CPUs held
+ * in do_patch_machine(). Takes no hotplug lock itself; cf. patch_machine().
+ */
+int patch_machine_cpuslocked(patch_machine_func_t func, void *arg)
+{
+ struct patch_machine_info pmi = {
+ .func = func,
+ .arg = arg,
+ .cpu = raw_smp_processor_id(), /* CPU that will run @func */
+ .active = ATOMIC_INIT(num_online_cpus() - 1), /* every online CPU but one */
+ .done = 0,
+ };
+
+ return stop_machine_cpuslocked(do_patch_machine, &pmi, cpu_online_mask);
+}
+
+int patch_machine(patch_machine_func_t func, void *arg) /* hotplug-locking wrapper */
+{
+ int ret;
+
+ cpus_read_lock(); /* held until patch_machine_cpuslocked() has returned */
+ ret = patch_machine_cpuslocked(func, arg);
+ cpus_read_unlock();
+
+ return ret; /* whatever patch_machine_cpuslocked() returned */
+}
+
struct aarch64_insn_patch {
void **text_addrs;
u32 *new_insns;
int insn_cnt;
- atomic_t cpu_count;
};
static int __kprobes aarch64_insn_patch_text_cb(void *arg)
{
int i, ret = 0;
struct aarch64_insn_patch *pp = arg;
- int num_cpus = num_online_cpus();
-
- /* The last CPU becomes master */
- if (arch_atomic_inc_return(&pp->cpu_count) == num_cpus) {
- for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
- ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
- pp->new_insns[i]);
- /* Notify other processors with an additional increment. */
- atomic_inc(&pp->cpu_count);
- } else {
- while (arch_atomic_read(&pp->cpu_count) <= num_cpus)
- cpu_relax();
- isb();
- }
+
+ for (i = 0; ret == 0 && i < pp->insn_cnt; i++) /* stop at the first failure */
+ ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+ pp->new_insns[i]);
return ret;
}
@@ -140,12 +197,10 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
.text_addrs = addrs,
.new_insns = insns,
.insn_cnt = cnt,
- .cpu_count = ATOMIC_INIT(0),
};
if (cnt <= 0)
return -EINVAL;
- return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
- cpu_online_mask);
+ return patch_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch);
}
Some instruction sequences cannot be safely modified while they may be concurrently executed, and so it's necessary to temporarily stop all CPUs while performing the modification. We have separate implementations of this for alternatives and kprobes. This patch unifies these with a common patch_machine() helper function which handles the necessary synchronization to ensure that CPUs are stopped during patching. This separates the patching logic, making it easier to understand, and means that we only have to maintain one synchronization algorithm. The synchronization logic in do_patch_machine() only uses unpatchable functions, and the function itself is marked `noinstr` to prevent instrumentation. The patch_machine() helper is left instrumentable as stop_machine() is instrumentable, and therefore there is no benefit to forbidding instrumentation. As with the prior alternative patching sequence, the CPU to apply the patch is chosen early so that this may be deterministic. Since __apply_alternatives_stopped() is only ever called once under apply_alternatives_all(), the `all_alternatives_applied` variable and its BUG_ON() check are redundant and therefore removed. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Andre Przywara <andre.przywara@arm.com> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Joey Gouly <joey.gouly@arm.com> Cc: Suzuki K Poulose <suzuki.poulose@arm.com> Cc: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/patching.h | 4 ++ arch/arm64/kernel/alternative.c | 40 +++----------- arch/arm64/kernel/patching.c | 91 +++++++++++++++++++++++++------ 3 files changed, 84 insertions(+), 51 deletions(-)