@@ -2,11 +2,13 @@
#ifndef _ASM_X86_CONTEXT_TRACKING_WORK_H
#define _ASM_X86_CONTEXT_TRACKING_WORK_H
+#include <asm/sync_core.h>
+
static __always_inline void arch_context_tracking_work(int work)
{
switch (work) {
- case CONTEXT_WORK_n:
- // Do work...
+ case CONTEXT_WORK_SYNC:
+ sync_core();
break;
}
}
@@ -33,6 +33,7 @@ extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
+extern void text_poke_sync_deferrable(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
@@ -18,6 +18,7 @@
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
+#include <linux/context_tracking.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -2080,9 +2081,24 @@ static void do_sync_core(void *info)
sync_core();
}
+static bool do_sync_core_defer_cond(int cpu, void *info)
+{
+ return !ct_set_cpu_work(cpu, CONTEXT_WORK_SYNC);
+}
+
+static void __text_poke_sync(smp_cond_func_t cond_func)
+{
+ on_each_cpu_cond(cond_func, do_sync_core, NULL, 1);
+}
+
void text_poke_sync(void)
{
- on_each_cpu(do_sync_core, NULL, 1);
+ __text_poke_sync(NULL);
+}
+
+void text_poke_sync_deferrable(void)
+{
+ __text_poke_sync(do_sync_core_defer_cond);
}
/*
@@ -2257,6 +2273,8 @@ static int tp_vec_nr;
static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
unsigned char int3 = INT3_INSN_OPCODE;
+ bool force_ipi = false;
+ void (*sync_fn)(void);
unsigned int i;
int do_sync;
@@ -2291,11 +2309,18 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
* First step: add a int3 trap to the address that will be patched.
*/
for (i = 0; i < nr_entries; i++) {
+ /*
+ * Record that we need to send the IPI if at least one location
+ * in the batch requires it.
+ */
+ force_ipi |= tp[i].force_ipi;
tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
}
- text_poke_sync();
+ sync_fn = force_ipi ? text_poke_sync : text_poke_sync_deferrable;
+
+ sync_fn();
/*
* Second step: update all but the first byte of the patched range.
@@ -2357,7 +2382,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
* not necessary and we'd be safe even without it. But
* better safe than sorry (plus there's not only Intel).
*/
- text_poke_sync();
+ sync_fn();
}
/*
@@ -2378,7 +2403,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
}
if (do_sync)
- text_poke_sync();
+ sync_fn();
/*
* Remove and wait for refs to be zero.
@@ -817,7 +817,7 @@ void arch_arm_kprobe(struct kprobe *p)
u8 int3 = INT3_INSN_OPCODE;
text_poke(p->addr, &int3, 1);
- text_poke_sync();
+ text_poke_sync_deferrable();
perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
}
@@ -827,7 +827,7 @@ void arch_disarm_kprobe(struct kprobe *p)
perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
text_poke(p->addr, &p->opcode, 1);
- text_poke_sync();
+ text_poke_sync_deferrable();
}
void arch_remove_kprobe(struct kprobe *p)
@@ -513,11 +513,11 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op)
JMP32_INSN_SIZE - INT3_INSN_SIZE);
text_poke(addr, new, INT3_INSN_SIZE);
- text_poke_sync();
+ text_poke_sync_deferrable();
text_poke(addr + INT3_INSN_SIZE,
new + INT3_INSN_SIZE,
JMP32_INSN_SIZE - INT3_INSN_SIZE);
- text_poke_sync();
+ text_poke_sync_deferrable();
perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}
@@ -191,7 +191,7 @@ static int write_relocate_add(Elf64_Shdr *sechdrs,
write, apply);
if (!early) {
- text_poke_sync();
+ text_poke_sync_deferrable();
mutex_unlock(&text_mutex);
}
@@ -5,12 +5,12 @@
#include <linux/bitops.h>
enum {
- CONTEXT_WORK_n_OFFSET,
+ CONTEXT_WORK_SYNC_OFFSET,
CONTEXT_WORK_MAX_OFFSET
};
enum ct_work {
- CONTEXT_WORK_n = BIT(CONTEXT_WORK_n_OFFSET),
+ CONTEXT_WORK_SYNC = BIT(CONTEXT_WORK_SYNC_OFFSET),
CONTEXT_WORK_MAX = BIT(CONTEXT_WORK_MAX_OFFSET)
};