Context | Check | Description
netdev/tree_selection | success | Not a local patch
bpf/vmtest-bpf-next-VM_Test-2 | success | Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 | success | Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 | success | Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 | success | Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 | success | Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-5 | success | Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 | success | Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 | success | Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 | success | Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-12 | success | Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-13 | success | Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-14 | success | Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 | success | Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 | success | Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 | success | Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-20 | success | Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 | success | Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-23 | success | Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 | success | Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-30 | success | Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-34 | success | Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 | success | Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-39 | success | Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-40 | success | Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-41 | success | Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-43 | success | Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 | success | Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-49 | success | Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-50 | success | Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-51 | success | Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-PR | fail | PR summary
bpf/vmtest-bpf-next-VM_Test-7 | success | Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 | success | Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 | success | Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 | success | Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 | success | Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 | success | Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-25 | success | Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 | fail | Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 | fail | Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 | success | Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 | success | Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 | success | Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-32 | success | Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-33 | success | Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-36 | success | Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 | fail | Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 | fail | Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-42 | success | Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-45 | success | Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 | fail | Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 | fail | Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-48 | fail | Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -20,6 +20,10 @@ typedef u8 uprobe_opcode_t;
#define UPROBE_SWBP_INSN 0xcc
#define UPROBE_SWBP_INSN_SIZE 1
+enum {
+ ARCH_UPROBE_FLAG_CAN_OPTIMIZE = 0,
+};
+
struct uprobe_xol_ops;
struct arch_uprobe {
@@ -45,6 +49,8 @@ struct arch_uprobe {
u8 ilen;
} push;
};
+
+ unsigned long flags;
};
struct arch_uprobe_task {
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -18,6 +18,7 @@
#include <asm/processor.h>
#include <asm/insn.h>
#include <asm/mmu_context.h>
+#include <asm/nops.h>
/* Post-execution fixups. */
@@ -768,7 +769,7 @@ static struct uprobe_trampoline *create_uprobe_trampoline(unsigned long vaddr)
return NULL;
}
-static __maybe_unused struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr)
+static struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr)
{
struct uprobes_state *state = &current->mm->uprobes_state;
struct uprobe_trampoline *tramp = NULL;
@@ -794,7 +795,7 @@ static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
kfree(tramp);
}
-static __maybe_unused void uprobe_trampoline_put(struct uprobe_trampoline *tramp)
+static void uprobe_trampoline_put(struct uprobe_trampoline *tramp)
{
if (tramp == NULL)
return;
@@ -807,6 +808,7 @@ struct mm_uprobe {
struct rb_node rb_node;
unsigned long auprobe;
unsigned long vaddr;
+ bool optimized;
};
#define __node_2_mm_uprobe(node) rb_entry((node), struct mm_uprobe, rb_node)
@@ -874,6 +876,7 @@ static struct mm_uprobe *insert_mm_uprobe(struct mm_struct *mm, struct arch_upro
if (mmu) {
mmu->auprobe = (unsigned long) auprobe;
mmu->vaddr = vaddr;
+ mmu->optimized = false;
RB_CLEAR_NODE(&mmu->rb_node);
rb_add(&mmu->rb_node, &mm->uprobes_state.root_uprobes, __mm_uprobe_less);
}
@@ -886,6 +889,134 @@ static void destroy_mm_uprobe(struct mm_uprobe *mmu, struct rb_root *root)
kfree(mmu);
}
+enum {
+ OPT_PART,
+ OPT_INSN,
+ UNOPT_INT3,
+ UNOPT_PART,
+};
+
+struct write_opcode_ctx {
+ unsigned long base;
+ int update;
+};
+
+static int is_call_insn(uprobe_opcode_t *insn)
+{
+ return *insn == CALL_INSN_OPCODE;
+}
+
+static int verify_insn(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode,
+ int nbytes, void *data)
+{
+ struct write_opcode_ctx *ctx = data;
+ uprobe_opcode_t old_opcode[5];
+
+ uprobe_copy_from_page(page, ctx->base, (uprobe_opcode_t *) &old_opcode, 5);
+
+ switch (ctx->update) {
+ case OPT_PART:
+ case OPT_INSN:
+ if (is_swbp_insn(&old_opcode[0]))
+ return 1;
+ break;
+ case UNOPT_INT3:
+ if (is_call_insn(&old_opcode[0]))
+ return 1;
+ break;
+ case UNOPT_PART:
+ if (is_swbp_insn(&old_opcode[0]))
+ return 1;
+ break;
+ }
+
+ return -1;
+}
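
To make the verifier's expectations explicit: before each partial write, byte 0 of the probed instruction must still hold whatever the previous step left there. A small user-space sketch of that state table (illustrative only, not part of the patch):

  #include <stdio.h>

  enum { OPT_PART, OPT_INSN, UNOPT_INT3, UNOPT_PART };

  /* Mirrors verify_insn() above: the expected first byte of the probed
   * instruction before each update step (0xcc = int3, 0xe8 = call). */
  static int expected_byte0(int update)
  {
  	switch (update) {
  	case OPT_PART:   /* about to write the call's rel32 tail */
  	case OPT_INSN:   /* about to write the call opcode */
  	case UNOPT_PART: /* about to restore the nop5 tail */
  		return 0xcc; /* int3 must still be installed */
  	case UNOPT_INT3: /* about to write int3 over the call */
  		return 0xe8; /* the call opcode must still be there */
  	}
  	return -1;
  }

  int main(void)
  {
  	static const char *names[] = {
  		"OPT_PART", "OPT_INSN", "UNOPT_INT3", "UNOPT_PART",
  	};

  	for (int s = OPT_PART; s <= UNOPT_PART; s++)
  		printf("%-10s expects byte0 == %#04x\n", names[s], expected_byte0(s));
  	return 0;
  }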
+
+static int write_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr,
+ uprobe_opcode_t *insn, int nbytes, void *ctx)
+{
+ return uprobe_write(auprobe, mm, vaddr, insn, nbytes, verify_insn, false, ctx);
+}
+
+static void relative_call(void *dest, long from, long to)
+{
+ struct __packed __arch_relative_insn {
+ u8 op;
+ s32 raddr;
+ } *insn;
+
+ insn = (struct __arch_relative_insn *)dest;
+ insn->raddr = (s32)(to - (from + 5));
+ insn->op = CALL_INSN_OPCODE;
+}
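
The displacement is relative to the end of the 5-byte instruction, hence the `from + 5`. A quick stand-alone check of the encoding (the addresses are made up; the trampoline must sit within +-2GB of the probe site for the s32 displacement to reach it):

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  /* Same encoding as relative_call() above: 0xe8 plus a little-endian
   * rel32 measured from the end of the instruction. */
  static void encode_call(uint8_t buf[5], long from, long to)
  {
  	int32_t raddr = (int32_t)(to - (from + 5));

  	buf[0] = 0xe8; /* CALL_INSN_OPCODE */
  	memcpy(buf + 1, &raddr, sizeof(raddr));
  }

  int main(void)
  {
  	long vaddr = 0x401000;  /* hypothetical probed nop5 */
  	long tramp = 0x4fe000;  /* hypothetical trampoline address */
  	uint8_t call[5];
  	int32_t raddr;

  	encode_call(call, vaddr, tramp);
  	memcpy(&raddr, call + 1, sizeof(raddr));

  	/* The CPU resolves the target as end-of-instruction + rel32. */
  	assert(vaddr + 5 + raddr == tramp);
  	printf("e8 rel32=%d resolves to %#lx\n", raddr, vaddr + 5 + raddr);
  	return 0;
  }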
+
+static int swbp_optimize(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr,
+ unsigned long tramp)
+{
+ struct write_opcode_ctx ctx = {
+ .base = vaddr,
+ };
+ char call[5];
+ int err;
+
+ relative_call(call, vaddr, tramp);
+
+ /*
+ * We are in a state where the breakpoint (int3) is installed over the
+ * first byte of the nop5 instruction. We take the following steps to
+ * overwrite it with the call instruction:
+ *
+ * - sync cores
+ * - write last 4 bytes of the call instruction
+ * - sync cores
+ * - update the call instruction opcode
+ */
+ text_poke_sync();
+
+ ctx.update = OPT_PART;
+ err = write_insn(auprobe, mm, vaddr + 1, call + 1, 4, &ctx);
+ if (err)
+ return err;
+
+ text_poke_sync();
+
+ ctx.update = OPT_INSN;
+ return write_insn(auprobe, mm, vaddr, call, 1, &ctx);
+}
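
The ordering matters because other threads may execute the probed address between the writes. A user-space illustration of why the intermediate state is safe: byte 0 stays int3 until the final store, so a racing thread either traps into the breakpoint slow path or runs the finished call, never a torn instruction (the same reasoning is why swbp_unoptimize() below restores int3 first before touching the tail):

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
  	/* Probed site: int3 (0xcc) over the first byte of a nop5. */
  	uint8_t site[5] = { 0xcc, 0x1f, 0x44, 0x00, 0x00 };
  	/* Target call instruction; the rel32 here is arbitrary. */
  	const uint8_t call[5] = { 0xe8, 0x78, 0x56, 0x34, 0x12 };

  	/* Step 1: write the last 4 bytes (the call's rel32).
  	 * Byte 0 is still int3, so any thread hitting the site
  	 * still traps into the breakpoint slow path. */
  	memcpy(site + 1, call + 1, 4);
  	printf("after tail write, byte0 = %#04x (still int3)\n", site[0]);

  	/* Step 2 (after text_poke_sync()): flip byte 0 to the call
  	 * opcode; from now on the site executes the full call. */
  	site[0] = call[0];
  	printf("call fully installed: %s\n",
  	       memcmp(site, call, 5) == 0 ? "yes" : "no");
  	return 0;
  }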
+
+static int swbp_unoptimize(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
+{
+ uprobe_opcode_t int3 = UPROBE_SWBP_INSN;
+ struct write_opcode_ctx ctx = {
+ .base = vaddr,
+ };
+ int err;
+
+ /*
+ * We need to overwrite the call instruction back into the nop5
+ * instruction, with the breakpoint (int3) installed over its first
+ * byte. We will:
+ *
+ * - overwrite call opcode with breakpoint (int3)
+ * - sync cores
+ * - write last 4 bytes of the nop5 instruction
+ * - sync cores
+ */
+
+ ctx.update = UNOPT_INT3;
+ err = write_insn(auprobe, mm, vaddr, &int3, 1, &ctx);
+ if (err)
+ return err;
+
+ text_poke_sync();
+
+ ctx.update = UNOPT_PART;
+ err = write_insn(auprobe, mm, vaddr + 1, (uprobe_opcode_t *) auprobe->insn + 1, 4, &ctx);
+
+ text_poke_sync();
+ return err;
+}
+
int set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
{
struct mm_uprobe *mmu;
@@ -905,6 +1036,8 @@ int set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned lo
mmu = find_mm_uprobe(mm, auprobe, vaddr);
if (!mmu)
return 0;
+ if (mmu->optimized)
+ WARN_ON_ONCE(swbp_unoptimize(auprobe, mm, vaddr));
destroy_mm_uprobe(mmu, &mm->uprobes_state.root_uprobes);
return uprobe_write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn, true);
}
@@ -937,6 +1070,41 @@ static bool emulate_nop5_insn(struct arch_uprobe *auprobe)
{
return is_nop5_insn((uprobe_opcode_t *) &auprobe->insn);
}
+
+void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
+{
+ struct mm_struct *mm = current->mm;
+ struct uprobe_trampoline *tramp;
+ struct mm_uprobe *mmu;
+
+ if (!test_bit(ARCH_UPROBE_FLAG_CAN_OPTIMIZE, &auprobe->flags))
+ return;
+
+ mmap_write_lock(mm);
+ mmu = find_mm_uprobe(mm, auprobe, vaddr);
+ if (!mmu || mmu->optimized)
+ goto unlock;
+
+ tramp = uprobe_trampoline_get(vaddr);
+ if (!tramp)
+ goto unlock;
+
+ if (WARN_ON_ONCE(swbp_optimize(auprobe, mm, vaddr, tramp->vaddr)))
+ uprobe_trampoline_put(tramp);
+ else
+ mmu->optimized = true;
+
+unlock:
+ mmap_write_unlock(mm);
+}
+
+static bool can_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
+{
+ if (!is_nop5_insn((uprobe_opcode_t *) &auprobe->insn))
+ return false;
+ /* We can't do cross-page atomic writes yet. */
+ return PAGE_SIZE - (vaddr & ~PAGE_MASK) >= 5;
+}
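
The boundary test works on the page offset of vaddr; a quick sanity check of the arithmetic (user-space sketch assuming 4KB pages):

  #include <assert.h>
  #include <stdio.h>

  #define PAGE_SIZE 4096UL
  #define PAGE_MASK (~(PAGE_SIZE - 1))

  /* Same test as in can_optimize(): the 5-byte call must not cross a
   * page boundary, since the kernel replaces one page at a time. */
  static int fits_in_page(unsigned long vaddr)
  {
  	return PAGE_SIZE - (vaddr & ~PAGE_MASK) >= 5;
  }

  int main(void)
  {
  	assert(fits_in_page(0x1000));  /* page start: 4096 bytes left */
  	assert(fits_in_page(0x1ffb));  /* exactly 5 bytes left */
  	assert(!fits_in_page(0x1ffc)); /* 4 bytes left: would cross */
  	puts("page-boundary checks ok");
  	return 0;
  }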
#else /* 32-bit: */
/*
* No RIP-relative addressing on 32-bit
@@ -954,6 +1122,10 @@ static bool emulate_nop5_insn(struct arch_uprobe *auprobe)
{
return false;
}
+static bool can_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
+{
+ return false;
+}
#endif /* CONFIG_X86_64 */
struct uprobe_xol_ops {
@@ -1317,6 +1489,9 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
if (ret)
return ret;
+ if (can_optimize(auprobe, addr))
+ set_bit(ARCH_UPROBE_FLAG_CAN_OPTIMIZE, &auprobe->flags);
+
ret = branch_setup_xol_ops(auprobe, &insn);
if (ret != -ENOSYS)
return ret;
@@ -1523,15 +1698,23 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
{
int rasize = sizeof_long(regs), nleft;
unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
+ unsigned long off = 0;
+
+ /*
+ * An optimized uprobe goes through the uprobe trampoline, which pushes
+ * four 8-byte values on the stack; see uprobe_trampoline_entry for details.
+ */
+ if (!swbp)
+ off = 4*8;
- if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
+ if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp + off, rasize))
return -1;
/* check whether address has been already hijacked */
if (orig_ret_vaddr == trampoline_vaddr)
return orig_ret_vaddr;
- nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
+ nleft = copy_to_user((void __user *)regs->sp + off, &trampoline_vaddr, rasize);
if (likely(!nleft)) {
if (shstk_update_last_frame(trampoline_vaddr)) {
force_sig(SIGSEGV);
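
For the non-swbp (optimized) path, the return address is therefore no longer at regs->sp but 32 bytes above it. A sketch of the layout this assumes (the four slots are whatever uprobe_trampoline_entry pushes; the values below are stand-ins):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
  	/* Hypothetical user stack at uretprobe time in the optimized
  	 * path: four 8-byte values pushed by the trampoline sit between
  	 * regs->sp and the original return address. */
  	uint64_t stack[5] = { 0x1, 0x2, 0x3, 0x4, 0x400123 };
  	uintptr_t sp = (uintptr_t)stack;   /* stand-in for regs->sp */
  	unsigned long off = 4 * 8;         /* matches the patch */

  	uint64_t *ret_slot = (uint64_t *)(sp + off);
  	printf("return address at sp+%lu: %#llx\n",
  	       off, (unsigned long long)*ret_slot);
  	return 0;
  }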
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -190,7 +190,8 @@ struct uprobes_state {
#endif
};
-typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode, int nbytes);
+typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode,
+ int nbytes, void *data);
extern void __init uprobes_init(void);
extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
@@ -202,7 +203,7 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr,
uprobe_opcode_t, bool);
extern int uprobe_write(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr,
- uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool orig);
+ uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool orig, void *data);
extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc);
extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool);
extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc);
@@ -239,6 +240,7 @@ extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *
extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr);
extern void arch_uprobe_clear_state(struct mm_struct *mm);
extern void arch_uprobe_init_state(struct mm_struct *mm);
+extern void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -264,7 +264,8 @@ static void uprobe_copy_to_page(struct page *page, unsigned long vaddr, const vo
kunmap_atomic(kaddr);
}
-static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode, int nbytes)
+static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode,
+ int nbytes, void *data)
{
uprobe_opcode_t old_opcode;
bool is_swbp;
@@ -473,12 +474,12 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr, uprobe_opcode_t opcode, bool orig)
{
- return uprobe_write(auprobe, mm, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE, verify_opcode, orig);
+ return uprobe_write(auprobe, mm, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE, verify_opcode, orig, NULL);
}
int uprobe_write(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr, uprobe_opcode_t *insn,
- int nbytes, uprobe_write_verify_t verify, bool orig)
+ int nbytes, uprobe_write_verify_t verify, bool orig, void *data)
{
struct page *old_page, *new_page;
struct vm_area_struct *vma;
@@ -494,7 +495,7 @@ int uprobe_write(struct arch_uprobe *auprobe, struct mm_struct *mm,
if (IS_ERR(old_page))
return PTR_ERR(old_page);
- ret = verify(old_page, vaddr, insn, nbytes);
+ ret = verify(old_page, vaddr, insn, nbytes, data);
if (ret <= 0)
goto put_old;
@@ -2668,6 +2669,10 @@ bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c
return true;
}
+void __weak arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
+{
+}
+
/*
* Run handler and ask thread to singlestep.
* Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -2732,6 +2737,9 @@ static void handle_swbp(struct pt_regs *regs)
handler_chain(uprobe, regs, true);
+ /* Try to optimize the uprobe after its first hit. */
+ arch_uprobe_optimize(&uprobe->arch, bp_vaddr);
+
if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
goto out;
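
Since optimization is keyed off a 5-byte nop at the probe site, a target binary would expose one like this (a hand-rolled nop5 for illustration; USDT probes would need their single-byte nop widened to nop5 to benefit):

  #include <stdio.h>

  /* A probe site made of the canonical 5-byte nop (0f 1f 44 00 00).
   * The first uprobe hit takes the int3 slow path; after that,
   * arch_uprobe_optimize() can rewrite the nop5 into a call to the
   * uprobe trampoline, skipping the trap on subsequent hits. */
  static void probe_site(void)
  {
  	asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
  }

  int main(void)
  {
  	for (int i = 0; i < 1000000; i++)
  		probe_site();
  	return 0;
  }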
Putting together all the previously added pieces to support optimized uprobes on top of 5-byte nop instruction. The current uprobe execution goes through following: - installs breakpoint instruction over original instruction - exception handler hit and calls related uprobe consumers - and either simulates original instruction or does out of line single step execution of it - returns to user space The optimized uprobe path - checks the original instruction is 5-byte nop (plus other checks) - adds (or uses existing) user space trampoline and overwrites original instruction (5-byte nop) with call to user space trampoline - the user space trampoline executes uprobe syscall that calls related uprobe consumers - trampoline returns back to next instruction This approach won't speed up all uprobes as it's limited to using nop5 as original instruction, but we could use nop5 as USDT probe instruction (which uses single byte nop ATM) and speed up the USDT probes. This patch overloads related arch functions in uprobe_write_opcode and set_orig_insn so they can install call instruction if needed. The arch_uprobe_optimize triggers the uprobe optimization and is called after first uprobe hit. I originally had it called on uprobe installation but then it clashed with elf loader, because the user space trampoline was added in a place where loader might need to put elf segments, so I decided to do it after first uprobe hit when loading is done. We do not unmap and release uprobe trampoline when it's no longer needed, because there's no easy way to make sure none of the threads is still inside the trampoline. But we do not waste memory, because there's just single page for all the uprobe trampoline mappings. We do waste frmae on page mapping for every 4GB by keeping the uprobe trampoline page mapped, but that seems ok. Attaching the speed up from benchs/run_bench_uprobes.sh script: current: usermode-count : 818.836 ± 2.842M/s syscall-count : 8.917 ± 0.003M/s uprobe-nop : 3.056 ± 0.013M/s uprobe-push : 2.903 ± 0.002M/s uprobe-ret : 1.533 ± 0.001M/s --> uprobe-nop5 : 1.492 ± 0.000M/s uretprobe-nop : 1.783 ± 0.000M/s uretprobe-push : 1.672 ± 0.001M/s uretprobe-ret : 1.067 ± 0.002M/s --> uretprobe-nop5 : 1.052 ± 0.000M/s after the change: usermode-count : 818.386 ± 1.886M/s syscall-count : 8.923 ± 0.003M/s uprobe-nop : 3.086 ± 0.005M/s uprobe-push : 2.751 ± 0.001M/s uprobe-ret : 1.481 ± 0.000M/s --> uprobe-nop5 : 4.016 ± 0.002M/s uretprobe-nop : 1.712 ± 0.008M/s uretprobe-push : 1.616 ± 0.001M/s uretprobe-ret : 1.052 ± 0.000M/s --> uretprobe-nop5 : 2.015 ± 0.000M/s Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- arch/x86/include/asm/uprobes.h | 6 ++ arch/x86/kernel/uprobes.c | 191 ++++++++++++++++++++++++++++++++- include/linux/uprobes.h | 6 +- kernel/events/uprobes.c | 16 ++- 4 files changed, 209 insertions(+), 10 deletions(-)