@@ -345,6 +345,7 @@
333 common io_pgetevents sys_io_pgetevents
334 common rseq sys_rseq
335 common uretprobe sys_uretprobe
+336 common uprobe sys_uprobe
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal sys_pidfd_send_signal
@@ -425,6 +425,54 @@ SYSCALL_DEFINE0(uretprobe)
return -1;
}
+/*
+ * uprobe syscall: run the uprobe handlers for the probe address derived
+ * from the caller's user stack.
+ *
+ * The uprobe trampoline pushes three 8-byte registers (rcx, r11, rax)
+ * before issuing the syscall, so the word at sp + 3*8 is whatever was on
+ * top of the stack when the trampoline was entered, i.e. the return
+ * address of the call that reached the trampoline.
+ *
+ * Returns 0 after handing the address to handle_syscall_uprobe(). If the
+ * stack word cannot be read, SIGILL is forced on the task and -1 is
+ * returned.
+ *
+ * NOTE(review): nothing in this function checks that the syscall was
+ * actually issued from the uprobe trampoline.
+ */
+SYSCALL_DEFINE0(uprobe)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ unsigned long bp_vaddr;
+ int err;
+
+ /* Fetch the return address stored above the three saved registers. */
+ err = copy_from_user(&bp_vaddr, (void __user *)regs->sp + 3*8, sizeof(bp_vaddr));
+ if (err) {
+ force_sig(SIGILL);
+ return -1;
+ }
+
+ /*
+ * Step back from the return address to the probed address.
+ * NOTE(review): 5 is presumably the size of the call instruction
+ * placed at the probe site - confirm.
+ */
+ handle_syscall_uprobe(regs, bp_vaddr - 5);
+ return 0;
+}
+
+/*
+ * User space trampoline template, emitted into .rodata and delimited by
+ * the global symbols uprobe_trampoline_entry and uprobe_trampoline_end.
+ *
+ * It saves rcx, r11 and rax on the stack, loads __NR_uprobe into rax,
+ * executes the syscall instruction, restores the three registers in
+ * reverse order and returns to the caller.
+ *
+ * rcx and r11 are saved because the syscall instruction overwrites them;
+ * rax is saved because it is reused for the syscall number.
+ *
+ * sys_uprobe reads the user stack at sp + 3*8, which matches the three
+ * pushes done here; keep the two in sync.
+ */
+asm (
+ ".pushsection .rodata\n"
+ ".global uprobe_trampoline_entry\n"
+ "uprobe_trampoline_entry:\n"
+ "push %rcx\n"
+ "push %r11\n"
+ "push %rax\n"
+ "movq $" __stringify(__NR_uprobe) ", %rax\n"
+ "syscall\n"
+ "pop %rax\n"
+ "pop %r11\n"
+ "pop %rcx\n"
+ "ret\n"
+ ".global uprobe_trampoline_end\n"
+ "uprobe_trampoline_end:\n"
+ ".popsection\n"
+);
+
+/* Start and end markers of the trampoline code defined in the asm block. */
+extern __visible u8 uprobe_trampoline_entry[];
+extern __visible u8 uprobe_trampoline_end[];
+
+/*
+ * Return the uprobe trampoline code for the current task.
+ *
+ * @psize: set to the trampoline length in bytes when a trampoline is
+ *         returned; left untouched otherwise.
+ *
+ * Returns a pointer to the trampoline template when the current task's
+ * user registers are in 64-bit mode, NULL otherwise.
+ */
+void *arch_uprobe_trampoline(unsigned long *psize)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+
+ if (user_64bit_mode(regs)) {
+ *psize = uprobe_trampoline_end - uprobe_trampoline_entry;
+ return uprobe_trampoline_entry;
+ }
+ return NULL;
+}
+
/*
* If arch_uprobe->insn doesn't use rip-relative addressing, return
* immediately. Otherwise, rewrite the instruction so that it accesses
@@ -981,6 +981,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
asmlinkage long sys_uretprobe(void);
+asmlinkage long sys_uprobe(void);
+
/* pciconfig: alpha, arm, arm64, ia64, sparc */
asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn,
unsigned long off, unsigned long len,
@@ -231,6 +231,8 @@ extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int len, void *data);
struct tramp_area *get_tramp_area(unsigned long vaddr);
void put_tramp_area(struct tramp_area *area);
bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
+extern void *arch_uprobe_trampoline(unsigned long *psize);
+extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};
@@ -621,6 +621,11 @@ bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
return false;
}
+/*
+ * Default (weak) implementation: no trampoline is available. Returns NULL
+ * and does not write *psize, so callers must check the return value
+ * before using the size.
+ */
+void * __weak arch_uprobe_trampoline(unsigned long *psize)
+{
+ return NULL;
+}
+
static unsigned long find_nearest_page(unsigned long vaddr)
{
struct mm_struct *mm = current->mm;
@@ -673,7 +678,13 @@ static struct tramp_area *create_tramp_area(unsigned long vaddr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+ unsigned long tramp_size;
struct tramp_area *area;
+ void *tramp;
+
+ tramp = arch_uprobe_trampoline(&tramp_size);
+ if (!tramp)
+ return NULL;
vaddr = find_nearest_page(vaddr);
if (!vaddr)
@@ -690,6 +701,8 @@ static struct tramp_area *create_tramp_area(unsigned long vaddr)
refcount_set(&area->ref, 1);
area->vaddr = vaddr;
+ arch_uprobe_copy_ixol(area->page, 0, tramp, tramp_size);
+
vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO,
&tramp_mapping);
@@ -2757,6 +2770,28 @@ static void handle_swbp(struct pt_regs *regs)
rcu_read_unlock_trace();
}
+/*
+ * Run the handlers of the uprobe found at @bp_vaddr on behalf of the
+ * uprobe syscall.
+ *
+ * @regs:     user registers of the current task, passed on to the
+ *            arch ignore check and to the handler chain.
+ * @bp_vaddr: user virtual address used to look up the uprobe.
+ *
+ * The lookup and the handler chain run under rcu_read_lock_trace().
+ * The function returns silently, without calling any handler, when no
+ * uprobe is found, when get_utask() fails, or when arch_uprobe_ignore()
+ * reports that the probe should be ignored.
+ */
+void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr)
+{
+ struct uprobe *uprobe;
+ int is_swbp; /* filled in by the lookup, not otherwise used here */
+
+ rcu_read_lock_trace();
+ uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
+ if (!uprobe)
+ goto unlock;
+
+ if (!get_utask())
+ goto unlock;
+
+ if (arch_uprobe_ignore(&uprobe->arch, regs))
+ goto unlock;
+
+ handler_chain(uprobe, regs);
+
+unlock:
+ rcu_read_unlock_trace();
+}
+
/*
* Perform required fix-ups and disable singlestep.
* Allow pending signals to take effect.
@@ -392,3 +392,4 @@ COND_SYSCALL(setuid16);
COND_SYSCALL(rseq);
COND_SYSCALL(uretprobe);
+COND_SYSCALL(uprobe);
Add a new uprobe syscall that calls the uprobe handlers for a given 'breakpoint' address. The idea is that the 'breakpoint' address calls the user space trampoline, which executes the uprobe syscall. The syscall handler reads the return address of the initial call to retrieve the original 'breakpoint' address. With this address we find the related uprobe object and call its consumers. TODO: allow the uprobe syscall to be called only from the uprobe trampoline. Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/x86/kernel/uprobes.c | 48 ++++++++++++++++++++++++++ include/linux/syscalls.h | 2 ++ include/linux/uprobes.h | 2 ++ kernel/events/uprobes.c | 35 +++++++++++++++++++ kernel/sys_ni.c | 1 + 6 files changed, 89 insertions(+)