Message ID | 20240710163133.GD13298@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | uprobes: future cleanups for review | expand |
On Wed, Jul 10, 2024 at 06:31:33PM +0200, Oleg Nesterov wrote: SNIP > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index 467f358c8ce7..7571811127a2 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -3157,6 +3157,7 @@ struct bpf_uprobe { > loff_t offset; > unsigned long ref_ctr_offset; > u64 cookie; > + struct uprobe *uprobe; > struct uprobe_consumer consumer; > }; > > @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > { > u32 i; > > - for (i = 0; i < cnt; i++) { > - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > - &uprobes[i].consumer); > - } nice, we could also drop path argument now jirka > + for (i = 0; i < cnt; i++) > + uprobe_unregister(uprobes[i].uprobe, &uprobes[i].consumer); > } > > static void bpf_uprobe_multi_link_release(struct bpf_link *link) > @@ -3477,11 +3476,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > &bpf_uprobe_multi_link_lops, prog); > > for (i = 0; i < cnt; i++) { > - err = uprobe_register(d_real_inode(link->path.dentry), > + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), > uprobes[i].offset, > uprobes[i].ref_ctr_offset, > &uprobes[i].consumer); > - if (err) { > + if (IS_ERR(uprobes[i].uprobe)) { > + err = PTR_ERR(uprobes[i].uprobe); > bpf_uprobe_unregister(&path, uprobes, i); > goto error_free; > }
On Wed, Jul 10, 2024 at 9:33 AM Oleg Nesterov <oleg@redhat.com> wrote: > > This way uprobe_unregister() and uprobe_apply() do not need find_uprobe() + > put_uprobe(). And to me this change simplifies the code a bit. > > Signed-off-by: Oleg Nesterov <oleg@redhat.com> > --- > include/linux/uprobes.h | 14 ++++++------ > kernel/events/uprobes.c | 45 ++++++++++++------------------------- > kernel/trace/bpf_trace.c | 12 +++++----- > kernel/trace/trace_uprobe.c | 28 +++++++++++------------ > 4 files changed, 41 insertions(+), 58 deletions(-) > > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h > index aa89a8b67039..399509befcf4 100644 > --- a/include/linux/uprobes.h > +++ b/include/linux/uprobes.h I don't see struct uprobe forward-declared in this header, maybe we should add it? > @@ -110,9 +110,9 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); > extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); > extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); > extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); > -extern int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); > -extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); > -extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); > +extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); > +extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); > +extern void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc); > extern int uprobe_mmap(struct vm_area_struct *vma); > extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); > extern void uprobe_start_dup_mmap(void); > @@ -147,18 +147,18 @@ static inline void uprobes_init(void) > > #define uprobe_get_trap_addr(regs) instruction_pointer(regs) > > -static inline int > +static inline struct uprobe * > uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) > { > - return -ENOSYS; > + return ERR_PTR(-ENOSYS); > } > static inline int > -uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) > +uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) > { > return -ENOSYS; > } complete aside, when I was looking at this code I was wondering why we even need uprobe_apply, it looks like some hacky variant of uprobe_register and uprobe_unregister. I didn't dig deeper, but think whether we even need this? If this is just to avoid (for some period) some consumer callback calling, then that could be handled at the consumer side by ignoring such calls. callback call is cheap, it's the int3 handling that's expensive and with uprobe_apply we are already paying it anyways, so what is this for? > static inline void > -uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) > +uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) > { > } > static inline int uprobe_mmap(struct vm_area_struct *vma) [...] > > @@ -1133,41 +1126,39 @@ EXPORT_SYMBOL_GPL(uprobe_unregister); > * refcount is released when the last @uc for the @uprobe > * unregisters. Caller of uprobe_register() is required to keep @inode > * (and the containing mount) referenced. > - * > - * Return errno if it cannot successully install probes > - * else return 0 (success) mention that it never returns NULL, but rather encodes error code inside the pointer on error? It's an important part of the contract. > */ > -int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, > - struct uprobe_consumer *uc) > +struct uprobe *uprobe_register(struct inode *inode, > + loff_t offset, loff_t ref_ctr_offset, > + struct uprobe_consumer *uc) > { [...] > @@ -1186,35 +1177,27 @@ int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, > > if (unlikely(ret == -EAGAIN)) > goto retry; > - return ret; > + > + return ret ? ERR_PTR(ret) : uprobe; > } > EXPORT_SYMBOL_GPL(uprobe_register); > > /* this should be /** for doccomment checking (you'd get a warning for missing @uprobe if there was this extra star) > * uprobe_apply - unregister an already registered probe. > - * @inode: the file in which the probe has to be removed. > - * @offset: offset from the start of the file. add @uprobe description now? > * @uc: consumer which wants to add more or remove some breakpoints > * @add: add or remove the breakpoints > */ > -int uprobe_apply(struct inode *inode, loff_t offset, > - struct uprobe_consumer *uc, bool add) > +int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add) > { > - struct uprobe *uprobe; > struct uprobe_consumer *con; > int ret = -ENOENT; > [...] > @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > { > u32 i; > > - for (i = 0; i < cnt; i++) { > - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > - &uprobes[i].consumer); > - } > + for (i = 0; i < cnt; i++) you'll now need !IS_ERR_OR_NULL(uprobes[i].uprobe) check (or just NULL check if you null-out it below) > + uprobe_unregister(uprobes[i].uprobe, &uprobes[i].consumer); > } > > static void bpf_uprobe_multi_link_release(struct bpf_link *link) > @@ -3477,11 +3476,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > &bpf_uprobe_multi_link_lops, prog); > > for (i = 0; i < cnt; i++) { > - err = uprobe_register(d_real_inode(link->path.dentry), > + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), will uprobe keep inode alive as long as uprobe is attached? If that's the case we can get rid of link->path (have it only as a local variable which we put as soon as we are done with registration). We can probably do that clean up separately, I'll defer to Jiri. > uprobes[i].offset, > uprobes[i].ref_ctr_offset, > &uprobes[i].consumer); > - if (err) { > + if (IS_ERR(uprobes[i].uprobe)) { > + err = PTR_ERR(uprobes[i].uprobe); we can NULL-out uprobe on error for bpf_uprobe_unregister() to handle only NULL vs non-NULL case or maybe better yet let's just have local struct uprobe variable and only assign it if registration succeeded (still need NULL check in bpf_uprobe_unregister above) > bpf_uprobe_unregister(&path, uprobes, i); > goto error_free; > } [...]
On Wed, Jul 10, 2024 at 9:49 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > On Wed, Jul 10, 2024 at 06:31:33PM +0200, Oleg Nesterov wrote: > > SNIP > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index 467f358c8ce7..7571811127a2 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -3157,6 +3157,7 @@ struct bpf_uprobe { > > loff_t offset; > > unsigned long ref_ctr_offset; > > u64 cookie; > > + struct uprobe *uprobe; > > struct uprobe_consumer consumer; > > }; > > > > @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > > { > > u32 i; > > > > - for (i = 0; i < cnt; i++) { > > - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > > - &uprobes[i].consumer); > > - } > > nice, we could also drop path argument now see my comments to Oleg, I think we can/should get rid of link->path altogether if uprobe itself keeps inode alive. BTW, Jiri, do we have any test for multi-uprobe that simulates partial attachment success/failure (whichever way you want to look at it). It would be super useful to have to check at least some error handling code in the uprobe code base. If we don't, do you mind adding something simple to BPF selftests? > > jirka > > > + for (i = 0; i < cnt; i++) > > + uprobe_unregister(uprobes[i].uprobe, &uprobes[i].consumer); > > } > > > > static void bpf_uprobe_multi_link_release(struct bpf_link *link) > > @@ -3477,11 +3476,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > > &bpf_uprobe_multi_link_lops, prog); > > > > for (i = 0; i < cnt; i++) { > > - err = uprobe_register(d_real_inode(link->path.dentry), > > + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), > > uprobes[i].offset, > > uprobes[i].ref_ctr_offset, > > &uprobes[i].consumer); > > - if (err) { > > + if (IS_ERR(uprobes[i].uprobe)) { > > + err = PTR_ERR(uprobes[i].uprobe); > > bpf_uprobe_unregister(&path, uprobes, i); > > goto error_free; > > }
On 07/10, Jiri Olsa wrote: > > > @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > > { > > u32 i; > > > > - for (i = 0; i < cnt; i++) { > > - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > > - &uprobes[i].consumer); > > - } > > nice, we could also drop path argument now Indeed, I missed that. Will send V2 when/if this makes any sense. Thanks! Oleg.
On Wed, Jul 10, 2024 at 11:23:10AM -0700, Andrii Nakryiko wrote: > On Wed, Jul 10, 2024 at 9:49 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > > > On Wed, Jul 10, 2024 at 06:31:33PM +0200, Oleg Nesterov wrote: > > > > SNIP > > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > > index 467f358c8ce7..7571811127a2 100644 > > > --- a/kernel/trace/bpf_trace.c > > > +++ b/kernel/trace/bpf_trace.c > > > @@ -3157,6 +3157,7 @@ struct bpf_uprobe { > > > loff_t offset; > > > unsigned long ref_ctr_offset; > > > u64 cookie; > > > + struct uprobe *uprobe; > > > struct uprobe_consumer consumer; > > > }; > > > > > > @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > > > { > > > u32 i; > > > > > > - for (i = 0; i < cnt; i++) { > > > - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > > > - &uprobes[i].consumer); > > > - } > > > > nice, we could also drop path argument now > > see my comments to Oleg, I think we can/should get rid of link->path > altogether if uprobe itself keeps inode alive. yea, I was thinking of that, but then it's kind of useful to have it in bpf_uprobe_multi_link_fill_link_info, otherwise we have to take it from first uprobe in the link, but ok, still probably worth to remove it ;-) anyway as you wrote it's ok for follow up cleanup, I'll check on that > > BTW, Jiri, do we have any test for multi-uprobe that simulates partial > attachment success/failure (whichever way you want to look at it). It > would be super useful to have to check at least some error handling > code in the uprobe code base. If we don't, do you mind adding > something simple to BPF selftests? there's test_attach_api_fails, but I think all checked fails are before actually calling uprobe_register function I think there are few ways to fail the uprobe_register, like install it on top of int3.. will check add some test for that jirka > > > > > jirka > > > > > + for (i = 0; i < cnt; i++) > > > + uprobe_unregister(uprobes[i].uprobe, &uprobes[i].consumer); > > > } > > > > > > static void bpf_uprobe_multi_link_release(struct bpf_link *link) > > > @@ -3477,11 +3476,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > > > &bpf_uprobe_multi_link_lops, prog); > > > > > > for (i = 0; i < cnt; i++) { > > > - err = uprobe_register(d_real_inode(link->path.dentry), > > > + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), > > > uprobes[i].offset, > > > uprobes[i].ref_ctr_offset, > > > &uprobes[i].consumer); > > > - if (err) { > > > + if (IS_ERR(uprobes[i].uprobe)) { > > > + err = PTR_ERR(uprobes[i].uprobe); > > > bpf_uprobe_unregister(&path, uprobes, i); > > > goto error_free; > > > }
On Wed, Jul 10, 2024 at 12:38 PM Jiri Olsa <olsajiri@gmail.com> wrote: > > On Wed, Jul 10, 2024 at 11:23:10AM -0700, Andrii Nakryiko wrote: > > On Wed, Jul 10, 2024 at 9:49 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > > > > > On Wed, Jul 10, 2024 at 06:31:33PM +0200, Oleg Nesterov wrote: > > > > > > SNIP > > > > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > > > index 467f358c8ce7..7571811127a2 100644 > > > > --- a/kernel/trace/bpf_trace.c > > > > +++ b/kernel/trace/bpf_trace.c > > > > @@ -3157,6 +3157,7 @@ struct bpf_uprobe { > > > > loff_t offset; > > > > unsigned long ref_ctr_offset; > > > > u64 cookie; > > > > + struct uprobe *uprobe; > > > > struct uprobe_consumer consumer; > > > > }; > > > > > > > > @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > > > > { > > > > u32 i; > > > > > > > > - for (i = 0; i < cnt; i++) { > > > > - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > > > > - &uprobes[i].consumer); > > > > - } > > > > > > nice, we could also drop path argument now > > > > see my comments to Oleg, I think we can/should get rid of link->path > > altogether if uprobe itself keeps inode alive. > > yea, I was thinking of that, but then it's kind of useful to have it in > bpf_uprobe_multi_link_fill_link_info, otherwise we have to take it from > first uprobe in the link, but ok, still probably worth to remove it ;-) if we need it for link_info, probably cleaner to just keep it, no big deal then > > anyway as you wrote it's ok for follow up cleanup, I'll check on that > > > > > BTW, Jiri, do we have any test for multi-uprobe that simulates partial > > attachment success/failure (whichever way you want to look at it). It > > would be super useful to have to check at least some error handling > > code in the uprobe code base. If we don't, do you mind adding > > something simple to BPF selftests? > > there's test_attach_api_fails, but I think all checked fails are before > actually calling uprobe_register function > > I think there are few ways to fail the uprobe_register, like install it > on top of int3.. will check add some test for that > great, thank you! > jirka > > > > > > > > > jirka > > > > > > > + for (i = 0; i < cnt; i++) > > > > + uprobe_unregister(uprobes[i].uprobe, &uprobes[i].consumer); > > > > } > > > > > > > > static void bpf_uprobe_multi_link_release(struct bpf_link *link) > > > > @@ -3477,11 +3476,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > > > > &bpf_uprobe_multi_link_lops, prog); > > > > > > > > for (i = 0; i < cnt; i++) { > > > > - err = uprobe_register(d_real_inode(link->path.dentry), > > > > + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), > > > > uprobes[i].offset, > > > > uprobes[i].ref_ctr_offset, > > > > &uprobes[i].consumer); > > > > - if (err) { > > > > + if (IS_ERR(uprobes[i].uprobe)) { > > > > + err = PTR_ERR(uprobes[i].uprobe); > > > > bpf_uprobe_unregister(&path, uprobes, i); > > > > goto error_free; > > > > }
On 07/10, Andrii Nakryiko wrote: > > On Wed, Jul 10, 2024 at 9:33 AM Oleg Nesterov <oleg@redhat.com> wrote: > > > > This way uprobe_unregister() and uprobe_apply() do not need find_uprobe() + > > put_uprobe(). And to me this change simplifies the code a bit. > > > > Signed-off-by: Oleg Nesterov <oleg@redhat.com> > > --- > > include/linux/uprobes.h | 14 ++++++------ > > kernel/events/uprobes.c | 45 ++++++++++++------------------------- > > kernel/trace/bpf_trace.c | 12 +++++----- > > kernel/trace/trace_uprobe.c | 28 +++++++++++------------ > > 4 files changed, 41 insertions(+), 58 deletions(-) > > > > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h > > index aa89a8b67039..399509befcf4 100644 > > --- a/include/linux/uprobes.h > > +++ b/include/linux/uprobes.h > > I don't see struct uprobe forward-declared in this header, maybe we > should add it? Probably yes, thanks... Although the current code already uses struct uprobes * without forward-declaration at least if CONFIG_UPROBES=y. Thanks, will add. > > static inline int > > -uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) > > +uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) > > { > > return -ENOSYS; > > } > > complete aside, when I was looking at this code I was wondering why we > even need uprobe_apply, it looks like some hacky variant of > uprobe_register and uprobe_unregister. All I can say is that - I can hardly recall this logic, I'll try to do this tomorrow and write another email - in any case this logic is ugly and needs more cleanups - but this patch only tries to simplify this code without any visible changes. > > @@ -1133,41 +1126,39 @@ EXPORT_SYMBOL_GPL(uprobe_unregister); > > * refcount is released when the last @uc for the @uprobe > > * unregisters. Caller of uprobe_register() is required to keep @inode > > * (and the containing mount) referenced. > > - * > > - * Return errno if it cannot successully install probes > > - * else return 0 (success) > > mention that it never returns NULL, but rather encodes error code > inside the pointer on error? It's an important part of the contract. OK... > > /* > > this should be /** for doccomment checking (you'd get a warning for > missing @uprobe if there was this extra star) Well, this is what we have before this patch, but OK > > * uprobe_apply - unregister an already registered probe. > > - * @inode: the file in which the probe has to be removed. > > - * @offset: offset from the start of the file. > > add @uprobe description now? If only I knew what this @uprobe description can say ;) > > @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > > { > > u32 i; > > > > - for (i = 0; i < cnt; i++) { > > - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > > - &uprobes[i].consumer); > > - } > > + for (i = 0; i < cnt; i++) > > you'll now need !IS_ERR_OR_NULL(uprobes[i].uprobe) check (or just NULL > check if you null-out it below) Hmm... are you sure? I'll re-check... See also the end of my email. > > @@ -3477,11 +3476,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > > &bpf_uprobe_multi_link_lops, prog); > > > > for (i = 0; i < cnt; i++) { > > - err = uprobe_register(d_real_inode(link->path.dentry), > > + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), > > will uprobe keep inode alive as long as uprobe is attached? Why? No, this patch doesn't (shouldn't) change the current behaviour. The users still need kern_path/path_put to, say, prevent umount. > we can NULL-out uprobe on error for bpf_uprobe_unregister() to handle > only NULL vs non-NULL case Not sure I understand... > or maybe better yet let's just have local struct uprobe variable and > only assign it if registration succeeded We can, but > (still need NULL check in > bpf_uprobe_unregister above) Why? If bpf_uprobe_unregister() is called by bpf_uprobe_multi_link_attach() on error, it passes cnt = i where is the 1st failed uprobe index. IOW, uptobes[i].uprobe can't be IS_ERR_OR_NULL() in the 0..cnt-1 range. If it is called by bpf_uprobe_multi_link_release() then the whole 0..umulti_link->cnt-1 range should be fine? Oleg.
On Wed, Jul 10, 2024 at 1:18 PM Oleg Nesterov <oleg@redhat.com> wrote: > > On 07/10, Andrii Nakryiko wrote: > > > > On Wed, Jul 10, 2024 at 9:33 AM Oleg Nesterov <oleg@redhat.com> wrote: > > > > > > This way uprobe_unregister() and uprobe_apply() do not need find_uprobe() + > > > put_uprobe(). And to me this change simplifies the code a bit. > > > > > > Signed-off-by: Oleg Nesterov <oleg@redhat.com> > > > --- > > > include/linux/uprobes.h | 14 ++++++------ > > > kernel/events/uprobes.c | 45 ++++++++++++------------------------- > > > kernel/trace/bpf_trace.c | 12 +++++----- > > > kernel/trace/trace_uprobe.c | 28 +++++++++++------------ > > > 4 files changed, 41 insertions(+), 58 deletions(-) > > > > > > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h > > > index aa89a8b67039..399509befcf4 100644 > > > --- a/include/linux/uprobes.h > > > +++ b/include/linux/uprobes.h > > > > I don't see struct uprobe forward-declared in this header, maybe we > > should add it? > > Probably yes, thanks... Although the current code already uses > struct uprobes * without forward-declaration at least if CONFIG_UPROBES=y. > Thanks, will add. > Yep, I saw that and was wondering as well. > > > static inline int > > > -uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) > > > +uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) > > > { > > > return -ENOSYS; > > > } > > > > complete aside, when I was looking at this code I was wondering why we > > even need uprobe_apply, it looks like some hacky variant of > > uprobe_register and uprobe_unregister. > > All I can say is that > > - I can hardly recall this logic, I'll try to do this tomorrow > and write another email > > - in any case this logic is ugly and needs more cleanups > > - but this patch only tries to simplify this code without any > visible changes. yep, that's why it's an aside, up to you > > > > @@ -1133,41 +1126,39 @@ EXPORT_SYMBOL_GPL(uprobe_unregister); > > > * refcount is released when the last @uc for the @uprobe > > > * unregisters. Caller of uprobe_register() is required to keep @inode > > > * (and the containing mount) referenced. > > > - * > > > - * Return errno if it cannot successully install probes > > > - * else return 0 (success) > > > > mention that it never returns NULL, but rather encodes error code > > inside the pointer on error? It's an important part of the contract. > > OK... > > > > /* > > > > this should be /** for doccomment checking (you'd get a warning for > > missing @uprobe if there was this extra star) > > Well, this is what we have before this patch, but OK > > > > * uprobe_apply - unregister an already registered probe. > > > - * @inode: the file in which the probe has to be removed. > > > - * @offset: offset from the start of the file. > > > > add @uprobe description now? > > If only I knew what this @uprobe description can say ;) I'm pointing this out because I accidentally used /** for comment for some function, and I got some bot report about missing argument. I think /** makes sense for documenting "public API" function, so which is why all the above. > > > > @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > > > { > > > u32 i; > > > > > > - for (i = 0; i < cnt; i++) { > > > - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > > > - &uprobes[i].consumer); > > > - } > > > + for (i = 0; i < cnt; i++) > > > > you'll now need !IS_ERR_OR_NULL(uprobes[i].uprobe) check (or just NULL > > check if you null-out it below) > > Hmm... are you sure? I'll re-check... See also the end of my email. no, you are right, it should be fine > > > > @@ -3477,11 +3476,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > > > &bpf_uprobe_multi_link_lops, prog); > > > > > > for (i = 0; i < cnt; i++) { > > > - err = uprobe_register(d_real_inode(link->path.dentry), > > > + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), > > > > will uprobe keep inode alive as long as uprobe is attached? > > Why? No, this patch doesn't (shouldn't) change the current behaviour. > > The users still need kern_path/path_put to, say, prevent umount. ok, then link->path has to stay, I was just wondering if we need to keep it alive > > > we can NULL-out uprobe on error for bpf_uprobe_unregister() to handle > > only NULL vs non-NULL case > > Not sure I understand... > > > or maybe better yet let's just have local struct uprobe variable and > > only assign it if registration succeeded > > We can, but > > > (still need NULL check in > > bpf_uprobe_unregister above) > > Why? > > If bpf_uprobe_unregister() is called by bpf_uprobe_multi_link_attach() on > error, it passes cnt = i where is the 1st failed uprobe index. IOW, > uptobes[i].uprobe can't be IS_ERR_OR_NULL() in the 0..cnt-1 range. > > If it is called by bpf_uprobe_multi_link_release() then the whole > 0..umulti_link->cnt-1 range should be fine? yes, you are absolutely right, I missed that for this partial failure case we pass i as count into bpf_uprobe_unregister(), sorry about the noise! please feel free to add my ack for the next revision: Acked-by: Andrii Nakryiko <andrii@kernel.org> > > Oleg. >
On 07/10, Oleg Nesterov wrote: > > -void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) > +void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) > { > - struct uprobe *uprobe; > - > - uprobe = find_uprobe(inode, offset); > - if (WARN_ON(!uprobe)) > - return; > - > down_write(&uprobe->register_rwsem); > __uprobe_unregister(uprobe, uc); > up_write(&uprobe->register_rwsem); > - put_uprobe(uprobe); OK, this is obviously wrong, needs get_uprobe/put_uprobe. __uprobe_unregister() can free this uprobe, so up_write(&uprobe->register_rwsem) is not safe. I'll send V2 on top of Peter's new version. Oleg.
On Thu, Jul 11, 2024 at 2:28 AM Oleg Nesterov <oleg@redhat.com> wrote: > > On 07/10, Oleg Nesterov wrote: > > > > -void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) > > +void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) > > { > > - struct uprobe *uprobe; > > - > > - uprobe = find_uprobe(inode, offset); > > - if (WARN_ON(!uprobe)) > > - return; > > - > > down_write(&uprobe->register_rwsem); > > __uprobe_unregister(uprobe, uc); > > up_write(&uprobe->register_rwsem); > > - put_uprobe(uprobe); > > OK, this is obviously wrong, needs get_uprobe/put_uprobe. __uprobe_unregister() > can free this uprobe, so up_write(&uprobe->register_rwsem) is not safe. uprobe_register(), given it returns an uprobe instance to the caller should keep refcount on it (it belongs to uprobe_consumer). That's what I did for my patches, are you going to do that as well? We basically do the same thing, just interfaces look a bit different. > > I'll send V2 on top of Peter's new version. > > Oleg. >
On 07/11, Andrii Nakryiko wrote: > > On Thu, Jul 11, 2024 at 2:28 AM Oleg Nesterov <oleg@redhat.com> wrote: > > > > On 07/10, Oleg Nesterov wrote: > > > > > > -void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) > > > +void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) > > > { > > > - struct uprobe *uprobe; > > > - > > > - uprobe = find_uprobe(inode, offset); > > > - if (WARN_ON(!uprobe)) > > > - return; > > > - > > > down_write(&uprobe->register_rwsem); > > > __uprobe_unregister(uprobe, uc); > > > up_write(&uprobe->register_rwsem); > > > - put_uprobe(uprobe); > > > > OK, this is obviously wrong, needs get_uprobe/put_uprobe. __uprobe_unregister() > > can free this uprobe, so up_write(&uprobe->register_rwsem) is not safe. > > uprobe_register(), given it returns an uprobe instance to the caller > should keep refcount on it (it belongs to uprobe_consumer). Of course. And again, this patch doesn't change the curent behaviour. > That's > what I did for my patches, are you going to do that as well? > > We basically do the same thing, just interfaces look a bit different. Not sure. Well I do not really know, I didn't read your series to the end, sorry ;) The same for V1/V2 from Peter so far. But let me say this just in case... With or without this change, currently uprobe_consumer doesn't have an "individual" ref to uprobe. The fact that uprobe->consumers != NULL adds a reference. Lets not discuss if this is good or bad right now, this cleanup is only cleanup. ------------------------------------------------------------------------ Now, let me add another "just in case" note to explain what I am going to do in V2. So. this patch should turn uprobe_unregister() into something like void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { // Ugly !!!! please kill me!!! get_uprobe(uprobe); down_write(&uprobe->register_rwsem); __uprobe_unregister(uprobe, uc); up_write(&uprobe->register_rwsem); put_uprobe(uprobe); } to simplify this change. And the next (simple) patch will kill these get_uprobe + put_uprobe, we just need to shift the (possibly) final put_uprobe() from delete_uprobe() to unregister(). But of course, I will recheck before I send V2. Oleg.
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index aa89a8b67039..399509befcf4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -110,9 +110,9 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); -extern int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); -extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); -extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); +extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); +extern void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void uprobe_start_dup_mmap(void); @@ -147,18 +147,18 @@ static inline void uprobes_init(void) #define uprobe_get_trap_addr(regs) instruction_pointer(regs) -static inline int +static inline struct uprobe * uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { - return -ENOSYS; + return ERR_PTR(-ENOSYS); } static inline int -uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) +uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) { return -ENOSYS; } static inline void -uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { } static inline int uprobe_mmap(struct vm_area_struct *vma) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e5b7c6239970..dba41403d7b8 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1100,22 +1100,15 @@ __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) /* * uprobe_unregister - unregister an already registered probe. - * @inode: the file in which the probe has to be removed. + * @uprobe: uprobe to remove * @offset: offset from the start of the file. * @uc: identify which probe if multiple probes are colocated. */ -void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { - struct uprobe *uprobe; - - uprobe = find_uprobe(inode, offset); - if (WARN_ON(!uprobe)) - return; - down_write(&uprobe->register_rwsem); __uprobe_unregister(uprobe, uc); up_write(&uprobe->register_rwsem); - put_uprobe(uprobe); } EXPORT_SYMBOL_GPL(uprobe_unregister); @@ -1133,41 +1126,39 @@ EXPORT_SYMBOL_GPL(uprobe_unregister); * refcount is released when the last @uc for the @uprobe * unregisters. Caller of uprobe_register() is required to keep @inode * (and the containing mount) referenced. - * - * Return errno if it cannot successully install probes - * else return 0 (success) */ -int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, - struct uprobe_consumer *uc) +struct uprobe *uprobe_register(struct inode *inode, + loff_t offset, loff_t ref_ctr_offset, + struct uprobe_consumer *uc) { struct uprobe *uprobe; int ret; /* Uprobe must have at least one set consumer */ if (!uc->handler && !uc->ret_handler) - return -EINVAL; + return ERR_PTR(-EINVAL); /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */ if (!inode->i_mapping->a_ops->read_folio && !shmem_mapping(inode->i_mapping)) - return -EIO; + return ERR_PTR(-EIO); /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) - return -EINVAL; + return ERR_PTR(-EINVAL); /* * This ensures that copy_from_page(), copy_to_page() and * __update_ref_ctr() can't cross page boundary. */ if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE)) - return -EINVAL; + return ERR_PTR(-EINVAL); if (!IS_ALIGNED(ref_ctr_offset, sizeof(short))) - return -EINVAL; + return ERR_PTR(-EINVAL); retry: uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (IS_ERR(uprobe)) - return PTR_ERR(uprobe); + return uprobe; /* * We can race with uprobe_unregister()->delete_uprobe(). @@ -1186,35 +1177,27 @@ int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, if (unlikely(ret == -EAGAIN)) goto retry; - return ret; + + return ret ? ERR_PTR(ret) : uprobe; } EXPORT_SYMBOL_GPL(uprobe_register); /* * uprobe_apply - unregister an already registered probe. - * @inode: the file in which the probe has to be removed. - * @offset: offset from the start of the file. * @uc: consumer which wants to add more or remove some breakpoints * @add: add or remove the breakpoints */ -int uprobe_apply(struct inode *inode, loff_t offset, - struct uprobe_consumer *uc, bool add) +int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add) { - struct uprobe *uprobe; struct uprobe_consumer *con; int ret = -ENOENT; - uprobe = find_uprobe(inode, offset); - if (WARN_ON(!uprobe)) - return ret; - down_write(&uprobe->register_rwsem); for (con = uprobe->consumers; con && con != uc ; con = con->next) ; if (con) ret = register_for_each_vma(uprobe, add ? uc : NULL); up_write(&uprobe->register_rwsem); - put_uprobe(uprobe); return ret; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 467f358c8ce7..7571811127a2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3157,6 +3157,7 @@ struct bpf_uprobe { loff_t offset; unsigned long ref_ctr_offset; u64 cookie; + struct uprobe *uprobe; struct uprobe_consumer consumer; }; @@ -3180,10 +3181,8 @@ static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, { u32 i; - for (i = 0; i < cnt; i++) { - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, - &uprobes[i].consumer); - } + for (i = 0; i < cnt; i++) + uprobe_unregister(uprobes[i].uprobe, &uprobes[i].consumer); } static void bpf_uprobe_multi_link_release(struct bpf_link *link) @@ -3477,11 +3476,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr &bpf_uprobe_multi_link_lops, prog); for (i = 0; i < cnt; i++) { - err = uprobe_register(d_real_inode(link->path.dentry), + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), uprobes[i].offset, uprobes[i].ref_ctr_offset, &uprobes[i].consumer); - if (err) { + if (IS_ERR(uprobes[i].uprobe)) { + err = PTR_ERR(uprobes[i].uprobe); bpf_uprobe_unregister(&path, uprobes, i); goto error_free; } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 78a5c40e885a..2872955da202 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -58,8 +58,8 @@ struct trace_uprobe { struct dyn_event devent; struct uprobe_consumer consumer; struct path path; - struct inode *inode; char *filename; + struct uprobe *uprobe; unsigned long offset; unsigned long ref_ctr_offset; unsigned long nhit; @@ -1084,17 +1084,17 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self, static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) { - int ret; + struct inode *inode = d_real_inode(tu->path.dentry); + struct uprobe *uprobe; tu->consumer.filter = filter; - tu->inode = d_real_inode(tu->path.dentry); - - ret = uprobe_register(tu->inode, tu->offset, tu->ref_ctr_offset, - &tu->consumer); - if (ret) - tu->inode = NULL; + uprobe = uprobe_register(inode, tu->offset, tu->ref_ctr_offset, + &tu->consumer); + if (IS_ERR(uprobe)) + return PTR_ERR(uprobe); - return ret; + tu->uprobe = uprobe; + return 0; } static void __probe_event_disable(struct trace_probe *tp) @@ -1105,11 +1105,11 @@ static void __probe_event_disable(struct trace_probe *tp) WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { - if (!tu->inode) + if (!tu->uprobe) continue; - uprobe_unregister(tu->inode, tu->offset, &tu->consumer); - tu->inode = NULL; + uprobe_unregister(tu->uprobe, &tu->consumer); + tu->uprobe = NULL; } } @@ -1306,7 +1306,7 @@ static int uprobe_perf_close(struct trace_event_call *call, return 0; list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { - ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); + ret = uprobe_apply(tu->uprobe, &tu->consumer, false); if (ret) break; } @@ -1330,7 +1330,7 @@ static int uprobe_perf_open(struct trace_event_call *call, return 0; list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { - err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); + err = uprobe_apply(tu->uprobe, &tu->consumer, true); if (err) { uprobe_perf_close(call, event); break;
This way uprobe_unregister() and uprobe_apply() do not need find_uprobe() + put_uprobe(). And to me this change simplifies the code a bit. Signed-off-by: Oleg Nesterov <oleg@redhat.com> --- include/linux/uprobes.h | 14 ++++++------ kernel/events/uprobes.c | 45 ++++++++++++------------------------- kernel/trace/bpf_trace.c | 12 +++++----- kernel/trace/trace_uprobe.c | 28 +++++++++++------------ 4 files changed, 41 insertions(+), 58 deletions(-)