
[06/12] uprobes: add batch uprobe register/unregister APIs

Message ID 20240625002144.3485799-7-andrii@kernel.org (mailing list archive)
State Superseded
Series: uprobes: add batched register/unregister APIs and per-CPU RW semaphore

Commit Message

Andrii Nakryiko June 25, 2024, 12:21 a.m. UTC
Introduce batch versions of uprobe registration (attachment) and
unregistration (detachment) APIs.

Unregistration is presumed to never fail, so that's easy.

Batch registration can fail, and so the semantics of
uprobe_register_batch() are such that either all uprobe_consumers are
successfully attached or none of them remain attached after the call
returns.

There is no guarantee of atomicity of attachment, though, and so while
batch attachment is proceeding, some uprobes might start firing before
others are completely attached. Even if the overall attachment
eventually fails, some successfully attached uprobes might fire, and
callers have to be prepared to handle that. This is in no way a
regression compared to the current approach of attaching uprobes
one by one.

One crucial implementation detail is the addition of a `struct uprobe
*uprobe` field to `struct uprobe_consumer`, which is meant for internal
uprobe subsystem usage only. We use this field both as temporary
storage (to avoid unnecessary allocations) and as a back link to the
associated uprobe, to simplify and speed up uprobe unregistration: we
can now avoid yet another tree lookup when unregistering a
uprobe_consumer.

The general direction with the uprobe registration implementation is to
do batch attachment in distinct steps, each step performing some set of
checks or actions on all uprobe_consumers before proceeding to the next
phase. This, after some more changes in subsequent patches, allows
batching the locking for each phase and thus amortizing any long delays
that might be added by writer locks (especially once we switch
uprobes_treelock to a per-CPU R/W semaphore later).

Currently, uprobe_register_batch() performs all the sanity checks
first. It then proceeds to allocate and insert uprobe instances, as
necessary (we'll split this step up further in later patches). The last
step is the actual uprobe registration for all affected VMAs.
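
In skeleton form, the steps look like this (see the full implementation
in the patch below):

	for (i = 0; i < cnt; i++) {
		/* step 1: validate each uprobe_consumer */
	}

	for (i = 0; i < cnt; i++) {
		/* step 2: allocate (or find and reuse) uprobe instances */
	}

	for (i = 0; i < cnt; i++) {
		/* step 3: register each consumer for all affected VMAs */
	}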

We take care to undo all the actions in the event of an error at any
point in this lengthy process, so the end result is all-or-nothing, as
described above.
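
For illustration, a caller that embeds uprobe_consumer into an array of
its own structs would use the pair of APIs roughly as follows (all
names below are made up for the example):

	struct my_probe {
		/* ... caller-specific state ... */
		struct uprobe_consumer consumer;
	};

	static struct uprobe_consumer *my_get_consumer(size_t idx, void *ctx)
	{
		struct my_probe *probes = ctx;

		return &probes[idx].consumer;
	}

	/* attachment is all-or-nothing */
	err = uprobe_register_batch(inode, cnt, my_get_consumer, probes);
	...
	/* detachment never fails */
	uprobe_unregister_batch(inode, cnt, my_get_consumer, probes);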

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/uprobes.h |  17 ++++
 kernel/events/uprobes.c | 181 +++++++++++++++++++++++++++++-----------
 2 files changed, 147 insertions(+), 51 deletions(-)

Comments

Jiri Olsa June 26, 2024, 11:27 a.m. UTC | #1
On Mon, Jun 24, 2024 at 05:21:38PM -0700, Andrii Nakryiko wrote:

SNIP

> +	for (i = 0; i < cnt; i++) {
> +		uc = get_uprobe_consumer(i, ctx);
> +
> +		/* Each consumer must have at least one handler set */
> +		if (!uc || (!uc->handler && !uc->ret_handler))
> +			return -EINVAL;
> +		/* Racy, just to catch the obvious mistakes */
> +		if (uc->offset > i_size_read(inode))
> +			return -EINVAL;
> +		if (uc->uprobe)
> +			return -EINVAL;
> +		/*
> +		 * This ensures that copy_from_page(), copy_to_page() and
> +		 * __update_ref_ctr() can't cross page boundary.
> +		 */
> +		if (!IS_ALIGNED(uc->offset, UPROBE_SWBP_INSN_SIZE))
> +			return -EINVAL;
> +		if (!IS_ALIGNED(uc->ref_ctr_offset, sizeof(short)))
> +			return -EINVAL;
> +	}
>  
> -	down_write(&uprobe->register_rwsem);
> -	consumer_add(uprobe, uc);
> -	ret = register_for_each_vma(uprobe, uc);
> -	if (ret)
> -		__uprobe_unregister(uprobe, uc);
> -	up_write(&uprobe->register_rwsem);
> +	for (i = 0; i < cnt; i++) {
> +		uc = get_uprobe_consumer(i, ctx);
>  
> -	if (ret)
> -		put_uprobe(uprobe);
> +		uprobe = alloc_uprobe(inode, uc->offset, uc->ref_ctr_offset);
> +		if (IS_ERR(uprobe)) {
> +			ret = PTR_ERR(uprobe);
> +			goto cleanup_uprobes;
> +		}
> +
> +		uc->uprobe = uprobe;
> +	}
> +
> +	for (i = 0; i < cnt; i++) {
> +		uc = get_uprobe_consumer(i, ctx);
> +		uprobe = uc->uprobe;
> +
> +		down_write(&uprobe->register_rwsem);
> +		consumer_add(uprobe, uc);
> +		ret = register_for_each_vma(uprobe, uc);
> +		if (ret)
> +			__uprobe_unregister(uprobe, uc);
> +		up_write(&uprobe->register_rwsem);
> +
> +		if (ret) {
> +			put_uprobe(uprobe);
> +			goto cleanup_unreg;
> +		}
> +	}
> +
> +	return 0;
>  
> +cleanup_unreg:
> +	/* unregister all uprobes we managed to register until failure */
> +	for (i--; i >= 0; i--) {
> +		uc = get_uprobe_consumer(i, ctx);
> +
> +		down_write(&uc->uprobe->register_rwsem);
> +		__uprobe_unregister(uc->uprobe, uc);
> +		up_write(&uc->uprobe->register_rwsem);
> +	}
> +cleanup_uprobes:

when we jump here from 'goto cleanup_uprobes' not all of the
consumers might have uc->uprobe set up

perhaps we can set cnt = i - 1 before the goto, or just check uc->uprobe below


> +	/* put all the successfully allocated/reused uprobes */
> +	for (i = cnt - 1; i >= 0; i--) {

curious, any reason why we go from the top here?

thanks,
jirka

> +		uc = get_uprobe_consumer(i, ctx);
> +
> +		put_uprobe(uc->uprobe);
> +		uc->uprobe = NULL;
> +	}
>  	return ret;
>  }
>  
>  int uprobe_register(struct inode *inode, struct uprobe_consumer *uc)
>  {
> -	return __uprobe_register(inode, uc->offset, uc->ref_ctr_offset, uc);
> +	return uprobe_register_batch(inode, 1, uprobe_consumer_identity, uc);
>  }
>  EXPORT_SYMBOL_GPL(uprobe_register);
>  
> -- 
> 2.43.0
>
Andrii Nakryiko June 26, 2024, 4:44 p.m. UTC | #2
On Wed, Jun 26, 2024 at 4:27 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Mon, Jun 24, 2024 at 05:21:38PM -0700, Andrii Nakryiko wrote:
>
> SNIP
>
> > +     for (i = 0; i < cnt; i++) {
> > +             uc = get_uprobe_consumer(i, ctx);
> > +
> > +             /* Each consumer must have at least one handler set */
> > +             if (!uc || (!uc->handler && !uc->ret_handler))
> > +                     return -EINVAL;
> > +             /* Racy, just to catch the obvious mistakes */
> > +             if (uc->offset > i_size_read(inode))
> > +                     return -EINVAL;
> > +             if (uc->uprobe)
> > +                     return -EINVAL;
> > +             /*
> > +              * This ensures that copy_from_page(), copy_to_page() and
> > +              * __update_ref_ctr() can't cross page boundary.
> > +              */
> > +             if (!IS_ALIGNED(uc->offset, UPROBE_SWBP_INSN_SIZE))
> > +                     return -EINVAL;
> > +             if (!IS_ALIGNED(uc->ref_ctr_offset, sizeof(short)))
> > +                     return -EINVAL;
> > +     }
> >
> > -     down_write(&uprobe->register_rwsem);
> > -     consumer_add(uprobe, uc);
> > -     ret = register_for_each_vma(uprobe, uc);
> > -     if (ret)
> > -             __uprobe_unregister(uprobe, uc);
> > -     up_write(&uprobe->register_rwsem);
> > +     for (i = 0; i < cnt; i++) {
> > +             uc = get_uprobe_consumer(i, ctx);
> >
> > -     if (ret)
> > -             put_uprobe(uprobe);
> > +             uprobe = alloc_uprobe(inode, uc->offset, uc->ref_ctr_offset);
> > +             if (IS_ERR(uprobe)) {
> > +                     ret = PTR_ERR(uprobe);
> > +                     goto cleanup_uprobes;
> > +             }
> > +
> > +             uc->uprobe = uprobe;
> > +     }
> > +
> > +     for (i = 0; i < cnt; i++) {
> > +             uc = get_uprobe_consumer(i, ctx);
> > +             uprobe = uc->uprobe;
> > +
> > +             down_write(&uprobe->register_rwsem);
> > +             consumer_add(uprobe, uc);
> > +             ret = register_for_each_vma(uprobe, uc);
> > +             if (ret)
> > +                     __uprobe_unregister(uprobe, uc);
> > +             up_write(&uprobe->register_rwsem);
> > +
> > +             if (ret) {
> > +                     put_uprobe(uprobe);
> > +                     goto cleanup_unreg;
> > +             }
> > +     }
> > +
> > +     return 0;
> >
> > +cleanup_unreg:
> > +     /* unregister all uprobes we managed to register until failure */
> > +     for (i--; i >= 0; i--) {
> > +             uc = get_uprobe_consumer(i, ctx);
> > +
> > +             down_write(&uc->uprobe->register_rwsem);
> > +             __uprobe_unregister(uc->uprobe, uc);
> > +             up_write(&uc->uprobe->register_rwsem);
> > +     }
> > +cleanup_uprobes:
>
> when we jump here from 'goto cleanup_uprobes' not all of the
> consumers might have uc->uprobe set up
>
> perhaps we can set cnt = i - 1 before the goto, or just check uc->uprobe below

yep, you are right, I missed this part during multiple rounds of
refactorings. I think the `if (uc->uprobe)` check is the cleanest
approach here.

>
>
> > +     /* put all the successfully allocated/reused uprobes */
> > +     for (i = cnt - 1; i >= 0; i--) {
>
> curious, any reason why we go from the top here?

No particular reason. This started as (i = i - 1; i >= 0; i--), but
then as I kept splitting steps I needed to do this over all uprobes.
Anyways, I can do a clean `i = 0; i < cnt; i++` with `if (uc->uprobe)`
check.
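
I.e., roughly this (sketch of the planned v2 cleanup path):

cleanup_uprobes:
	/*
	 * put all the successfully allocated/reused uprobes; uc->uprobe
	 * might still be unset if we failed mid-allocation, so skip those
	 */
	for (i = 0; i < cnt; i++) {
		uc = get_uprobe_consumer(i, ctx);

		if (!uc->uprobe)
			continue;

		put_uprobe(uc->uprobe);
		uc->uprobe = NULL;
	}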

>
> thanks,
> jirka
>
> > +             uc = get_uprobe_consumer(i, ctx);
> > +
> > +             put_uprobe(uc->uprobe);
> > +             uc->uprobe = NULL;
> > +     }
> >       return ret;
> >  }
> >
> >  int uprobe_register(struct inode *inode, struct uprobe_consumer *uc)
> >  {
> > -     return __uprobe_register(inode, uc->offset, uc->ref_ctr_offset, uc);
> > +     return uprobe_register_batch(inode, 1, uprobe_consumer_identity, uc);
> >  }
> >  EXPORT_SYMBOL_GPL(uprobe_register);
> >
> > --
> > 2.43.0
> >
Masami Hiramatsu (Google) June 27, 2024, 1:04 p.m. UTC | #3
On Mon, 24 Jun 2024 17:21:38 -0700
Andrii Nakryiko <andrii@kernel.org> wrote:

> -static int __uprobe_register(struct inode *inode, loff_t offset,
> -			     loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> +int uprobe_register_batch(struct inode *inode, int cnt,
> +			  uprobe_consumer_fn get_uprobe_consumer, void *ctx)

Is this interface just for avoiding memory allocation? Can't we just
allocate a temporary array of *uprobe_consumer instead? 

Thank you,
Andrii Nakryiko June 27, 2024, 4:47 p.m. UTC | #4
On Thu, Jun 27, 2024 at 6:04 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Mon, 24 Jun 2024 17:21:38 -0700
> Andrii Nakryiko <andrii@kernel.org> wrote:
>
> > -static int __uprobe_register(struct inode *inode, loff_t offset,
> > -                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> > +int uprobe_register_batch(struct inode *inode, int cnt,
> > +                       uprobe_consumer_fn get_uprobe_consumer, void *ctx)
>
> Is this interface just for avoiding memory allocation? Can't we just
> allocate a temporary array of *uprobe_consumer instead?

Yes, exactly, to avoid the need for allocating another array that
would just contain pointers to uprobe_consumer. Consumers would never
just have an array of `struct uprobe_consumer *`, because
uprobe_consumer struct is embedded in some other struct, so the array
interface isn't the most convenient.

If you feel strongly, I can do an array, but this necessitates
allocating an extra array *and keeping it* for the entire duration of
BPF multi-uprobe link (attachment) existence, so it feels like a
waste. This is because we don't want to do anything that can fail in
the detachment logic (so no temporary array allocation there).

Anyways, let me know how you feel about keeping this callback.

>
> Thank you,
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
Masami Hiramatsu (Google) June 28, 2024, 6:28 a.m. UTC | #5
On Thu, 27 Jun 2024 09:47:10 -0700
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:

> On Thu, Jun 27, 2024 at 6:04 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> >
> > On Mon, 24 Jun 2024 17:21:38 -0700
> > Andrii Nakryiko <andrii@kernel.org> wrote:
> >
> > > -static int __uprobe_register(struct inode *inode, loff_t offset,
> > > -                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> > > +int uprobe_register_batch(struct inode *inode, int cnt,
> > > +                       uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> >
> > Is this interface just for avoiding memory allocation? Can't we just
> > allocate a temporary array of *uprobe_consumer instead?
> 
> Yes, exactly, to avoid the need for allocating another array that
> would just contain pointers to uprobe_consumer. Consumers would never
> just have an array of `struct uprobe_consumer *`, because
> uprobe_consumer struct is embedded in some other struct, so the array
> interface isn't the most convenient.

OK, I understand it.

> 
> If you feel strongly, I can do an array, but this necessitates
> allocating an extra array *and keeping it* for the entire duration of
> BPF multi-uprobe link (attachment) existence, so it feels like a
> waste. This is because we don't want to do anything that can fail in
> the detachment logic (so no temporary array allocation there).

No need to change it, that sounds reasonable.

> 
> Anyways, let me know how you feel about keeping this callback.

IMHO, maybe the interface function is better to change to
`uprobe_consumer *next_uprobe_consumer(void **data)`. If caller
side uses a linked list of structure, index access will need to
follow the list every time.

Thank you,


> 
> >
> > Thank you,
> >
> > --
> > Masami Hiramatsu (Google) <mhiramat@kernel.org>
Andrii Nakryiko June 28, 2024, 4:34 p.m. UTC | #6
On Thu, Jun 27, 2024 at 11:28 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Thu, 27 Jun 2024 09:47:10 -0700
> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> > On Thu, Jun 27, 2024 at 6:04 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > >
> > > On Mon, 24 Jun 2024 17:21:38 -0700
> > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > >
> > > > -static int __uprobe_register(struct inode *inode, loff_t offset,
> > > > -                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> > > > +int uprobe_register_batch(struct inode *inode, int cnt,
> > > > +                       uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > >
> > > Is this interface just for avoiding memory allocation? Can't we just
> > > allocate a temporary array of *uprobe_consumer instead?
> >
> > Yes, exactly, to avoid the need for allocating another array that
> > would just contain pointers to uprobe_consumer. Consumers would never
> > just have an array of `struct uprobe_consumer *`, because
> > uprobe_consumer struct is embedded in some other struct, so the array
> > interface isn't the most convenient.
>
> OK, I understand it.
>
> >
> > If you feel strongly, I can do an array, but this necessitates
> > allocating an extra array *and keeping it* for the entire duration of
> > BPF multi-uprobe link (attachment) existence, so it feels like a
> > waste. This is because we don't want to do anything that can fail in
> > the detachment logic (so no temporary array allocation there).
>
> No need to change it, that sounds reasonable.
>

Great, thanks.

> >
> > Anyways, let me know how you feel about keeping this callback.
>
> IMHO, maybe the interface function is better to change to
> `uprobe_consumer *next_uprobe_consumer(void **data)`. If caller
> side uses a linked list of structure, index access will need to
> follow the list every time.

This would be problematic. Note how we call get_uprobe_consumer(i,
ctx) with i going from 0 to N in multiple independent loops. So if we
are only allowed to ask for the next consumer, then
uprobe_register_batch and uprobe_unregister_batch would need to build
its own internal index and remember ith instance. Which again means
more allocations and possibly failing uprobe_unregister_batch(), which
isn't great.

For now this API works well, I propose to keep it as is. For linked
list case consumers would need to allocate one extra array or pay the
price of O(N) search (which might be ok, depending on how many uprobes
are being attached). But we don't have such consumers right now,
thankfully.
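
E.g., a hypothetical linked-list-based callback paying that O(N) price
on every call would look something like this (my_probe is a made-up
struct embedding uprobe_consumer plus a list_head named 'list'):

	static struct uprobe_consumer *list_get_consumer(size_t idx, void *ctx)
	{
		struct list_head *head = ctx;
		struct my_probe *p;

		list_for_each_entry(p, head, list) {
			if (idx-- == 0)
				return &p->consumer;
		}
		return NULL;
	}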

>
> Thank you,
>
>
> >
> > >
> > > Thank you,
> > >
> > > --
> > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
Masami Hiramatsu (Google) June 29, 2024, 11:30 p.m. UTC | #7
On Fri, 28 Jun 2024 09:34:26 -0700
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:

> On Thu, Jun 27, 2024 at 11:28 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> >
> > On Thu, 27 Jun 2024 09:47:10 -0700
> > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >
> > > On Thu, Jun 27, 2024 at 6:04 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > >
> > > > On Mon, 24 Jun 2024 17:21:38 -0700
> > > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > > >
> > > > > -static int __uprobe_register(struct inode *inode, loff_t offset,
> > > > > -                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> > > > > +int uprobe_register_batch(struct inode *inode, int cnt,
> > > > > +                       uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > > >
> > > > Is this interface just for avoiding memory allocation? Can't we just
> > > > allocate a temporary array of *uprobe_consumer instead?
> > >
> > > Yes, exactly, to avoid the need for allocating another array that
> > > would just contain pointers to uprobe_consumer. Consumers would never
> > > just have an array of `struct uprobe_consumer *`, because
> > > uprobe_consumer struct is embedded in some other struct, so the array
> > > interface isn't the most convenient.
> >
> > OK, I understand it.
> >
> > >
> > > If you feel strongly, I can do an array, but this necessitates
> > > allocating an extra array *and keeping it* for the entire duration of
> > > BPF multi-uprobe link (attachment) existence, so it feels like a
> > > waste. This is because we don't want to do anything that can fail in
> > > the detachment logic (so no temporary array allocation there).
> >
> > No need to change it, that sounds reasonable.
> >
> 
> Great, thanks.
> 
> > >
> > > Anyways, let me know how you feel about keeping this callback.
> >
> > IMHO, maybe the interface function is better to change to
> > `uprobe_consumer *next_uprobe_consumer(void **data)`. If caller
> > side uses a linked list of structure, index access will need to
> > follow the list every time.
> 
> This would be problematic. Note how we call get_uprobe_consumer(i,
> ctx) with i going from 0 to N in multiple independent loops. So if we
> are only allowed to ask for the next consumer, then
> uprobe_register_batch and uprobe_unregister_batch would need to build
> its own internal index and remember ith instance. Which again means
> more allocations and possibly failing uprobe_unregister_batch(), which
> isn't great.

No, I think we can use a cursor variable as;

int uprobe_register_batch(struct inode *inode,
                 uprobe_consumer_fn get_uprobe_consumer, void *ctx)
{
	void *cur = ctx;

	while ((uc = get_uprobe_consumer(&cur)) != NULL) {
		...
	} 

	cur = ctx;
	while ((uc = get_uprobe_consumer(&cur)) != NULL) {
		...
	} 
}

This can also remove the cnt.

Thank you,

> 
> For now this API works well, I propose to keep it as is. For linked
> list case consumers would need to allocate one extra array or pay the
> price of O(N) search (which might be ok, depending on how many uprobes
> are being attached). But we don't have such consumers right now,
> thankfully.
> 
> >
> > Thank you,
> >
> >
> > >
> > > >
> > > > Thank you,
> > > >
> > > > --
> > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> >
> >
> > --
> > Masami Hiramatsu (Google) <mhiramat@kernel.org>
Andrii Nakryiko July 1, 2024, 5:55 p.m. UTC | #8
On Sat, Jun 29, 2024 at 4:30 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Fri, 28 Jun 2024 09:34:26 -0700
> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> > On Thu, Jun 27, 2024 at 11:28 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > >
> > > On Thu, 27 Jun 2024 09:47:10 -0700
> > > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > >
> > > > On Thu, Jun 27, 2024 at 6:04 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > > >
> > > > > On Mon, 24 Jun 2024 17:21:38 -0700
> > > > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > >
> > > > > > -static int __uprobe_register(struct inode *inode, loff_t offset,
> > > > > > -                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> > > > > > +int uprobe_register_batch(struct inode *inode, int cnt,
> > > > > > +                       uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > > > >
> > > > > Is this interface just for avoiding memory allocation? Can't we just
> > > > > allocate a temporary array of *uprobe_consumer instead?
> > > >
> > > > Yes, exactly, to avoid the need for allocating another array that
> > > > would just contain pointers to uprobe_consumer. Consumers would never
> > > > just have an array of `struct uprobe_consumer *`, because
> > > > uprobe_consumer struct is embedded in some other struct, so the array
> > > > interface isn't the most convenient.
> > >
> > > OK, I understand it.
> > >
> > > >
> > > > If you feel strongly, I can do an array, but this necessitates
> > > > allocating an extra array *and keeping it* for the entire duration of
> > > > BPF multi-uprobe link (attachment) existence, so it feels like a
> > > > waste. This is because we don't want to do anything that can fail in
> > > > the detachment logic (so no temporary array allocation there).
> > >
> > > No need to change it, that sounds reasonable.
> > >
> >
> > Great, thanks.
> >
> > > >
> > > > Anyways, let me know how you feel about keeping this callback.
> > >
> > > IMHO, maybe the interface function is better to change to
> > > `uprobe_consumer *next_uprobe_consumer(void **data)`. If caller
> > > side uses a linked list of structure, index access will need to
> > > follow the list every time.
> >
> > This would be problematic. Note how we call get_uprobe_consumer(i,
> > ctx) with i going from 0 to N in multiple independent loops. So if we
> > are only allowed to ask for the next consumer, then
> > uprobe_register_batch and uprobe_unregister_batch would need to build
> > its own internal index and remember ith instance. Which again means
> > more allocations and possibly failing uprobe_unregister_batch(), which
> > isn't great.
>
> No, I think we can use a cursor variable as;
>
> int uprobe_register_batch(struct inode *inode,
>                  uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> {
>         void *cur = ctx;
>
>         while ((uc = get_uprobe_consumer(&cur)) != NULL) {
>                 ...
>         }
>
>         cur = ctx;
>         while ((uc = get_uprobe_consumer(&cur)) != NULL) {
>                 ...
>         }
> }
>
> This can also remove the cnt.

Ok, if you prefer this I'll switch. It's a bit more cumbersome to use
for callers, but we have one right now, and might have another one, so
not a big deal.

>
> Thank you,
>
> >
> > For now this API works well, I propose to keep it as is. For linked
> > list case consumers would need to allocate one extra array or pay the
> > price of O(N) search (which might be ok, depending on how many uprobes
> > are being attached). But we don't have such consumers right now,
> > thankfully.
> >
> > >
> > > Thank you,
> > >
> > >
> > > >
> > > > >
> > > > > Thank you,
> > > > >
> > > > > --
> > > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > >
> > >
> > > --
> > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
Andrii Nakryiko July 1, 2024, 10:15 p.m. UTC | #9
On Mon, Jul 1, 2024 at 10:55 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Sat, Jun 29, 2024 at 4:30 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> >
> > On Fri, 28 Jun 2024 09:34:26 -0700
> > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >
> > > On Thu, Jun 27, 2024 at 11:28 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > >
> > > > On Thu, 27 Jun 2024 09:47:10 -0700
> > > > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > > On Thu, Jun 27, 2024 at 6:04 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > > > >
> > > > > > On Mon, 24 Jun 2024 17:21:38 -0700
> > > > > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > > >
> > > > > > > -static int __uprobe_register(struct inode *inode, loff_t offset,
> > > > > > > -                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> > > > > > > +int uprobe_register_batch(struct inode *inode, int cnt,
> > > > > > > +                       uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > > > > >
> > > > > > Is this interface just for avoiding memory allocation? Can't we just
> > > > > > allocate a temporary array of *uprobe_consumer instead?
> > > > >
> > > > > Yes, exactly, to avoid the need for allocating another array that
> > > > > would just contain pointers to uprobe_consumer. Consumers would never
> > > > > just have an array of `struct uprobe_consumer *`, because
> > > > > uprobe_consumer struct is embedded in some other struct, so the array
> > > > > interface isn't the most convenient.
> > > >
> > > > OK, I understand it.
> > > >
> > > > >
> > > > > If you feel strongly, I can do an array, but this necessitates
> > > > > allocating an extra array *and keeping it* for the entire duration of
> > > > > BPF multi-uprobe link (attachment) existence, so it feels like a
> > > > > waste. This is because we don't want to do anything that can fail in
> > > > > the detachment logic (so no temporary array allocation there).
> > > >
> > > > No need to change it, that sounds reasonable.
> > > >
> > >
> > > Great, thanks.
> > >
> > > > >
> > > > > Anyways, let me know how you feel about keeping this callback.
> > > >
> > > > IMHO, maybe the interface function is better to change to
> > > > `uprobe_consumer *next_uprobe_consumer(void **data)`. If caller
> > > > side uses a linked list of structure, index access will need to
> > > > follow the list every time.
> > >
> > > This would be problematic. Note how we call get_uprobe_consumer(i,
> > > ctx) with i going from 0 to N in multiple independent loops. So if we
> > > are only allowed to ask for the next consumer, then
> > > uprobe_register_batch and uprobe_unregister_batch would need to build
> > > its own internal index and remember ith instance. Which again means
> > > more allocations and possibly failing uprobe_unregister_batch(), which
> > > isn't great.
> >
> > No, I think we can use a cursor variable as;
> >
> > int uprobe_register_batch(struct inode *inode,
> >                  uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > {
> >         void *cur = ctx;
> >
> >         while ((uc = get_uprobe_consumer(&cur)) != NULL) {
> >                 ...
> >         }
> >
> >         cur = ctx;
> >         while ((uc = get_uprobe_consumer(&cur)) != NULL) {
> >                 ...
> >         }
> > }
> >
> > This can also remove the cnt.
>
> Ok, if you prefer this I'll switch. It's a bit more cumbersome to use
> for callers, but we have one right now, and might have another one, so
> not a big deal.
>

Actually, now that I started implementing this, I really-really don't
like it. In the example above you assume by storing and reusing
original ctx value you will reset iteration to the very beginning.
This is not how it works in practice though. Ctx is most probably a
pointer to some struct somewhere with iteration state (e.g., array of
all uprobes + current index), and so get_uprobe_consumer() doesn't
update `void *ctx` itself, it updates the state of that struct.
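
E.g., something like this (made-up names, but that's roughly the shape
it takes for BPF multi-uprobes):

	struct consumer_iter {
		struct my_probe *probes;	/* array of structs embedding uprobe_consumer */
		int cnt;
		int cur;	/* advanced inside this struct, so saving and
				 * restoring the ctx *pointer* rewinds nothing
				 */
	};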

And so there is no easy and clean way to reset this iterator without
adding another callback or something. At which point it becomes quite
cumbersome and convoluted.

How about this? I'll keep the existing get_uprobe_consumer(idx, ctx)
contract, which works for the only user right now, BPF multi-uprobes.
When it's time to add another consumer that works with a linked list,
we can add another more complicated contract that would do
iterator-style callbacks. This would be used by linked list users, and
we can transparently implement existing uprobe_register_batch()
contract on top of if by implementing a trivial iterator wrapper on
top of get_uprobe_consumer(idx, ctx) approach.
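
Such a wrapper really would be trivial, something like this (sketch,
reusing the uprobe_consumer_fn typedef from this patch):

	struct idx_iter {
		uprobe_consumer_fn get_uprobe_consumer;
		void *ctx;
		int cnt;
		int cur;
	};

	static struct uprobe_consumer *idx_iter_next(void *data)
	{
		struct idx_iter *it = data;

		if (it->cur >= it->cnt)
			return NULL;
		return it->get_uprobe_consumer(it->cur++, it->ctx);
	}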

Let's not add unnecessary complications right now given we have a
clear path forward to add it later, if necessary, without breaking
anything. I'll send v2 without changes to get_uprobe_consumer() for
now, hopefully my above plan makes sense to you. Thanks!

> >
> > Thank you,
> >
> > >
> > > For now this API works well, I propose to keep it as is. For linked
> > > list case consumers would need to allocate one extra array or pay the
> > > price of O(N) search (which might be ok, depending on how many uprobes
> > > are being attached). But we don't have such consumers right now,
> > > thankfully.
> > >
> > > >
> > > > Thank you,
> > > >
> > > >
> > > > >
> > > > > >
> > > > > > Thank you,
> > > > > >
> > > > > > --
> > > > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > >
> > > >
> > > > --
> > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> >
> >
> > --
> > Masami Hiramatsu (Google) <mhiramat@kernel.org>
Masami Hiramatsu (Google) July 2, 2024, 1:01 a.m. UTC | #10
On Mon, 1 Jul 2024 15:15:56 -0700
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:

> On Mon, Jul 1, 2024 at 10:55 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Sat, Jun 29, 2024 at 4:30 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > >
> > > On Fri, 28 Jun 2024 09:34:26 -0700
> > > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > >
> > > > On Thu, Jun 27, 2024 at 11:28 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > > >
> > > > > On Thu, 27 Jun 2024 09:47:10 -0700
> > > > > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > > > >
> > > > > > On Thu, Jun 27, 2024 at 6:04 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > > > > >
> > > > > > > On Mon, 24 Jun 2024 17:21:38 -0700
> > > > > > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > > > >
> > > > > > > > -static int __uprobe_register(struct inode *inode, loff_t offset,
> > > > > > > > -                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> > > > > > > > +int uprobe_register_batch(struct inode *inode, int cnt,
> > > > > > > > +                       uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > > > > > >
> > > > > > > Is this interface just for avoiding memory allocation? Can't we just
> > > > > > > allocate a temporary array of *uprobe_consumer instead?
> > > > > >
> > > > > > Yes, exactly, to avoid the need for allocating another array that
> > > > > > would just contain pointers to uprobe_consumer. Consumers would never
> > > > > > just have an array of `struct uprobe_consumer *`, because
> > > > > > uprobe_consumer struct is embedded in some other struct, so the array
> > > > > > interface isn't the most convenient.
> > > > >
> > > > > OK, I understand it.
> > > > >
> > > > > >
> > > > > > If you feel strongly, I can do an array, but this necessitates
> > > > > > allocating an extra array *and keeping it* for the entire duration of
> > > > > > BPF multi-uprobe link (attachment) existence, so it feels like a
> > > > > > waste. This is because we don't want to do anything that can fail in
> > > > > > the detachment logic (so no temporary array allocation there).
> > > > >
> > > > > No need to change it, that sounds reasonable.
> > > > >
> > > >
> > > > Great, thanks.
> > > >
> > > > > >
> > > > > > Anyways, let me know how you feel about keeping this callback.
> > > > >
> > > > > IMHO, maybe the interface function is better to change to
> > > > > `uprobe_consumer *next_uprobe_consumer(void **data)`. If caller
> > > > > side uses a linked list of structure, index access will need to
> > > > > follow the list every time.
> > > >
> > > > This would be problematic. Note how we call get_uprobe_consumer(i,
> > > > ctx) with i going from 0 to N in multiple independent loops. So if we
> > > > are only allowed to ask for the next consumer, then
> > > > uprobe_register_batch and uprobe_unregister_batch would need to build
> > > > its own internal index and remember ith instance. Which again means
> > > > more allocations and possibly failing uprobe_unregister_batch(), which
> > > > isn't great.
> > >
> > > No, I think we can use a cursor variable as;
> > >
> > > int uprobe_register_batch(struct inode *inode,
> > >                  uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > > {
> > >         void *cur = ctx;
> > >
> > >         while ((uc = get_uprobe_consumer(&cur)) != NULL) {
> > >                 ...
> > >         }
> > >
> > >         cur = ctx;
> > >         while ((uc = get_uprobe_consumer(&cur)) != NULL) {
> > >                 ...
> > >         }
> > > }
> > >
> > > This can also remove the cnt.
> >
> > Ok, if you prefer this I'll switch. It's a bit more cumbersome to use
> > for callers, but we have one right now, and might have another one, so
> > not a big deal.
> >
> 
> Actually, now that I started implementing this, I really-really don't
> like it. In the example above you assume by storing and reusing
> original ctx value you will reset iteration to the very beginning.
> This is not how it works in practice though. Ctx is most probably a
> pointer to some struct somewhere with iteration state (e.g., array of
> all uprobes + current index), and so get_uprobe_consumer() doesn't
> update `void *ctx` itself, it updates the state of that struct.

Yeah, that should be noted so that if the get_uprobe_consumer() is
called with original `ctx` value, it should return the same.
Ah, and I found we need to pass both ctx and pos...

       while ((uc = get_uprobe_consumer(&cur, ctx)) != NULL) {
                 ...
         }

Usually it is enough to pass the cursor as similar to the other
for_each_* macros. For example, struct foo has .list and .uc, then

struct uprobe_consumer *get_uprobe_consumer_foo(void **pos, void *head)
{
	struct foo *foo = *pos;

	if (!foo)
		return NULL;

	*pos = list_next_entry(foo, list);
	if (list_is_head(pos, (head)))
		*pos = NULL;

	return foo->uc;
}

This works something like this.

#define for_each_uprobe_consumer_from_foo(uc, pos, head) \
	list_for_each_entry(pos, head, list) \
		if (uc = uprobe_consumer_from_foo(pos))

or, for array of *uprobe_consumer (array must be end with NULL), 

struct uprobe_consumer *get_uprobe_consumer_array(void **pos, void *head __unused)
{
	struct uprobe_consumer **uc = *pos;

	if (!*uc)
		return NULL;

	*pos = uc + 1;

	return *uc;
}

But this may not be able to support array of uprobe_consumer. Hmm.


> And so there is no easy and clean way to reset this iterator without
> adding another callback or something. At which point it becomes quite
> cumbersome and convoluted.

If you consider that is problematic, I think we can prepare more
iterator like object;

struct uprobe_consumer_iter_ops {
	struct uprobe_consumer *(*start)(struct uprobe_consumer_iter_ops *);
	struct uprobe_consumer *(*next)(struct uprobe_consumer_iter_ops *);
	void *ctx; // or, just embed the data in this structure.
};


> How about this? I'll keep the existing get_uprobe_consumer(idx, ctx)
> contract, which works for the only user right now, BPF multi-uprobes.
> When it's time to add another consumer that works with a linked list,
> we can add another more complicated contract that would do
> iterator-style callbacks. This would be used by linked list users, and
> we can transparently implement existing uprobe_register_batch()
> contract on top of if by implementing a trivial iterator wrapper on
> top of get_uprobe_consumer(idx, ctx) approach.

Agreed; anyway, as far as it uses an array of uprobe_consumer, it works.
When we need to register a list of structures, we may be able to
allocate an array or introduce a new function.

Thank you!

> 
> Let's not add unnecessary complications right now given we have a
> clear path forward to add it later, if necessary, without breaking
> anything. I'll send v2 without changes to get_uprobe_consumer() for
> now, hopefully my above plan makes sense to you. Thanks!
> 
> > >
> > > Thank you,
> > >
> > > >
> > > > For now this API works well, I propose to keep it as is. For linked
> > > > list case consumers would need to allocate one extra array or pay the
> > > > price of O(N) search (which might be ok, depending on how many uprobes
> > > > are being attached). But we don't have such consumers right now,
> > > > thankfully.
> > > >
> > > > >
> > > > > Thank you,
> > > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > Thank you,
> > > > > > >
> > > > > > > --
> > > > > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > > >
> > > > >
> > > > > --
> > > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > >
> > >
> > > --
> > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
Andrii Nakryiko July 2, 2024, 1:34 a.m. UTC | #11
On Mon, Jul 1, 2024 at 6:01 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Mon, 1 Jul 2024 15:15:56 -0700
> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> > On Mon, Jul 1, 2024 at 10:55 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Sat, Jun 29, 2024 at 4:30 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > >
> > > > On Fri, 28 Jun 2024 09:34:26 -0700
> > > > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > > On Thu, Jun 27, 2024 at 11:28 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > > > >
> > > > > > On Thu, 27 Jun 2024 09:47:10 -0700
> > > > > > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > > > > >
> > > > > > > On Thu, Jun 27, 2024 at 6:04 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > > > > > >
> > > > > > > > On Mon, 24 Jun 2024 17:21:38 -0700
> > > > > > > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > > > > >
> > > > > > > > > -static int __uprobe_register(struct inode *inode, loff_t offset,
> > > > > > > > > -                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
> > > > > > > > > +int uprobe_register_batch(struct inode *inode, int cnt,
> > > > > > > > > +                       uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > > > > > > >
> > > > > > > > Is this interface just for avoiding memory allocation? Can't we just
> > > > > > > > allocate a temporary array of *uprobe_consumer instead?
> > > > > > >
> > > > > > > Yes, exactly, to avoid the need for allocating another array that
> > > > > > > would just contain pointers to uprobe_consumer. Consumers would never
> > > > > > > just have an array of `struct uprobe_consumer *`, because
> > > > > > > uprobe_consumer struct is embedded in some other struct, so the array
> > > > > > > interface isn't the most convenient.
> > > > > >
> > > > > > OK, I understand it.
> > > > > >
> > > > > > >
> > > > > > > If you feel strongly, I can do an array, but this necessitates
> > > > > > > allocating an extra array *and keeping it* for the entire duration of
> > > > > > > BPF multi-uprobe link (attachment) existence, so it feels like a
> > > > > > > waste. This is because we don't want to do anything that can fail in
> > > > > > > the detachment logic (so no temporary array allocation there).
> > > > > >
> > > > > > No need to change it, that sounds reasonable.
> > > > > >
> > > > >
> > > > > Great, thanks.
> > > > >
> > > > > > >
> > > > > > > Anyways, let me know how you feel about keeping this callback.
> > > > > >
> > > > > > IMHO, maybe the interface function is better to change to
> > > > > > `uprobe_consumer *next_uprobe_consumer(void **data)`. If caller
> > > > > > side uses a linked list of structure, index access will need to
> > > > > > follow the list every time.
> > > > >
> > > > > This would be problematic. Note how we call get_uprobe_consumer(i,
> > > > > ctx) with i going from 0 to N in multiple independent loops. So if we
> > > > > are only allowed to ask for the next consumer, then
> > > > > uprobe_register_batch and uprobe_unregister_batch would need to build
> > > > > its own internal index and remember ith instance. Which again means
> > > > > more allocations and possibly failing uprobe_unregister_batch(), which
> > > > > isn't great.
> > > >
> > > > No, I think we can use a cursor variable as;
> > > >
> > > > int uprobe_register_batch(struct inode *inode,
> > > >                  uprobe_consumer_fn get_uprobe_consumer, void *ctx)
> > > > {
> > > >         void *cur = ctx;
> > > >
> > > >         while ((uc = get_uprobe_consumer(&cur)) != NULL) {
> > > >                 ...
> > > >         }
> > > >
> > > >         cur = ctx;
> > > >         while ((uc = get_uprobe_consumer(&cur)) != NULL) {
> > > >                 ...
> > > >         }
> > > > }
> > > >
> > > > This can also remove the cnt.
> > >
> > > Ok, if you prefer this I'll switch. It's a bit more cumbersome to use
> > > for callers, but we have one right now, and might have another one, so
> > > not a big deal.
> > >
> >
> > Actually, now that I started implementing this, I really-really don't
> > like it. In the example above you assume by storing and reusing
> > original ctx value you will reset iteration to the very beginning.
> > This is not how it works in practice though. Ctx is most probably a
> > pointer to some struct somewhere with iteration state (e.g., array of
> > all uprobes + current index), and so get_uprobe_consumer() doesn't
> > update `void *ctx` itself, it updates the state of that struct.
>
> Yeah, that should be noted so that if the get_uprobe_consumer() is
> called with original `ctx` value, it should return the same.
> Ah, and I found we need to pass both ctx and pos...
>
>        while ((uc = get_uprobe_consumer(&cur, ctx)) != NULL) {
>                  ...
>          }
>
> Usually it is enough to pass the cursor as similar to the other
> for_each_* macros. For example, struct foo has .list and .uc, then
>
> struct uprobe_consumer *get_uprobe_consumer_foo(void **pos, void *head)
> {
>         struct foo *foo = *pos;
>
>         if (!foo)
>                 return NULL;
>
>         *pos = list_next_entry(foo, list);
>         if (list_is_head(pos, (head)))
>                 *pos = NULL;
>
>         return foo->uc;
> }
>
> This works something like this.
>
> #define for_each_uprobe_consumer_from_foo(uc, pos, head) \
>         list_for_each_entry(pos, head, list) \
>                 if (uc = uprobe_consumer_from_foo(pos))
>
> or, for array of *uprobe_consumer (array must be end with NULL),
>
> struct uprobe_consumer *get_uprobe_consumer_array(void **pos, void *head __unused)
> {
>         struct uprobe_consumer **uc = *pos;
>
>         if (!*uc)
>                 return NULL;
>
>         *pos = uc + 1;
>
>         return *uc;
> }
>
> But this may not be able to support array of uprobe_consumer. Hmm.
>
>
> > And so there is no easy and clean way to reset this iterator without
> > adding another callback or something. At which point it becomes quite
> > cumbersome and convoluted.
>
> If you consider that is problematic, I think we can prepare more
> iterator like object;
>
> struct uprobe_consumer_iter_ops {
>         struct uprobe_consumer *(*start)(struct uprobe_consumer_iter_ops *);
>         struct uprobe_consumer *(*next)(struct uprobe_consumer_iter_ops *);
>         void *ctx; // or, just embed the data in this structure.
> };
>

Yeah, I was thinking about something like this for adding a proper
iterator-based interface.

>
> > How about this? I'll keep the existing get_uprobe_consumer(idx, ctx)
> > contract, which works for the only user right now, BPF multi-uprobes.
> > When it's time to add another consumer that works with a linked list,
> > we can add another more complicated contract that would do
> > iterator-style callbacks. This would be used by linked list users, and
> > we can transparently implement existing uprobe_register_batch()
> > contract on top of if by implementing a trivial iterator wrapper on
> > top of get_uprobe_consumer(idx, ctx) approach.
>
> Agreed; anyway, as far as it uses an array of uprobe_consumer, it works.
> When we need to register a list of structures, we may be able to
> allocate an array or introduce a new function.
>

Cool, glad we agree. What you propose above with start + next + ctx
seems like a way forward if we need this.

BTW, is this (batched register/unregister APIs) something you'd like
to use from the tracefs-based (or whatever it's called, I mean non-BPF
ones) uprobes as well? Or there is just no way to even specify a batch
of uprobes? Just curious if you had any plans for this.

> Thank you!
>
> >
> > Let's not add unnecessary complications right now given we have a
> > clear path forward to add it later, if necessary, without breaking
> > anything. I'll send v2 without changes to get_uprobe_consumer() for
> > now, hopefully my above plan makes sense to you. Thanks!
> >
> > > >
> > > > Thank you,
> > > >
> > > > >
> > > > > For now this API works well, I propose to keep it as is. For linked
> > > > > list case consumers would need to allocate one extra array or pay the
> > > > > price of O(N) search (which might be ok, depending on how many uprobes
> > > > > are being attached). But we don't have such consumers right now,
> > > > > thankfully.
> > > > >
> > > > > >
> > > > > > Thank you,
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > Thank you,
> > > > > > > >
> > > > > > > > --
> > > > > > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > > > >
> > > > > >
> > > > > > --
> > > > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > >
> > > >
> > > > --
> > > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
Masami Hiramatsu (Google) July 2, 2024, 3:19 p.m. UTC | #12
On Mon, 1 Jul 2024 18:34:55 -0700
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:

> > > How about this? I'll keep the existing get_uprobe_consumer(idx, ctx)
> > > contract, which works for the only user right now, BPF multi-uprobes.
> > > When it's time to add another consumer that works with a linked list,
> > > we can add another more complicated contract that would do
> > > iterator-style callbacks. This would be used by linked list users, and
> > > we can transparently implement existing uprobe_register_batch()
> > > contract on top of if by implementing a trivial iterator wrapper on
> > > top of get_uprobe_consumer(idx, ctx) approach.
> >
> > Agreed; anyway, as far as it uses an array of uprobe_consumer, it works.
> > When we need to register a list of structures, we may be able to
> > allocate an array or introduce a new function.
> >
> 
> Cool, glad we agree. What you propose above with start + next + ctx
> seems like a way forward if we need this.
> 
> BTW, is this (batched register/unregister APIs) something you'd like
> to use from the tracefs-based (or whatever it's called, I mean non-BPF
> ones) uprobes as well? Or there is just no way to even specify a batch
> of uprobes? Just curious if you had any plans for this.

No, because the current tracefs dynamic event interface is not designed
for batched registration. I think we can expand it to pass wildcard
symbols (for kprobe and fprobe) or a list of addresses (for uprobes).
Um, that may be another good idea.

Thank you!
Steven Rostedt July 2, 2024, 4:53 p.m. UTC | #13
On Wed, 3 Jul 2024 00:19:05 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> > BTW, is this (batched register/unregister APIs) something you'd like
> > to use from the tracefs-based (or whatever it's called, I mean non-BPF
> > ones) uprobes as well? Or there is just no way to even specify a batch
> > of uprobes? Just curious if you had any plans for this.  
> 
> No, because the current tracefs dynamic event interface is not designed
> for batched registration. I think we can expand it to pass wildcard
> symbols (for kprobe and fprobe) or a list of addresses (for uprobes).
> Um, that may be another good idea.

I don't see why not. The wild cards were added to the kernel
specifically for the tracefs interface (set_ftrace_filter).

-- Steve

Patch

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index a75ba37ce3c8..a6e6eb70539d 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -33,6 +33,8 @@  enum uprobe_filter_ctx {
 	UPROBE_FILTER_MMAP,
 };
 
+typedef struct uprobe_consumer *(*uprobe_consumer_fn)(size_t idx, void *ctx);
+
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
 	int (*ret_handler)(struct uprobe_consumer *self,
@@ -48,6 +50,8 @@  struct uprobe_consumer {
 	loff_t ref_ctr_offset;
 	/* for internal uprobe infra use, consumers shouldn't touch fields below */
 	struct uprobe_consumer *next;
+	/* associated uprobe instance (or NULL if consumer isn't attached) */
+	struct uprobe *uprobe;
 };
 
 #ifdef CONFIG_UPROBES
@@ -116,8 +120,12 @@  extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
 extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, struct uprobe_consumer *uc);
+extern int uprobe_register_batch(struct inode *inode, int cnt,
+				 uprobe_consumer_fn get_uprobe_consumer, void *ctx);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, struct uprobe_consumer *uc);
+extern void uprobe_unregister_batch(struct inode *inode, int cnt,
+				    uprobe_consumer_fn get_uprobe_consumer, void *ctx);
 extern int uprobe_mmap(struct vm_area_struct *vma);
 extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void uprobe_start_dup_mmap(void);
@@ -160,6 +168,11 @@  uprobe_register(struct inode *inode, struct uprobe_consumer *uc)
 {
 	return -ENOSYS;
 }
+static inline int uprobe_register_batch(struct inode *inode, int cnt,
+					uprobe_consumer_fn get_uprobe_consumer, void *ctx)
+{
+	return -ENOSYS;
+}
 static inline int
 uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
 {
@@ -169,6 +182,10 @@  static inline void
 uprobe_unregister(struct inode *inode, struct uprobe_consumer *uc)
 {
 }
+static inline void uprobe_unregister_batch(struct inode *inode, int cnt,
+					     uprobe_consumer_fn get_uprobe_consumer, void *ctx)
+{
+}
 static inline int uprobe_mmap(struct vm_area_struct *vma)
 {
 	return 0;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2544e8b79bad..846efda614cb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1194,6 +1194,41 @@  __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
 	(void)register_for_each_vma(uprobe, NULL);
 }
 
+/*
+ * uprobe_unregister_batch - unregister a batch of already registered uprobe
+ * consumers.
+ * @inode: the file in which the probes have to be removed.
+ * @cnt: number of consumers to unregister
+ * @get_uprobe_consumer: a callback that returns the Nth uprobe_consumer to detach
+ * @ctx: an arbitrary context passed through into get_uprobe_consumer callback
+ */
+void uprobe_unregister_batch(struct inode *inode, int cnt, uprobe_consumer_fn get_uprobe_consumer, void *ctx)
+{
+	struct uprobe *uprobe;
+	struct uprobe_consumer *uc;
+	int i;
+
+	for (i = 0; i < cnt; i++) {
+		uc = get_uprobe_consumer(i, ctx);
+		uprobe = uc->uprobe;
+
+		if (WARN_ON(!uprobe))
+			continue;
+
+		down_write(&uprobe->register_rwsem);
+		__uprobe_unregister(uprobe, uc);
+		up_write(&uprobe->register_rwsem);
+		put_uprobe(uprobe);
+
+		uc->uprobe = NULL;
+	}
+}
+
+static struct uprobe_consumer *uprobe_consumer_identity(size_t idx, void *ctx)
+{
+	return (struct uprobe_consumer *)ctx;
+}
+
 /*
  * uprobe_unregister - unregister an already registered probe.
  * @inode: the file in which the probe has to be removed.
@@ -1201,84 +1236,128 @@  __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
  */
 void uprobe_unregister(struct inode *inode, struct uprobe_consumer *uc)
 {
-	struct uprobe *uprobe;
-
-	uprobe = find_uprobe(inode, uc->offset);
-	if (WARN_ON(!uprobe))
-		return;
-
-	down_write(&uprobe->register_rwsem);
-	__uprobe_unregister(uprobe, uc);
-	up_write(&uprobe->register_rwsem);
-	put_uprobe(uprobe);
+	uprobe_unregister_batch(inode, 1, uprobe_consumer_identity, uc);
 }
 EXPORT_SYMBOL_GPL(uprobe_unregister);
 
 /*
- * __uprobe_register - register a probe
- * @inode: the file in which the probe has to be placed.
- * @offset: offset from the start of the file.
- * @uc: information on howto handle the probe..
+ * uprobe_register_batch - register a batch of probes for a given inode
+ * @inode: the file in which the probes have to be placed.
+ * @cnt: number of probes to register
+ * @get_uprobe_consumer: a callback that returns Nth uprobe_consumer
+ * @ctx: an arbitrary context passed through into get_uprobe_consumer callback
  *
- * Apart from the access refcount, __uprobe_register() takes a creation
- * refcount (thro alloc_uprobe) if and only if this @uprobe is getting
- * inserted into the rbtree (i.e first consumer for a @inode:@offset
- * tuple).  Creation refcount stops uprobe_unregister from freeing the
- * @uprobe even before the register operation is complete. Creation
- * refcount is released when the last @uc for the @uprobe
- * unregisters. Caller of __uprobe_register() is required to keep @inode
- * (and the containing mount) referenced.
+ * uprobe_consumer instance itself contains offset and (optional)
+ * ref_ctr_offset within inode to attach to.
+ *
+ * On success, each attached uprobe_consumer assumes one refcount taken for
+ * respective uprobe instance (uniquely identified by inode+offset
+ * combination). Each uprobe_consumer is expected to eventually be detached
+ * through uprobe_unregister() or uprobe_unregister_batch() call, dropping
+ * their owning refcount.
+ *
+ * Caller of uprobe_register()/uprobe_register_batch() is required to keep
+ * @inode (and the containing mount) referenced.
+ *
+ * If not all probes are successfully installed, then all the successfully
+ * installed ones are rolled back. Note, there are no atomicity guarantees
+ * w.r.t. batch attachment. Some probes might start firing before batch
+ * attachment is completed. Even more so, some consumers might fire even if
+ * overall batch attachment ultimately fails.
  *
 * Return errno if it cannot successfully install probes
  * else return 0 (success)
  */
-static int __uprobe_register(struct inode *inode, loff_t offset,
-			     loff_t ref_ctr_offset, struct uprobe_consumer *uc)
+int uprobe_register_batch(struct inode *inode, int cnt,
+			  uprobe_consumer_fn get_uprobe_consumer, void *ctx)
 {
 	struct uprobe *uprobe;
-	int ret;
-
-	/* Uprobe must have at least one set consumer */
-	if (!uc->handler && !uc->ret_handler)
-		return -EINVAL;
+	struct uprobe_consumer *uc;
+	int ret, i;
 
 	/* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
 	if (!inode->i_mapping->a_ops->read_folio &&
 	    !shmem_mapping(inode->i_mapping))
 		return -EIO;
-	/* Racy, just to catch the obvious mistakes */
-	if (offset > i_size_read(inode))
-		return -EINVAL;
 
-	/*
-	 * This ensures that copy_from_page(), copy_to_page() and
-	 * __update_ref_ctr() can't cross page boundary.
-	 */
-	if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE))
-		return -EINVAL;
-	if (!IS_ALIGNED(ref_ctr_offset, sizeof(short)))
+	if (cnt <= 0 || !get_uprobe_consumer)
 		return -EINVAL;
 
-	uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
-	if (IS_ERR(uprobe))
-		return PTR_ERR(uprobe);
+	for (i = 0; i < cnt; i++) {
+		uc = get_uprobe_consumer(i, ctx);
+
+		/* Each consumer must have at least one handler set */
+		if (!uc || (!uc->handler && !uc->ret_handler))
+			return -EINVAL;
+		/* Racy, just to catch the obvious mistakes */
+		if (uc->offset > i_size_read(inode))
+			return -EINVAL;
+		if (uc->uprobe)
+			return -EINVAL;
+		/*
+		 * This ensures that copy_from_page(), copy_to_page() and
+		 * __update_ref_ctr() can't cross page boundary.
+		 */
+		if (!IS_ALIGNED(uc->offset, UPROBE_SWBP_INSN_SIZE))
+			return -EINVAL;
+		if (!IS_ALIGNED(uc->ref_ctr_offset, sizeof(short)))
+			return -EINVAL;
+	}
 
-	down_write(&uprobe->register_rwsem);
-	consumer_add(uprobe, uc);
-	ret = register_for_each_vma(uprobe, uc);
-	if (ret)
-		__uprobe_unregister(uprobe, uc);
-	up_write(&uprobe->register_rwsem);
+	for (i = 0; i < cnt; i++) {
+		uc = get_uprobe_consumer(i, ctx);
 
-	if (ret)
-		put_uprobe(uprobe);
+		uprobe = alloc_uprobe(inode, uc->offset, uc->ref_ctr_offset);
+		if (IS_ERR(uprobe)) {
+			ret = PTR_ERR(uprobe);
+			goto cleanup_uprobes;
+		}
+
+		uc->uprobe = uprobe;
+	}
+
+	for (i = 0; i < cnt; i++) {
+		uc = get_uprobe_consumer(i, ctx);
+		uprobe = uc->uprobe;
+
+		down_write(&uprobe->register_rwsem);
+		consumer_add(uprobe, uc);
+		ret = register_for_each_vma(uprobe, uc);
+		if (ret)
+			__uprobe_unregister(uprobe, uc);
+		up_write(&uprobe->register_rwsem);
+
+		if (ret) {
+			put_uprobe(uprobe);
+			goto cleanup_unreg;
+		}
+	}
+
+	return 0;
 
+cleanup_unreg:
+	/* unregister all uprobes we managed to register until failure */
+	for (i--; i >= 0; i--) {
+		uc = get_uprobe_consumer(i, ctx);
+
+		down_write(&uc->uprobe->register_rwsem);
+		__uprobe_unregister(uc->uprobe, uc);
+		up_write(&uc->uprobe->register_rwsem);
+	}
+cleanup_uprobes:
+	/* put all the successfully allocated/reused uprobes */
+	for (i = cnt - 1; i >= 0; i--) {
+		uc = get_uprobe_consumer(i, ctx);
+
+		put_uprobe(uc->uprobe);
+		uc->uprobe = NULL;
+	}
 	return ret;
 }
 
 int uprobe_register(struct inode *inode, struct uprobe_consumer *uc)
 {
-	return __uprobe_register(inode, uc->offset, uc->ref_ctr_offset, uc);
+	return uprobe_register_batch(inode, 1, uprobe_consumer_identity, uc);
 }
 EXPORT_SYMBOL_GPL(uprobe_register);