[sched_ext/for-6.15,v3,3/5] sched_ext: Add scx_kfunc_ids_ops_context_sensitive for unified filtering of context-sensitive SCX kfuncs

Message ID AM6PR03MB5080648369E8A4508220133E99C22@AM6PR03MB5080.eurprd03.prod.outlook.com (mailing list archive)
State Not Applicable
Series bpf, sched_ext: Make kfunc filters support struct_ops context to reduce runtime overhead

Checks

Context: netdev/tree_selection
Check: success
Description: Guessing tree name failed - patch did not apply, async

Commit Message

Juntong Deng Feb. 26, 2025, 7:28 p.m. UTC
This patch adds scx_kfunc_ids_ops_context_sensitive for unified
filtering of context-sensitive SCX kfuncs.

Currently we need to rely on kfunc id sets to group context-sensitive
SCX kfuncs.

Adding a separate filter to each grouping kfunc id set would be
cumbersome. A better approach is to use different kfunc id sets for
grouping purposes and for filtering purposes.

scx_kfunc_ids_ops_context_sensitive is a kfunc id set for filtering
purposes, which contains all context-sensitive SCX kfuncs and implements
filtering rules for different contexts in the filter (by searching the
kfunc id sets used for grouping purposes).

Now we only need to register scx_kfunc_ids_ops_context_sensitive, and
no longer need to register multiple context-sensitive kfunc id sets.

In addition, this patch adds the SCX_MOFF_IDX macro to facilitate
calculating the index (idx) from the member offset (moff).

Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
---
 kernel/sched/ext.c      | 110 ++++++++++++++++++++++++++++++----------
 kernel/sched/ext_idle.c |   8 +--
 2 files changed, 83 insertions(+), 35 deletions(-)
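
To make the moff-based lookup concrete: both macros divide a byte offset
inside struct sched_ext_ops by the size of a function pointer, so the member
offset the verifier reports for an attached program maps to the same index
used when scx_ops_context_flags[] is initialized. A minimal sketch, using
only the two definitions from the patch below:

  #define SCX_OP_IDX(op)	(offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
  #define SCX_MOFF_IDX(moff)	(moff / sizeof(void (*)(void)))

  /*
   * For any op, e.g. dispatch:
   *   moff = offsetof(struct sched_ext_ops, dispatch);
   *   SCX_MOFF_IDX(moff) == SCX_OP_IDX(dispatch)
   * so prog->aux->attach_st_ops_member_off indexes scx_ops_context_flags[]
   * at the same slot that was filled in with SCX_OP_IDX(dispatch).
   */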

Comments

Tejun Heo Feb. 27, 2025, 8:25 p.m. UTC | #1
Hello,

On Wed, Feb 26, 2025 at 07:28:18PM +0000, Juntong Deng wrote:
> +BTF_KFUNCS_START(scx_kfunc_ids_ops_context_sensitive)
> +/* scx_kfunc_ids_select_cpu */
> +BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
> +/* scx_kfunc_ids_enqueue_dispatch */
> +BTF_ID_FLAGS(func, scx_bpf_dsq_insert, KF_RCU)
> +BTF_ID_FLAGS(func, scx_bpf_dsq_insert_vtime, KF_RCU)
> +BTF_ID_FLAGS(func, scx_bpf_dispatch, KF_RCU)
> +BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime, KF_RCU)
> +/* scx_kfunc_ids_dispatch */
> +BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots)
> +BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel)
> +BTF_ID_FLAGS(func, scx_bpf_dsq_move_to_local)
> +BTF_ID_FLAGS(func, scx_bpf_consume)
> +/* scx_kfunc_ids_cpu_release */
> +BTF_ID_FLAGS(func, scx_bpf_reenqueue_local)
> +/* scx_kfunc_ids_unlocked */
> +BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE)
> +/* Intersection of scx_kfunc_ids_dispatch and scx_kfunc_ids_unlocked */
> +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice)
> +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime)
> +BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU)
> +BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU)
> +BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
> +BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
> +BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
> +BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
> +BTF_KFUNCS_END(scx_kfunc_ids_ops_context_sensitive)

I'm not a big fan of repeating the kfuncs. This is going to be error-prone.
Can't it register and test the existing sets in the filter function instead?
If that's too cumbersome, maybe we can put these in a macro so that we don't
have to repeat the functions?

> +static int scx_kfunc_ids_ops_context_sensitive_filter(const struct bpf_prog *prog, u32 kfunc_id)
> +{
> +	u32 moff, flags;
> +
> +	if (!btf_id_set8_contains(&scx_kfunc_ids_ops_context_sensitive, kfunc_id))
> +		return 0;
> +
> +	if (prog->type == BPF_PROG_TYPE_SYSCALL &&
> +	    btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
> +		return 0;

Not from this change but these can probably be allowed from TRACING too.

> +	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
> +	    prog->aux->st_ops != &bpf_sched_ext_ops)
> +		return 0;

Why can't other struct_ops progs call scx_kfunc_ids_unlocked kfuncs?

> +	/* prog->type == BPF_PROG_TYPE_STRUCT_OPS && prog->aux->st_ops == &bpf_sched_ext_ops*/
> +
> +	moff = prog->aux->attach_st_ops_member_off;
> +	flags = scx_ops_context_flags[SCX_MOFF_IDX(moff)];
> +
> +	if ((flags & SCX_OPS_KF_UNLOCKED) &&
> +	    btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
> +		return 0;

Wouldn't this disallow e.g. ops.dispatch() from calling scx_dsq_move()?

Have you tested that the before and after behaviors match?

Thanks.
Juntong Deng Feb. 27, 2025, 9:23 p.m. UTC | #2
On 2025/2/27 20:25, Tejun Heo wrote:

>> +/* Each flag corresponds to a btf kfunc id set */
>> +enum scx_ops_kf_flags {
>> +	SCX_OPS_KF_ANY			= 0,
>> +	SCX_OPS_KF_UNLOCKED		= 1 << 1,

> nit: Any specific reason to skip bit 0?

Thanks for your reply.

This is a mistake and will be fixed in the next version.
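
A minimal sketch of the renumbering (the flag names are taken from the
checks in the filter below; their relative order in patch 2 may differ):

  enum scx_ops_kf_flags {
  	SCX_OPS_KF_ANY			= 0,
  	SCX_OPS_KF_UNLOCKED		= 1 << 0,
  	SCX_OPS_KF_CPU_RELEASE		= 1 << 1,
  	SCX_OPS_KF_DISPATCH		= 1 << 2,
  	SCX_OPS_KF_ENQUEUE		= 1 << 3,
  	SCX_OPS_KF_SELECT_CPU		= 1 << 4,
  };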

>> +	[SCX_OP_IDX(exit_task)]			= SCX_OPS_KF_ANY,
>> +	[SCX_OP_IDX(enable)]			= SCX_OPS_KF_ANY,
>> +	[SCX_OP_IDX(disable)]			= SCX_OPS_KF_ANY,
>> +	[SCX_OP_IDX(dump)]			= SCX_OPS_KF_DISPATCH,

> Shouldn't this be SCX_OPS_KF_UNLOCKED?

This is another mistake and will be fixed in the next version.

> Hello,
> 
> On Wed, Feb 26, 2025 at 07:28:18PM +0000, Juntong Deng wrote:
>> +BTF_KFUNCS_START(scx_kfunc_ids_ops_context_sensitive)
>> +/* scx_kfunc_ids_select_cpu */
>> +BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
>> +/* scx_kfunc_ids_enqueue_dispatch */
>> +BTF_ID_FLAGS(func, scx_bpf_dsq_insert, KF_RCU)
>> +BTF_ID_FLAGS(func, scx_bpf_dsq_insert_vtime, KF_RCU)
>> +BTF_ID_FLAGS(func, scx_bpf_dispatch, KF_RCU)
>> +BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime, KF_RCU)
>> +/* scx_kfunc_ids_dispatch */
>> +BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots)
>> +BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel)
>> +BTF_ID_FLAGS(func, scx_bpf_dsq_move_to_local)
>> +BTF_ID_FLAGS(func, scx_bpf_consume)
>> +/* scx_kfunc_ids_cpu_release */
>> +BTF_ID_FLAGS(func, scx_bpf_reenqueue_local)
>> +/* scx_kfunc_ids_unlocked */
>> +BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE)
>> +/* Intersection of scx_kfunc_ids_dispatch and scx_kfunc_ids_unlocked */
>> +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice)
>> +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime)
>> +BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU)
>> +BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU)
>> +BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
>> +BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
>> +BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
>> +BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
>> +BTF_KFUNCS_END(scx_kfunc_ids_ops_context_sensitive)
> 
> I'm not a big fan of repeating the kfuncs. This is going to be error-prone.
> Can't it register and test the existing sets in the filter function instead?
> If that's too cumbersome, maybe we can put these in a macro so that we don't
> have to repeat the functions?
> 

The core idea of the current design is to separate the kfunc id sets used
for filtering purposes from those used for grouping purposes, so that we
only need one filter and do not need to add a separate filter for each
kfunc id set. So although kfuncs appear repeatedly in two kfunc id sets,
they are used for different purposes.

scx_kfunc_ids_ops_context_sensitive is only used for filtering purposes
and includes all context-sensitive kfuncs. We need to rely on another,
grouping-purpose kfunc id set, for example scx_kfunc_ids_dispatch,
to determine whether a kfunc is allowed to be called in the
dispatch context.

A macro is a good idea, I will try it in the next version.
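
One possible shape for such a macro (illustrative only; the names are made
up and whether this expands cleanly through the BTF_ID machinery would
still need checking):

  /* list each group once ... */
  #define SCX_ENQUEUE_DISPATCH_KFUNCS(x)		\
  	x(scx_bpf_dsq_insert, KF_RCU)			\
  	x(scx_bpf_dsq_insert_vtime, KF_RCU)		\
  	x(scx_bpf_dispatch, KF_RCU)			\
  	x(scx_bpf_dispatch_vtime, KF_RCU)

  #define SCX_KFUNC_ENTRY(name, ...)	BTF_ID_FLAGS(func, name, ##__VA_ARGS__)

  /* ... then expand it into both the grouping set and the filtering set */
  BTF_KFUNCS_START(scx_kfunc_ids_enqueue_dispatch)
  SCX_ENQUEUE_DISPATCH_KFUNCS(SCX_KFUNC_ENTRY)
  BTF_KFUNCS_END(scx_kfunc_ids_enqueue_dispatch)

  BTF_KFUNCS_START(scx_kfunc_ids_ops_context_sensitive)
  SCX_ENQUEUE_DISPATCH_KFUNCS(SCX_KFUNC_ENTRY)
  /* ... the other groups, each defined the same way ... */
  BTF_KFUNCS_END(scx_kfunc_ids_ops_context_sensitive)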

>> +static int scx_kfunc_ids_ops_context_sensitive_filter(const struct bpf_prog *prog, u32 kfunc_id)
>> +{
>> +	u32 moff, flags;
>> +
>> +	if (!btf_id_set8_contains(&scx_kfunc_ids_ops_context_sensitive, kfunc_id))
>> +		return 0;
>> +
>> +	if (prog->type == BPF_PROG_TYPE_SYSCALL &&
>> +	    btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
>> +		return 0;
> 
> Not from this change but these can probably be allowed from TRACING too.
> 

Not sure if it is safe to make these kfuncs available in TRACING.
If Alexei sees this email, could you please leave a comment?

>> +	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
>> +	    prog->aux->st_ops != &bpf_sched_ext_ops)
>> +		return 0;
> 
> Why can't other struct_ops progs call scx_kfunc_ids_unlocked kfuncs?
> 

Return 0 means allowed. So kfuncs in scx_kfunc_ids_unlocked can be
called by other struct_ops programs.

>> +	/* prog->type == BPF_PROG_TYPE_STRUCT_OPS && prog->aux->st_ops == &bpf_sched_ext_ops*/
>> +
>> +	moff = prog->aux->attach_st_ops_member_off;
>> +	flags = scx_ops_context_flags[SCX_MOFF_IDX(moff)];
>> +
>> +	if ((flags & SCX_OPS_KF_UNLOCKED) &&
>> +	    btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
>> +		return 0;
> 
> Wouldn't this disallow e.g. ops.dispatch() from calling scx_dsq_move()?
> 

No, because

>> [SCX_OP_IDX(dispatch)] = SCX_OPS_KF_DISPATCH | SCX_OPS_KF_ENQUEUE,

Therefore, kfuncs (scx_bpf_dsq_move_*) in scx_kfunc_ids_dispatch can be
called in the dispatch context.

> Have you tested that the before and after behaviors match?
>

I tested the programs in tools/testing/selftests/sched_ext and
tools/sched_ext and all worked fine.

If there are other cases that are not covered, we may need to add new
test cases.

> Thanks.
>
Tejun Heo Feb. 27, 2025, 9:32 p.m. UTC | #3
Hello,

On Thu, Feb 27, 2025 at 09:23:20PM +0000, Juntong Deng wrote:
> > > +	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
> > > +	    prog->aux->st_ops != &bpf_sched_ext_ops)
> > > +		return 0;
> > 
> > Why can't other struct_ops progs call scx_kfunc_ids_unlocked kfuncs?
> > 
> 
> Return 0 means allowed. So kfuncs in scx_kfunc_ids_unlocked can be
> called by other struct_ops programs.

Hmm... would that mean a non-sched_ext bpf prog would be able to call e.g.
scx_bpf_dsq_insert()?

> > > +	/* prog->type == BPF_PROG_TYPE_STRUCT_OPS && prog->aux->st_ops == &bpf_sched_ext_ops*/
> > > +
> > > +	moff = prog->aux->attach_st_ops_member_off;
> > > +	flags = scx_ops_context_flags[SCX_MOFF_IDX(moff)];
> > > +
> > > +	if ((flags & SCX_OPS_KF_UNLOCKED) &&
> > > +	    btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
> > > +		return 0;
> > 
> > Wouldn't this disallow e.g. ops.dispatch() from calling scx_dsq_move()?
> > 
> 
> No, because
> 
> > > [SCX_OP_IDX(dispatch)] = SCX_OPS_KF_DISPATCH | SCX_OPS_KF_ENQUEUE,
> 
> Therefore, kfuncs (scx_bpf_dsq_move_*) in scx_kfunc_ids_dispatch can be
> called in the dispatch context.

I see, scx_dsq_move_*() are in both groups, so it should be fine. I'm not
fully sure the groupings and the actually implemented filtering are in sync.
They are intended to be, but the grouping didn't really matter in the
previous implementation. So, they need to be carefully audited.

> > Have you tested that the before and after behaviors match?
> 
> I tested the programs in tools/testing/selftests/sched_ext and
> tools/sched_ext and all worked fine.
> 
> If there are other cases that are not covered, we may need to add new
> test cases.

Right, the coverage there isn't perfect. Testing all conditions would be too
much but it'd be nice to have a test case which at least confirms that all
allowed cases verify successfully.

Thanks.
Alexei Starovoitov Feb. 28, 2025, 2:34 a.m. UTC | #4
On Thu, Feb 27, 2025 at 1:32 PM Tejun Heo <tj@kernel.org> wrote:
>
> Hello,
>
> On Thu, Feb 27, 2025 at 09:23:20PM +0000, Juntong Deng wrote:
> > > > + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
> > > > +     prog->aux->st_ops != &bpf_sched_ext_ops)
> > > > +         return 0;
> > >
> > > Why can't other struct_ops progs call scx_kfunc_ids_unlocked kfuncs?
> > >
> >
> > Return 0 means allowed. So kfuncs in scx_kfunc_ids_unlocked can be
> > called by other struct_ops programs.
>
> Hmm... would that mean a non-sched_ext bpf prog would be able to call e.g.
> scx_bpf_dsq_insert()?

Not as far as I can tell.
scx_kfunc_ids_unlocked[] doesn't include scx_bpf_dsq_insert.
It's part of scx_kfunc_ids_enqueue_dispatch[].

So this bit in patch 3 enables it:
+       if ((flags & SCX_OPS_KF_ENQUEUE) &&
+           btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id))

and in patch 2:
+       [SCX_OP_IDX(enqueue)]                   = SCX_OPS_KF_ENQUEUE,

So scx_bpf_dsq_insert() kfunc can only be called out
of enqueue() sched-ext hook.

So the restriction is still the same. afaict.
Alexei Starovoitov Feb. 28, 2025, 2:38 a.m. UTC | #5
On Thu, Feb 27, 2025 at 1:23 PM Juntong Deng <juntong.deng@outlook.com> wrote:
>
> >> +static int scx_kfunc_ids_ops_context_sensitive_filter(const struct bpf_prog *prog, u32 kfunc_id)
> >> +{
> >> +    u32 moff, flags;
> >> +
> >> +    if (!btf_id_set8_contains(&scx_kfunc_ids_ops_context_sensitive, kfunc_id))
> >> +            return 0;
> >> +
> >> +    if (prog->type == BPF_PROG_TYPE_SYSCALL &&
> >> +        btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
> >> +            return 0;
> >
> > Not from this change but these can probably be allowed from TRACING too.
> >
>
> Not sure if it is safe to make these kfuncs available in TRACING.
> If Alexei sees this email, could you please leave a comment?

Hold on, you want to enable the whole scx_kfunc_ids_unlocked[] set
for all of TRACING? What is the use case?
Maybe it's safe, but without in-depth analysis we shouldn't.
Currently sched-ext allows scx_kfunc_set_any[] for tracing.
I would stick to that in this patch set.
Juntong Deng Feb. 28, 2025, 6:42 p.m. UTC | #6
On 2025/2/27 21:32, Tejun Heo wrote:
> Hello,
> 
> On Thu, Feb 27, 2025 at 09:23:20PM +0000, Juntong Deng wrote:
>>>> +	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
>>>> +	    prog->aux->st_ops != &bpf_sched_ext_ops)
>>>> +		return 0;
>>>
>>> Why can't other struct_ops progs call scx_kfunc_ids_unlocked kfuncs?
>>>
>>
>> Return 0 means allowed. So kfuncs in scx_kfunc_ids_unlocked can be
>> called by other struct_ops programs.
> 
> Hmm... would that mean a non-sched_ext bpf prog would be able to call e.g.
> scx_bpf_dsq_insert()?
> 

For other struct_ops programs, yes, in the current logic,
when prog->aux->st_ops != &bpf_sched_ext_ops, all calls are allowed.

This may seem a bit weird, but the reason I did it is that in other
struct_ops programs, the meaning of member_off changes, so the logic
that follows makes no sense at all.

Of course, we can change this, and ideally there would be some groupings
(kfunc id sets) that declare which kfuncs can be called by other
struct_ops programs and which cannot.

>>>> +	/* prog->type == BPF_PROG_TYPE_STRUCT_OPS && prog->aux->st_ops == &bpf_sched_ext_ops*/
>>>> +
>>>> +	moff = prog->aux->attach_st_ops_member_off;
>>>> +	flags = scx_ops_context_flags[SCX_MOFF_IDX(moff)];
>>>> +
>>>> +	if ((flags & SCX_OPS_KF_UNLOCKED) &&
>>>> +	    btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
>>>> +		return 0;
>>>
>>> Wouldn't this disallow e.g. ops.dispatch() from calling scx_dsq_move()?
>>>
>>
>> No, because
>>
>>>> [SCX_OP_IDX(dispatch)] = SCX_OPS_KF_DISPATCH | SCX_OPS_KF_ENQUEUE,
>>
>> Therefore, kfuncs (scx_bpf_dsq_move_*) in scx_kfunc_ids_dispatch can be
>> called in the dispatch context.
> 
> I see, scx_dsq_move_*() are in both groups, so it should be fine. I'm not
> fully sure the groupings and the actually implemented filtering are in sync.
> They are intended to be, but the grouping didn't really matter in the
> previous implementation. So, they need to be carefully audited.
> 

After you audit the current groupings of SCX kfuncs, please tell me how
you would like them changed.

>>> Have you tested that the before and after behaviors match?
>>
>> I tested the programs in tools/testing/selftests/sched_ext and
>> tools/sched_ext and all worked fine.
>>
>> If there are other cases that are not covered, we may need to add new
>> test cases.
> 
> Right, the coverage there isn't perfect. Testing all conditions would be too
> much but it'd be nice to have a test case which at least confirms that all
> allowed cases verify successfully.
> 

Yes, we can add a simple test case for each operation that is not
SCX_OPS_KF_ANY.
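
A minimal sketch of what one such case could look like (names are made up;
it assumes the usual scaffolding in tools/testing/selftests/sched_ext and
only checks that an allowed call, here scx_bpf_dsq_insert() from
ops.enqueue, passes the verifier and the scheduler loads):

  #include <scx/common.bpf.h>

  char _license[] SEC("license") = "GPL";

  void BPF_STRUCT_OPS(allowed_ctx_enqueue, struct task_struct *p, u64 enq_flags)
  {
  	/* allowed in the enqueue context per SCX_OPS_KF_ENQUEUE */
  	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
  }

  SEC(".struct_ops.link")
  struct sched_ext_ops allowed_ctx_ops = {
  	.enqueue	= (void *)allowed_ctx_enqueue,
  	.name		= "allowed_ctx",
  };

The interesting negative cases (the same call from an op whose flags do not
include SCX_OPS_KF_ENQUEUE) should then fail to load.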

> Thanks.
>
Tejun Heo Feb. 28, 2025, 9:14 p.m. UTC | #7
Hello,

On Thu, Feb 27, 2025 at 06:38:32PM -0800, Alexei Starovoitov wrote:
...
> > > Not from this change but these can probably be allowed from TRACING too.
> > >
> >
> > Not sure if it is safe to make these kfuncs available in TRACING.
> > If Alexei sees this email, could you please leave a comment?
> 
> Hold on, you want to enable all of scx_kfunc_ids_unlocked[] set
> to all of TRACING ? What is the use case ?

I thought it may be useful to be able to iterate DSQs and trigger
dispatching from there but

> Maybe it's safe, but without in-depth analysis we shouldn't.
> Currently sched-ext allows scx_kfunc_set_any[] for tracing.
> I would stick to that in this patch set.

I haven't thought through safety at all and was mostly naively
thinking that if _any is safe, _unlocked should be too. Right now, unlocked has
two groups of kfuncs - the ones that should be able to sleep and the ones
that can be called from within scx_dsq_iter iterations. The former is
excluded from TRACING through KF_SLEEPABLE, I think. I don't know whether
dsq iteration is allowed in TRACING.

Anyways, not a real issue either way.

Thanks.
Tejun Heo Feb. 28, 2025, 9:20 p.m. UTC | #8
Hello,

On Fri, Feb 28, 2025 at 06:42:11PM +0000, Juntong Deng wrote:
> > > Return 0 means allowed. So kfuncs in scx_kfunc_ids_unlocked can be
> > > called by other struct_ops programs.
> > 
> > Hmm... would that mean a non-sched_ext bpf prog would be able to call e.g.
> > scx_bpf_dsq_insert()?
> 
> For other struct_ops programs, yes, in the current logic,
> when prog->aux->st_ops != &bpf_sched_ext_ops, all calls are allowed.
> 
> This may seem a bit weird, but the reason I did it is that in other
> struct_ops programs, the meaning of member_off changes, so the logic
> that follows makes no sense at all.
> 
> Of course, we can change this, and ideally there would be some groupings
> (kfunc id set) that declare which kfunc can be called by other
> struct_ops programs and which cannot.

Other than any and unlocked, I don't think other bpf struct_ops should be
able to call SCX kfuncs. They all assume the rq lock is held, which wouldn't
be true for other struct_ops after all.

...
> > I see, scx_dsq_move_*() are in both groups, so it should be fine. I'm not
> > fully sure the groupings and the actually implemented filtering are in sync.
> > They are intended to be, but the grouping didn't really matter in the
> > previous implementation. So, they need to be carefully audited.
> 
> After you audit the current groupings of scx kfuncs, please tell me how
> you would like to change the current groupings.

Yeah, I'll go over them, but in the end we need to ensure that the behavior
currently implemented by scx_kf_allowed*() matches what the new code does,
so I'd appreciate it if you can go over them with that in mind too. This is
kinda confusing, so we can definitely use more eyes.

> > Right, the coverage there isn't perfect. Testing all conditions would be too
> > much but it'd be nice to have a test case which at least confirms that all
> > allowed cases verify successfully.
> 
> Yes, we can add a simple test case for each operation that is not
> SCX_OPS_KF_ANY.

That'd be great. Thanks.
Tejun Heo Feb. 28, 2025, 9:31 p.m. UTC | #9
Hello,

On Thu, Feb 27, 2025 at 06:34:37PM -0800, Alexei Starovoitov wrote:
> > Hmm... would that mean a non-sched_ext bpf prog would be able to call e.g.
> > scx_bpf_dsq_insert()?
> 
> Not as far as I can tell.
> scx_kfunc_ids_unlocked[] doesn't include scx_bpf_dsq_insert.
> It's part of scx_kfunc_ids_enqueue_dispatch[].
> 
> So this bit in patch 3 enables it:
> +       if ((flags & SCX_OPS_KF_ENQUEUE) &&
> +           btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id))
> 
> and in patch 2:
> +       [SCX_OP_IDX(enqueue)]                   = SCX_OPS_KF_ENQUEUE,
> 
> So scx_bpf_dsq_insert() kfunc can only be called out
> of enqueue() sched-ext hook.
> 
> So the restriction is still the same. afaict.

Hmm... maybe I'm missing something:

  static int scx_kfunc_ids_ops_context_sensitive_filter(const struct bpf_prog *prog, u32 kfunc_id)
  {
         u32 moff, flags;

         // allow non-context sensitive kfuncs
         if (!btf_id_set8_contains(&scx_kfunc_ids_ops_context_sensitive, kfunc_id))
                 return 0;

         // allow unlocked to be called from all SYSCALL progs
         if (prog->type == BPF_PROG_TYPE_SYSCALL &&
             btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
                 return 0;

         // *** HERE, allow if the prog is not SCX ***
         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
             prog->aux->st_ops != &bpf_sched_ext_ops)
                 return 0;

         /* prog->type == BPF_PROG_TYPE_STRUCT_OPS && prog->aux->st_ops == &bpf_sched_ext_ops*/

         // other context sensitive allow's

So, I think it'd say yes if a non-SCX BPF prog tries to call a
context-sensitive SCX kfunc.

Thanks.
Alexei Starovoitov Feb. 28, 2025, 11:06 p.m. UTC | #10
On Fri, Feb 28, 2025 at 1:31 PM Tejun Heo <tj@kernel.org> wrote:
>
> Hello,
>
> On Thu, Feb 27, 2025 at 06:34:37PM -0800, Alexei Starovoitov wrote:
> > > Hmm... would that mean a non-sched_ext bpf prog would be able to call e.g.
> > > scx_bpf_dsq_insert()?
> >
> > Not as far as I can tell.
> > scx_kfunc_ids_unlocked[] doesn't include scx_bpf_dsq_insert.
> > It's part of scx_kfunc_ids_enqueue_dispatch[].
> >
> > So this bit in patch 3 enables it:
> > +       if ((flags & SCX_OPS_KF_ENQUEUE) &&
> > +           btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id))
> >
> > and in patch 2:
> > +       [SCX_OP_IDX(enqueue)]                   = SCX_OPS_KF_ENQUEUE,
> >
> > So scx_bpf_dsq_insert() kfunc can only be called out
> > of enqueue() sched-ext hook.
> >
> > So the restriction is still the same. afaict.
>
> Hmm... maybe I'm missing something:
>
>   static int scx_kfunc_ids_ops_context_sensitive_filter(const struct bpf_prog *prog, u32 kfunc_id)
>   {
>          u32 moff, flags;
>
>          // allow non-context sensitive kfuncs
>          if (!btf_id_set8_contains(&scx_kfunc_ids_ops_context_sensitive, kfunc_id))
>                  return 0;
>
>          // allow unlocked to be called form all SYSCALL progs
>          if (prog->type == BPF_PROG_TYPE_SYSCALL &&
>              btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
>                  return 0;
>
>          // *** HERE, allow if the prog is not SCX ***
>          if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
>              prog->aux->st_ops != &bpf_sched_ext_ops)

Ohh. You're right. My bad.
Here it probably needs
&& !!btf_id_set8_contains(&scx_kfunc_ids_ops_context_sensitive

since later scx_kfunc_set_ops_context_sensitive
is registered for all of struct-ops.
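
In other words, the non-SCX struct_ops branch can't stay a blanket allow.
One possible shape, only a sketch and assuming (per the discussion above)
that other struct_ops should at most get the unlocked kfuncs:

  	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
  	    prog->aux->st_ops != &bpf_sched_ext_ops) {
  		/*
  		 * Everything reaching this point is context-sensitive
  		 * (the first check already allowed everything else), so
  		 * for non-SCX struct_ops only the unlocked group is a
  		 * candidate; the rest assumes SCX locking contexts.
  		 */
  		if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
  			return 0;
  		return -EACCES;
  	}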

Patch

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 15fac968629e..c337f6206ae5 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -10,6 +10,7 @@ 
 #include "ext_idle.h"
 
 #define SCX_OP_IDX(op)		(offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
+#define SCX_MOFF_IDX(moff)	(moff / sizeof(void (*)(void)))
 
 enum scx_consts {
 	SCX_DSP_DFL_MAX_BATCH		= 32,
@@ -6300,11 +6301,6 @@  BTF_ID_FLAGS(func, scx_bpf_dispatch, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_enqueue_dispatch)
 
-static const struct btf_kfunc_id_set scx_kfunc_set_enqueue_dispatch = {
-	.owner			= THIS_MODULE,
-	.set			= &scx_kfunc_ids_enqueue_dispatch,
-};
-
 static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
 			 struct task_struct *p, u64 dsq_id, u64 enq_flags)
 {
@@ -6620,11 +6616,6 @@  BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_dispatch)
 
-static const struct btf_kfunc_id_set scx_kfunc_set_dispatch = {
-	.owner			= THIS_MODULE,
-	.set			= &scx_kfunc_ids_dispatch,
-};
-
 __bpf_kfunc_start_defs();
 
 /**
@@ -6687,11 +6678,6 @@  BTF_KFUNCS_START(scx_kfunc_ids_cpu_release)
 BTF_ID_FLAGS(func, scx_bpf_reenqueue_local)
 BTF_KFUNCS_END(scx_kfunc_ids_cpu_release)
 
-static const struct btf_kfunc_id_set scx_kfunc_set_cpu_release = {
-	.owner			= THIS_MODULE,
-	.set			= &scx_kfunc_ids_cpu_release,
-};
-
 __bpf_kfunc_start_defs();
 
 /**
@@ -6724,11 +6710,6 @@  BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_unlocked)
 
-static const struct btf_kfunc_id_set scx_kfunc_set_unlocked = {
-	.owner			= THIS_MODULE,
-	.set			= &scx_kfunc_ids_unlocked,
-};
-
 __bpf_kfunc_start_defs();
 
 /**
@@ -7370,6 +7351,85 @@  __bpf_kfunc void scx_bpf_events(struct scx_event_stats *events,
 
 __bpf_kfunc_end_defs();
 
+BTF_KFUNCS_START(scx_kfunc_ids_ops_context_sensitive)
+/* scx_kfunc_ids_select_cpu */
+BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
+/* scx_kfunc_ids_enqueue_dispatch */
+BTF_ID_FLAGS(func, scx_bpf_dsq_insert, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_dsq_insert_vtime, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_dispatch, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime, KF_RCU)
+/* scx_kfunc_ids_dispatch */
+BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots)
+BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel)
+BTF_ID_FLAGS(func, scx_bpf_dsq_move_to_local)
+BTF_ID_FLAGS(func, scx_bpf_consume)
+/* scx_kfunc_ids_cpu_release */
+BTF_ID_FLAGS(func, scx_bpf_reenqueue_local)
+/* scx_kfunc_ids_unlocked */
+BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE)
+/* Intersection of scx_kfunc_ids_dispatch and scx_kfunc_ids_unlocked */
+BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice)
+BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime)
+BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
+BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
+BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
+BTF_KFUNCS_END(scx_kfunc_ids_ops_context_sensitive)
+
+extern struct btf_id_set8 scx_kfunc_ids_select_cpu;
+
+static int scx_kfunc_ids_ops_context_sensitive_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+	u32 moff, flags;
+
+	if (!btf_id_set8_contains(&scx_kfunc_ids_ops_context_sensitive, kfunc_id))
+		return 0;
+
+	if (prog->type == BPF_PROG_TYPE_SYSCALL &&
+	    btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
+		return 0;
+
+	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
+	    prog->aux->st_ops != &bpf_sched_ext_ops)
+		return 0;
+
+	/* prog->type == BPF_PROG_TYPE_STRUCT_OPS && prog->aux->st_ops == &bpf_sched_ext_ops*/
+
+	moff = prog->aux->attach_st_ops_member_off;
+	flags = scx_ops_context_flags[SCX_MOFF_IDX(moff)];
+
+	if ((flags & SCX_OPS_KF_UNLOCKED) &&
+	    btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id))
+		return 0;
+
+	if ((flags & SCX_OPS_KF_CPU_RELEASE) &&
+	    btf_id_set8_contains(&scx_kfunc_ids_cpu_release, kfunc_id))
+		return 0;
+
+	if ((flags & SCX_OPS_KF_DISPATCH) &&
+	    btf_id_set8_contains(&scx_kfunc_ids_dispatch, kfunc_id))
+		return 0;
+
+	if ((flags & SCX_OPS_KF_ENQUEUE) &&
+	    btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id))
+		return 0;
+
+	if ((flags & SCX_OPS_KF_SELECT_CPU) &&
+	    btf_id_set8_contains(&scx_kfunc_ids_select_cpu, kfunc_id))
+		return 0;
+
+	return -EACCES;
+}
+
+static const struct btf_kfunc_id_set scx_kfunc_set_ops_context_sensitive = {
+	.owner			= THIS_MODULE,
+	.set			= &scx_kfunc_ids_ops_context_sensitive,
+	.filter			= scx_kfunc_ids_ops_context_sensitive_filter,
+};
+
 BTF_KFUNCS_START(scx_kfunc_ids_any)
 BTF_ID_FLAGS(func, scx_bpf_kick_cpu)
 BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued)
@@ -7425,15 +7485,9 @@  static int __init scx_init(void)
 	 * check using scx_kf_allowed().
 	 */
 	if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
-					     &scx_kfunc_set_enqueue_dispatch)) ||
-	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
-					     &scx_kfunc_set_dispatch)) ||
-	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
-					     &scx_kfunc_set_cpu_release)) ||
-	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
-					     &scx_kfunc_set_unlocked)) ||
+					     &scx_kfunc_set_ops_context_sensitive)) ||
 	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
-					     &scx_kfunc_set_unlocked)) ||
+					     &scx_kfunc_set_ops_context_sensitive)) ||
 	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
 					     &scx_kfunc_set_any)) ||
 	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index dc40e0baf77c..efb6077810d8 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -1125,17 +1125,11 @@  BTF_KFUNCS_START(scx_kfunc_ids_select_cpu)
 BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_select_cpu)
 
-static const struct btf_kfunc_id_set scx_kfunc_set_select_cpu = {
-	.owner			= THIS_MODULE,
-	.set			= &scx_kfunc_ids_select_cpu,
-};
-
 int scx_idle_init(void)
 {
 	int ret;
 
-	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_select_cpu) ||
-	      register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_idle) ||
+	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_idle) ||
 	      register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &scx_kfunc_set_idle) ||
 	      register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_idle);