diff mbox series

[mptcp-next,v5,4/9] mptcp: pm: define struct mptcp_pm_ops

Message ID 5f83856741646b16dde9a741fcfbfa55753eee39.1740019794.git.tanggeliang@kylinos.cn (mailing list archive)
State Superseded, archived
Delegated to: Matthieu Baerts
Headers show
Series BPF path manager, part 4 | expand

Checks

Context Check Description
matttbe/KVM_Validation__normal success Success! ✅
matttbe/KVM_Validation__debug success Success! ✅
matttbe/KVM_Validation__btf-normal__only_bpftest_all_ success Success! ✅
matttbe/KVM_Validation__btf-debug__only_bpftest_all_ warning Unstable: 1 failed test(s): bpftest_test_progs-no_alu32_mptcp
matttbe/checkpatch success total: 0 errors, 0 warnings, 0 checks, 120 lines checked
matttbe/shellcheck success MPTCP selftests files have not been modified
matttbe/build success Build and static analysis OK

Commit Message

Geliang Tang Feb. 20, 2025, 2:57 a.m. UTC
From: Geliang Tang <tanggeliang@kylinos.cn>

In order to allow users to develop their own BPF-based path manager,
this patch defines a struct ops "mptcp_pm_ops" for a userspace path
manager, which contains a set of interfaces.

Add a set of functions to register, unregister, find and validate a
given struct ops.

Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 include/net/mptcp.h  | 29 ++++++++++++++++++++++
 net/mptcp/pm.c       | 59 ++++++++++++++++++++++++++++++++++++++++++++
 net/mptcp/protocol.h |  5 ++++
 3 files changed, 93 insertions(+)

Comments

Matthieu Baerts Feb. 21, 2025, 5:23 p.m. UTC | #1
Hi Geliang,

On 20/02/2025 03:57, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> In order to allow users to develop their own BPF-based path manager,
> this patch defines a struct ops "mptcp_pm_ops" for a userspace path
> manager, which contains a set of interfaces.
> 
> Add a set of functions to register, unregister, find and validate a
> given struct ops.
> 
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>  include/net/mptcp.h  | 29 ++++++++++++++++++++++
>  net/mptcp/pm.c       | 59 ++++++++++++++++++++++++++++++++++++++++++++
>  net/mptcp/protocol.h |  5 ++++
>  3 files changed, 93 insertions(+)
> 
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index a41d6c74760f..f51e75d3882d 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -134,6 +134,35 @@ struct mptcp_pm_param {
>  	struct mptcp_addr_info		addr;
>  };
>  
> +struct mptcp_pm_ops {
> +	int (*created)(struct mptcp_sock *msk);
> +	int (*established)(struct mptcp_sock *msk);
> +	int (*closed)(struct mptcp_sock *msk);
> +	int (*address_announced)(struct mptcp_sock *msk,
> +				 struct mptcp_pm_param *param);
> +	int (*address_removed)(struct mptcp_sock *msk,
> +			       struct mptcp_pm_param *param);
> +	int (*subflow_established)(struct mptcp_sock *msk,
> +				   struct mptcp_pm_param *param);
> +	int (*subflow_closed)(struct mptcp_sock *msk,
> +			      struct mptcp_pm_param *param);
> +	int (*get_local_id)(struct mptcp_sock *msk,
> +			    struct mptcp_pm_param *param);
> +	bool (*get_priority)(struct mptcp_sock *msk,
> +			     struct mptcp_pm_param *param);
> +	int (*set_priority)(struct mptcp_sock *msk,
> +			    struct mptcp_pm_param *param);
> +	int (*listener_created)(struct mptcp_sock *msk);
> +	int (*listener_closed)(struct mptcp_sock *msk);
> +
> +	u8			type;

I guess the type matches net.mptcp.pm_type sysctl knob, right?

I wonder if we should not deprecate this sysctl, and use a string like
with the scheduler. So instead, we could have:

  char			name[MPTCP_PM_NAME_MAX];

And on ctrl.c, we could map pm_type for the moment with a custom
proc_handler (and even remove this sysctl knob in a few releases):

 - 0  → in-kernel
 - 1  → userspace
 - >1 → bpf

WDYT? Would it not be clearer for devs and users?

Note that if for the implementation, if it is easier to keep this "type"
entry for the moment for the sysctl stuff, I'm fine with that. But if we
don't need it, let's not introduce it.

> +	struct module		*owner;
> +	struct list_head	list;
> +
> +	void (*init)(struct mptcp_sock *msk);
> +	void (*release)(struct mptcp_sock *msk);

To answer my question from the v3 review: the init/release is done for
each MPTCP connection handled by this PM.

> +} ____cacheline_aligned_in_smp;
> +
>  #ifdef CONFIG_MPTCP
>  void mptcp_init(void);
>  
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index e3457f34621c..f56b2d1e3409 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -6,12 +6,17 @@
>  #define pr_fmt(fmt) "MPTCP: " fmt
>  
>  #include <linux/kernel.h>
> +#include <linux/rculist.h>
> +#include <linux/spinlock.h>
>  #include <net/mptcp.h>
>  #include "protocol.h"
>  
>  #include "mib.h"
>  #include "mptcp_pm_gen.h"
>  
> +static DEFINE_SPINLOCK(mptcp_pm_list_lock);
> +static LIST_HEAD(mptcp_pm_list);
> +
>  /* path manager command handlers */
>  
>  int mptcp_pm_announce_addr(struct mptcp_sock *msk,
> @@ -661,3 +666,57 @@ void __init mptcp_pm_init(void)
>  {
>  	mptcp_pm_nl_init();
>  }
> +
> +/* Must be called with rcu read lock held */
> +struct mptcp_pm_ops *mptcp_pm_find(enum mptcp_pm_type type)
> +{
> +	struct mptcp_pm_ops *pm;
> +
> +	list_for_each_entry_rcu(pm, &mptcp_pm_list, list) {
> +		if (pm->type == type)
> +			return pm;
> +	}
> +
> +	return NULL;
> +}
> +
> +int mptcp_pm_validate(struct mptcp_pm_ops *pm)
> +{
> +	if (!pm->created && !pm->established && !pm->closed &&
> +	    !pm->address_announced && !pm->address_removed &&
> +	    !pm->subflow_established && !pm->subflow_closed &&
> +	    !pm->get_local_id && !pm->get_priority && !pm->set_priority &&
> +	    !pm->listener_created && !pm->listener_closed) {

I'm not sure to understand the purpose of this validation.

Why not forcing some or all of them?

(...)

Cheers,
Matt
Geliang Tang Feb. 24, 2025, 6:54 a.m. UTC | #2
Hi Matt,

Thanks for the review.

On Fri, 2025-02-21 at 18:23 +0100, Matthieu Baerts wrote:
> Hi Geliang,
> 
> On 20/02/2025 03:57, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > In order to allow users to develop their own BPF-based path
> > manager,
> > this patch defines a struct ops "mptcp_pm_ops" for a userspace path
> > manager, which contains a set of interfaces.
> > 
> > Add a set of functions to register, unregister, find and validate a
> > given struct ops.
> > 
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >  include/net/mptcp.h  | 29 ++++++++++++++++++++++
> >  net/mptcp/pm.c       | 59
> > ++++++++++++++++++++++++++++++++++++++++++++
> >  net/mptcp/protocol.h |  5 ++++
> >  3 files changed, 93 insertions(+)
> > 
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > index a41d6c74760f..f51e75d3882d 100644
> > --- a/include/net/mptcp.h
> > +++ b/include/net/mptcp.h
> > @@ -134,6 +134,35 @@ struct mptcp_pm_param {
> >  	struct mptcp_addr_info		addr;
> >  };
> >  
> > +struct mptcp_pm_ops {
> > +	int (*created)(struct mptcp_sock *msk);
> > +	int (*established)(struct mptcp_sock *msk);
> > +	int (*closed)(struct mptcp_sock *msk);
> > +	int (*address_announced)(struct mptcp_sock *msk,
> > +				 struct mptcp_pm_param *param);
> > +	int (*address_removed)(struct mptcp_sock *msk,
> > +			       struct mptcp_pm_param *param);
> > +	int (*subflow_established)(struct mptcp_sock *msk,
> > +				   struct mptcp_pm_param *param);
> > +	int (*subflow_closed)(struct mptcp_sock *msk,
> > +			      struct mptcp_pm_param *param);
> > +	int (*get_local_id)(struct mptcp_sock *msk,
> > +			    struct mptcp_pm_param *param);
> > +	bool (*get_priority)(struct mptcp_sock *msk,
> > +			     struct mptcp_pm_param *param);
> > +	int (*set_priority)(struct mptcp_sock *msk,
> > +			    struct mptcp_pm_param *param);
> > +	int (*listener_created)(struct mptcp_sock *msk);
> > +	int (*listener_closed)(struct mptcp_sock *msk);
> > +
> > +	u8			type;
> 
> I guess the type matches net.mptcp.pm_type sysctl knob, right?
> 
> I wonder if we should not deprecate this sysctl, and use a string
> like
> with the scheduler. So instead, we could have:
> 
>   char			name[MPTCP_PM_NAME_MAX];
> 
> And on ctrl.c, we could map pm_type for the moment with a custom
> proc_handler (and even remove this sysctl knob in a few releases):
> 
>  - 0  → in-kernel
>  - 1  → userspace
>  - >1 → bpf

Do we need to restrict BPF path manager to only extend the userspace
type path manager? That is, for all BPF path managers,
mptcp_pm_is_userspace() returns true. If this is the case, there is no
need to add "type" field in struct mptcp_pm_ops.

Is it necessary to allow BPF to create kernel type path managers, that
is, path managers for which mptcp_pm_is_userspace() returns false?
Since we registered mptcp_netlink_pm from this version, can the in-
kernel type path manager also be extended by BPF? If this is the case,
we need to add "type" field in struct mptcp_pm_ops to distinguish
whether it is a userspace type path manager.

> 
> WDYT? Would it not be clearer for devs and users?
> 
> Note that if for the implementation, if it is easier to keep this
> "type"
> entry for the moment for the sysctl stuff, I'm fine with that. But if
> we
> don't need it, let's not introduce it.
> 
> > +	struct module		*owner;
> > +	struct list_head	list;
> > +
> > +	void (*init)(struct mptcp_sock *msk);
> > +	void (*release)(struct mptcp_sock *msk);
> 
> To answer my question from the v3 review: the init/release is done
> for
> each MPTCP connection handled by this PM.

In-kernel pm is for each MPTCP connection handled by this PM while
userspace pm is only for this one being initialized. How should I deal
with this? Please give me more detailed suggestions.

Thanks,
-Geliang

> 
> > +} ____cacheline_aligned_in_smp;
> > +
> >  #ifdef CONFIG_MPTCP
> >  void mptcp_init(void);
> >  
> > diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> > index e3457f34621c..f56b2d1e3409 100644
> > --- a/net/mptcp/pm.c
> > +++ b/net/mptcp/pm.c
> > @@ -6,12 +6,17 @@
> >  #define pr_fmt(fmt) "MPTCP: " fmt
> >  
> >  #include <linux/kernel.h>
> > +#include <linux/rculist.h>
> > +#include <linux/spinlock.h>
> >  #include <net/mptcp.h>
> >  #include "protocol.h"
> >  
> >  #include "mib.h"
> >  #include "mptcp_pm_gen.h"
> >  
> > +static DEFINE_SPINLOCK(mptcp_pm_list_lock);
> > +static LIST_HEAD(mptcp_pm_list);
> > +
> >  /* path manager command handlers */
> >  
> >  int mptcp_pm_announce_addr(struct mptcp_sock *msk,
> > @@ -661,3 +666,57 @@ void __init mptcp_pm_init(void)
> >  {
> >  	mptcp_pm_nl_init();
> >  }
> > +
> > +/* Must be called with rcu read lock held */
> > +struct mptcp_pm_ops *mptcp_pm_find(enum mptcp_pm_type type)
> > +{
> > +	struct mptcp_pm_ops *pm;
> > +
> > +	list_for_each_entry_rcu(pm, &mptcp_pm_list, list) {
> > +		if (pm->type == type)
> > +			return pm;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +int mptcp_pm_validate(struct mptcp_pm_ops *pm)
> > +{
> > +	if (!pm->created && !pm->established && !pm->closed &&
> > +	    !pm->address_announced && !pm->address_removed &&
> > +	    !pm->subflow_established && !pm->subflow_closed &&
> > +	    !pm->get_local_id && !pm->get_priority && !pm-
> > >set_priority &&
> > +	    !pm->listener_created && !pm->listener_closed) {
> 
> I'm not sure to understand the purpose of this validation.
> 
> Why not forcing some or all of them?
> 
> (...)
> 
> Cheers,
> Matt
Matthieu Baerts Feb. 24, 2025, 8:26 a.m. UTC | #3
Hi Geliang,

On 24/02/2025 07:54, Geliang Tang wrote:
> Hi Matt,
> 
> Thanks for the review.
> 
> On Fri, 2025-02-21 at 18:23 +0100, Matthieu Baerts wrote:
>> Hi Geliang,
>>
>> On 20/02/2025 03:57, Geliang Tang wrote:
>>> From: Geliang Tang <tanggeliang@kylinos.cn>
>>>
>>> In order to allow users to develop their own BPF-based path
>>> manager,
>>> this patch defines a struct ops "mptcp_pm_ops" for a userspace path
>>> manager, which contains a set of interfaces.
>>>
>>> Add a set of functions to register, unregister, find and validate a
>>> given struct ops.
>>>
>>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
>>> ---
>>>  include/net/mptcp.h  | 29 ++++++++++++++++++++++
>>>  net/mptcp/pm.c       | 59
>>> ++++++++++++++++++++++++++++++++++++++++++++
>>>  net/mptcp/protocol.h |  5 ++++
>>>  3 files changed, 93 insertions(+)
>>>
>>> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
>>> index a41d6c74760f..f51e75d3882d 100644
>>> --- a/include/net/mptcp.h
>>> +++ b/include/net/mptcp.h
>>> @@ -134,6 +134,35 @@ struct mptcp_pm_param {
>>>  	struct mptcp_addr_info		addr;
>>>  };
>>>  
>>> +struct mptcp_pm_ops {
>>> +	int (*created)(struct mptcp_sock *msk);
>>> +	int (*established)(struct mptcp_sock *msk);
>>> +	int (*closed)(struct mptcp_sock *msk);
>>> +	int (*address_announced)(struct mptcp_sock *msk,
>>> +				 struct mptcp_pm_param *param);
>>> +	int (*address_removed)(struct mptcp_sock *msk,
>>> +			       struct mptcp_pm_param *param);
>>> +	int (*subflow_established)(struct mptcp_sock *msk,
>>> +				   struct mptcp_pm_param *param);
>>> +	int (*subflow_closed)(struct mptcp_sock *msk,
>>> +			      struct mptcp_pm_param *param);
>>> +	int (*get_local_id)(struct mptcp_sock *msk,
>>> +			    struct mptcp_pm_param *param);
>>> +	bool (*get_priority)(struct mptcp_sock *msk,
>>> +			     struct mptcp_pm_param *param);
>>> +	int (*set_priority)(struct mptcp_sock *msk,
>>> +			    struct mptcp_pm_param *param);
>>> +	int (*listener_created)(struct mptcp_sock *msk);
>>> +	int (*listener_closed)(struct mptcp_sock *msk);
>>> +
>>> +	u8			type;
>>
>> I guess the type matches net.mptcp.pm_type sysctl knob, right?
>>
>> I wonder if we should not deprecate this sysctl, and use a string
>> like
>> with the scheduler. So instead, we could have:
>>
>>   char			name[MPTCP_PM_NAME_MAX];
>>
>> And on ctrl.c, we could map pm_type for the moment with a custom
>> proc_handler (and even remove this sysctl knob in a few releases):
>>
>>  - 0  → in-kernel
>>  - 1  → userspace
>>  - >1 → bpf
> 
> Do we need to restrict BPF path manager to only extend the userspace
> type path manager? That is, for all BPF path managers,
> mptcp_pm_is_userspace() returns true. If this is the case, there is no
> need to add "type" field in struct mptcp_pm_ops.

To me, mptcp_pm_is_userspace() should no longer be needed, because
pm->ops will be used instead. So the "type" field should no longer be
needed. Only the name should be stored, and the net.mptcp.pm_type sysctl
knob should depend only on the name. (I guess a lock will be needed to
handle the case where both the "pm_type" and "path_manager" will be set
at the same time.)

> Is it necessary to allow BPF to create kernel type path managers, that
> is, path managers for which mptcp_pm_is_userspace() returns false?
> Since we registered mptcp_netlink_pm from this version, can the in-
> kernel type path manager also be extended by BPF? If this is the case,
> we need to add "type" field in struct mptcp_pm_ops to distinguish
> whether it is a userspace type path manager.

I guess a BPF path-manager will have its own mptcp_pm_ops structure, and
it will not extend anything. Of course, it is possible to add kfunc to
share some code. No?

>> WDYT? Would it not be clearer for devs and users?
>>
>> Note that if for the implementation, if it is easier to keep this
>> "type"
>> entry for the moment for the sysctl stuff, I'm fine with that. But if
>> we
>> don't need it, let's not introduce it.
>>
>>> +	struct module		*owner;
>>> +	struct list_head	list;
>>> +
>>> +	void (*init)(struct mptcp_sock *msk);
>>> +	void (*release)(struct mptcp_sock *msk);
>>
>> To answer my question from the v3 review: the init/release is done
>> for
>> each MPTCP connection handled by this PM.
> 
> In-kernel pm is for each MPTCP connection handled by this PM while
> userspace pm is only for this one being initialized. How should I deal
> with this? Please give me more detailed suggestions.

Sorry, I'm not sure to understand your question. If one PM doesn't need
to init and/or release anything, these ops can be set to NULL, no?

Cheers,
Matt
Geliang Tang Feb. 24, 2025, 9:11 a.m. UTC | #4
Hi Matt,

On Mon, 2025-02-24 at 09:26 +0100, Matthieu Baerts wrote:
> Hi Geliang,
> 
> On 24/02/2025 07:54, Geliang Tang wrote:
> > Hi Matt,
> > 
> > Thanks for the review.
> > 
> > On Fri, 2025-02-21 at 18:23 +0100, Matthieu Baerts wrote:
> > > Hi Geliang,
> > > 
> > > On 20/02/2025 03:57, Geliang Tang wrote:
> > > > From: Geliang Tang <tanggeliang@kylinos.cn>
> > > > 
> > > > In order to allow users to develop their own BPF-based path
> > > > manager,
> > > > this patch defines a struct ops "mptcp_pm_ops" for a userspace
> > > > path
> > > > manager, which contains a set of interfaces.
> > > > 
> > > > Add a set of functions to register, unregister, find and
> > > > validate a
> > > > given struct ops.
> > > > 
> > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > > > ---
> > > >  include/net/mptcp.h  | 29 ++++++++++++++++++++++
> > > >  net/mptcp/pm.c       | 59
> > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > >  net/mptcp/protocol.h |  5 ++++
> > > >  3 files changed, 93 insertions(+)
> > > > 
> > > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > > > index a41d6c74760f..f51e75d3882d 100644
> > > > --- a/include/net/mptcp.h
> > > > +++ b/include/net/mptcp.h
> > > > @@ -134,6 +134,35 @@ struct mptcp_pm_param {
> > > >  	struct mptcp_addr_info		addr;
> > > >  };
> > > >  
> > > > +struct mptcp_pm_ops {
> > > > +	int (*created)(struct mptcp_sock *msk);
> > > > +	int (*established)(struct mptcp_sock *msk);
> > > > +	int (*closed)(struct mptcp_sock *msk);
> > > > +	int (*address_announced)(struct mptcp_sock *msk,
> > > > +				 struct mptcp_pm_param
> > > > *param);
> > > > +	int (*address_removed)(struct mptcp_sock *msk,
> > > > +			       struct mptcp_pm_param *param);
> > > > +	int (*subflow_established)(struct mptcp_sock *msk,
> > > > +				   struct mptcp_pm_param
> > > > *param);
> > > > +	int (*subflow_closed)(struct mptcp_sock *msk,
> > > > +			      struct mptcp_pm_param *param);
> > > > +	int (*get_local_id)(struct mptcp_sock *msk,
> > > > +			    struct mptcp_pm_param *param);
> > > > +	bool (*get_priority)(struct mptcp_sock *msk,
> > > > +			     struct mptcp_pm_param *param);
> > > > +	int (*set_priority)(struct mptcp_sock *msk,
> > > > +			    struct mptcp_pm_param *param);
> > > > +	int (*listener_created)(struct mptcp_sock *msk);
> > > > +	int (*listener_closed)(struct mptcp_sock *msk);
> > > > +
> > > > +	u8			type;
> > > 
> > > I guess the type matches net.mptcp.pm_type sysctl knob, right?
> > > 
> > > I wonder if we should not deprecate this sysctl, and use a string
> > > like
> > > with the scheduler. So instead, we could have:
> > > 
> > >   char			name[MPTCP_PM_NAME_MAX];
> > > 
> > > And on ctrl.c, we could map pm_type for the moment with a custom
> > > proc_handler (and even remove this sysctl knob in a few
> > > releases):
> > > 
> > >  - 0  → in-kernel
> > >  - 1  → userspace
> > >  - >1 → bpf
> > 
> > Do we need to restrict BPF path manager to only extend the
> > userspace
> > type path manager? That is, for all BPF path managers,
> > mptcp_pm_is_userspace() returns true. If this is the case, there is
> > no
> > need to add "type" field in struct mptcp_pm_ops.
> 
> To me, mptcp_pm_is_userspace() should no longer be needed, because
> pm->ops will be used instead. So the "type" field should no longer be

Currently, the path manager must rely on mptcp_pm_is_userspace().
mptcp_pm_is_userspace() is not needed for get_local_id() and
get_priority(), but it's useful for address_announced(),
address_removed(), subflow_created() and subflow_closed(). 

Take address_announced() as an example:

For userspace pm, address_announced() is invoked in
mptcp_pm_nl_announce_doit(), which is the handler of the command
MPTCP_PM_CMD_ANNOUNCE [1].

While for in-kernel pm, address_announced() is invoked in
mptcp_pm_nl_add_addr_doit(), which is the handler of the command
MPTCP_PM_CMD_ADD_ADDR [2].

When we implement a BPF path manager, we must tell the PM core which
path to use to call its address_announced() interface. So I added
"type" in struct mptcp_pm_ops, and each path manager needs to set it to
tell the PM core whether it is a userspace type path manager, and check
this type in mptcp_pm_is_userspace().

[1]
https://patchwork.kernel.org/project/mptcp/patch/6d39ed9364b41f84b273598f198fa1aa226a2cbc.1740047738.git.tanggeliang@kylinos.cn/
[2]
https://patchwork.kernel.org/project/mptcp/patch/5881dc057b4927f30070193bde21703f0079e233.1740047738.git.tanggeliang@kylinos.cn/

> needed. Only the name should be stored, and the net.mptcp.pm_type
> sysctl
> knob should depend only on the name. (I guess a lock will be needed
> to
> handle the case where both the "pm_type" and "path_manager" will be
> set
> at the same time.)
> 
> > Is it necessary to allow BPF to create kernel type path managers,
> > that
> > is, path managers for which mptcp_pm_is_userspace() returns false?
> > Since we registered mptcp_netlink_pm from this version, can the in-
> > kernel type path manager also be extended by BPF? If this is the
> > case,
> > we need to add "type" field in struct mptcp_pm_ops to distinguish
> > whether it is a userspace type path manager.
> 
> I guess a BPF path-manager will have its own mptcp_pm_ops structure,
> and
> it will not extend anything. Of course, it is possible to add kfunc
> to
> share some code. No?
> 
> > > WDYT? Would it not be clearer for devs and users?
> > > 
> > > Note that if for the implementation, if it is easier to keep this
> > > "type"
> > > entry for the moment for the sysctl stuff, I'm fine with that.
> > > But if
> > > we
> > > don't need it, let's not introduce it.
> > > 
> > > > +	struct module		*owner;
> > > > +	struct list_head	list;
> > > > +
> > > > +	void (*init)(struct mptcp_sock *msk);
> > > > +	void (*release)(struct mptcp_sock *msk);
> > > 
> > > To answer my question from the v3 review: the init/release is
> > > done
> > > for
> > > each MPTCP connection handled by this PM.
> > 
> > In-kernel pm is for each MPTCP connection handled by this PM while
> > userspace pm is only for this one being initialized. How should I
> > deal
> > with this? Please give me more detailed suggestions.
> 
> Sorry, I'm not sure to understand your question. If one PM doesn't
> need
> to init and/or release anything, these ops can be set to NULL, no?
> 
> Cheers,
> Matt
Matthieu Baerts Feb. 24, 2025, 10:24 a.m. UTC | #5
Hi Geliang,

On 24/02/2025 10:11, Geliang Tang wrote:
> Hi Matt,
> 
> On Mon, 2025-02-24 at 09:26 +0100, Matthieu Baerts wrote:
>> Hi Geliang,
>>
>> On 24/02/2025 07:54, Geliang Tang wrote:
>>> Hi Matt,
>>>
>>> Thanks for the review.
>>>
>>> On Fri, 2025-02-21 at 18:23 +0100, Matthieu Baerts wrote:
>>>> Hi Geliang,
>>>>
>>>> On 20/02/2025 03:57, Geliang Tang wrote:
>>>>> From: Geliang Tang <tanggeliang@kylinos.cn>
>>>>>
>>>>> In order to allow users to develop their own BPF-based path
>>>>> manager,
>>>>> this patch defines a struct ops "mptcp_pm_ops" for a userspace
>>>>> path
>>>>> manager, which contains a set of interfaces.
>>>>>
>>>>> Add a set of functions to register, unregister, find and
>>>>> validate a
>>>>> given struct ops.
>>>>>
>>>>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
>>>>> ---
>>>>>  include/net/mptcp.h  | 29 ++++++++++++++++++++++
>>>>>  net/mptcp/pm.c       | 59
>>>>> ++++++++++++++++++++++++++++++++++++++++++++
>>>>>  net/mptcp/protocol.h |  5 ++++
>>>>>  3 files changed, 93 insertions(+)
>>>>>
>>>>> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
>>>>> index a41d6c74760f..f51e75d3882d 100644
>>>>> --- a/include/net/mptcp.h
>>>>> +++ b/include/net/mptcp.h
>>>>> @@ -134,6 +134,35 @@ struct mptcp_pm_param {
>>>>>  	struct mptcp_addr_info		addr;
>>>>>  };
>>>>>  
>>>>> +struct mptcp_pm_ops {
>>>>> +	int (*created)(struct mptcp_sock *msk);
>>>>> +	int (*established)(struct mptcp_sock *msk);
>>>>> +	int (*closed)(struct mptcp_sock *msk);
>>>>> +	int (*address_announced)(struct mptcp_sock *msk,
>>>>> +				 struct mptcp_pm_param
>>>>> *param);
>>>>> +	int (*address_removed)(struct mptcp_sock *msk,
>>>>> +			       struct mptcp_pm_param *param);
>>>>> +	int (*subflow_established)(struct mptcp_sock *msk,
>>>>> +				   struct mptcp_pm_param
>>>>> *param);
>>>>> +	int (*subflow_closed)(struct mptcp_sock *msk,
>>>>> +			      struct mptcp_pm_param *param);
>>>>> +	int (*get_local_id)(struct mptcp_sock *msk,
>>>>> +			    struct mptcp_pm_param *param);
>>>>> +	bool (*get_priority)(struct mptcp_sock *msk,
>>>>> +			     struct mptcp_pm_param *param);
>>>>> +	int (*set_priority)(struct mptcp_sock *msk,
>>>>> +			    struct mptcp_pm_param *param);
>>>>> +	int (*listener_created)(struct mptcp_sock *msk);
>>>>> +	int (*listener_closed)(struct mptcp_sock *msk);
>>>>> +
>>>>> +	u8			type;
>>>>
>>>> I guess the type matches net.mptcp.pm_type sysctl knob, right?
>>>>
>>>> I wonder if we should not deprecate this sysctl, and use a string
>>>> like
>>>> with the scheduler. So instead, we could have:
>>>>
>>>>   char			name[MPTCP_PM_NAME_MAX];
>>>>
>>>> And on ctrl.c, we could map pm_type for the moment with a custom
>>>> proc_handler (and even remove this sysctl knob in a few
>>>> releases):
>>>>
>>>>  - 0  → in-kernel
>>>>  - 1  → userspace
>>>>  - >1 → bpf
>>>
>>> Do we need to restrict BPF path manager to only extend the
>>> userspace
>>> type path manager? That is, for all BPF path managers,
>>> mptcp_pm_is_userspace() returns true. If this is the case, there is
>>> no
>>> need to add "type" field in struct mptcp_pm_ops.
>>
>> To me, mptcp_pm_is_userspace() should no longer be needed, because
>> pm->ops will be used instead. So the "type" field should no longer be
> 
> Currently, the path manager must rely on mptcp_pm_is_userspace().
> mptcp_pm_is_userspace() is not needed for get_local_id() and
> get_priority(), but it's useful for address_announced(),
> address_removed(), subflow_created() and subflow_closed(). 

Yes but once all ops are implemented, it should be fine, no?

> Take address_announced() as an example:
> 
> For userspace pm, address_announced() is invoked in
> mptcp_pm_nl_announce_doit(), which is the handler of the command
> MPTCP_PM_CMD_ANNOUNCE [1].
> 
> While for in-kernel pm, address_announced() is invoked in
> mptcp_pm_nl_add_addr_doit(), which is the handler of the command
> MPTCP_PM_CMD_ADD_ADDR [2].
> 
> When we implement a BPF path manager, we must tell the PM core which
> path to use to call its address_announced() interface. So I added
> "type" in struct mptcp_pm_ops, and each path manager needs to set it to
> tell the PM core whether it is a userspace type path manager, and check
> this type in mptcp_pm_is_userspace().

I think there is a bit of confusion here: address_announced() will be
called when the other peer has announced an address. In other words,
when an ADD_ADDR is received, pm->ops->address_announced() will be
called from mptcp_pm_add_addr_received(), replacing:

  if (mptcp_pm_is_userspace(msk)) {
     (...)
  } else (...) {
     (...)
  }

address_announced() could then either:
- return a boolean: handled or drop → the PM will be in charged of
sending the ADD_ADDR echo.

- Or, if it is easier for the BPF PM, an enum could be returned, and in
pm.c, the kernel will either:
  - send add addr echo
  - drop
  - do nothing (the in-kernel PM would schedule the PM worker)

WDYT?

Similar for address_removed(), subflow_created() and subflow_closed().
If I'm not mistaken, the userspace PM does nothing here, because that's
the userspace daemon to react with the events that will be sent.

Cheers,
Matt
diff mbox series

Patch

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index a41d6c74760f..f51e75d3882d 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -134,6 +134,35 @@  struct mptcp_pm_param {
 	struct mptcp_addr_info		addr;
 };
 
+struct mptcp_pm_ops {
+	int (*created)(struct mptcp_sock *msk);
+	int (*established)(struct mptcp_sock *msk);
+	int (*closed)(struct mptcp_sock *msk);
+	int (*address_announced)(struct mptcp_sock *msk,
+				 struct mptcp_pm_param *param);
+	int (*address_removed)(struct mptcp_sock *msk,
+			       struct mptcp_pm_param *param);
+	int (*subflow_established)(struct mptcp_sock *msk,
+				   struct mptcp_pm_param *param);
+	int (*subflow_closed)(struct mptcp_sock *msk,
+			      struct mptcp_pm_param *param);
+	int (*get_local_id)(struct mptcp_sock *msk,
+			    struct mptcp_pm_param *param);
+	bool (*get_priority)(struct mptcp_sock *msk,
+			     struct mptcp_pm_param *param);
+	int (*set_priority)(struct mptcp_sock *msk,
+			    struct mptcp_pm_param *param);
+	int (*listener_created)(struct mptcp_sock *msk);
+	int (*listener_closed)(struct mptcp_sock *msk);
+
+	u8			type;
+	struct module		*owner;
+	struct list_head	list;
+
+	void (*init)(struct mptcp_sock *msk);
+	void (*release)(struct mptcp_sock *msk);
+} ____cacheline_aligned_in_smp;
+
 #ifdef CONFIG_MPTCP
 void mptcp_init(void);
 
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index e3457f34621c..f56b2d1e3409 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -6,12 +6,17 @@ 
 #define pr_fmt(fmt) "MPTCP: " fmt
 
 #include <linux/kernel.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
 #include <net/mptcp.h>
 #include "protocol.h"
 
 #include "mib.h"
 #include "mptcp_pm_gen.h"
 
+static DEFINE_SPINLOCK(mptcp_pm_list_lock);
+static LIST_HEAD(mptcp_pm_list);
+
 /* path manager command handlers */
 
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
@@ -661,3 +666,57 @@  void __init mptcp_pm_init(void)
 {
 	mptcp_pm_nl_init();
 }
+
+/* Must be called with rcu read lock held */
+struct mptcp_pm_ops *mptcp_pm_find(enum mptcp_pm_type type)
+{
+	struct mptcp_pm_ops *pm;
+
+	list_for_each_entry_rcu(pm, &mptcp_pm_list, list) {
+		if (pm->type == type)
+			return pm;
+	}
+
+	return NULL;
+}
+
+int mptcp_pm_validate(struct mptcp_pm_ops *pm)
+{
+	if (!pm->created && !pm->established && !pm->closed &&
+	    !pm->address_announced && !pm->address_removed &&
+	    !pm->subflow_established && !pm->subflow_closed &&
+	    !pm->get_local_id && !pm->get_priority && !pm->set_priority &&
+	    !pm->listener_created && !pm->listener_closed) {
+		pr_err("%u does not implement required ops\n", pm->type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int mptcp_pm_register(struct mptcp_pm_ops *pm)
+{
+	int ret;
+
+	ret = mptcp_pm_validate(pm);
+	if (ret)
+		return ret;
+
+	spin_lock(&mptcp_pm_list_lock);
+	if (mptcp_pm_find(pm->type)) {
+		spin_unlock(&mptcp_pm_list_lock);
+		return -EEXIST;
+	}
+	list_add_tail_rcu(&pm->list, &mptcp_pm_list);
+	spin_unlock(&mptcp_pm_list_lock);
+
+	pr_debug("userspace_pm type %u registered\n", pm->type);
+	return 0;
+}
+
+void mptcp_pm_unregister(struct mptcp_pm_ops *pm)
+{
+	spin_lock(&mptcp_pm_list_lock);
+	list_del_rcu(&pm->list);
+	spin_unlock(&mptcp_pm_list_lock);
+}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7987beaa730e..f3e04927e214 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -1039,6 +1039,11 @@  int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
 void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk,
 				struct mptcp_pm_addr_entry *entry);
 
+struct mptcp_pm_ops *mptcp_pm_find(enum mptcp_pm_type type);
+int mptcp_pm_validate(struct mptcp_pm_ops *pm);
+int mptcp_pm_register(struct mptcp_pm_ops *pm);
+void mptcp_pm_unregister(struct mptcp_pm_ops *pm);
+
 void mptcp_free_local_addr_list(struct mptcp_sock *msk);
 
 void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,