Message ID | 5f83856741646b16dde9a741fcfbfa55753eee39.1740019794.git.tanggeliang@kylinos.cn (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Matthieu Baerts |
Headers | show |
Series | BPF path manager, part 4 | expand |
Context | Check | Description |
---|---|---|
matttbe/KVM_Validation__normal | success | Success! ✅ |
matttbe/KVM_Validation__debug | success | Success! ✅ |
matttbe/KVM_Validation__btf-normal__only_bpftest_all_ | success | Success! ✅ |
matttbe/KVM_Validation__btf-debug__only_bpftest_all_ | warning | Unstable: 1 failed test(s): bpftest_test_progs-no_alu32_mptcp |
matttbe/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 120 lines checked |
matttbe/shellcheck | success | MPTCP selftests files have not been modified |
matttbe/build | success | Build and static analysis OK |
Hi Geliang, On 20/02/2025 03:57, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > In order to allow users to develop their own BPF-based path manager, > this patch defines a struct ops "mptcp_pm_ops" for a userspace path > manager, which contains a set of interfaces. > > Add a set of functions to register, unregister, find and validate a > given struct ops. > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > --- > include/net/mptcp.h | 29 ++++++++++++++++++++++ > net/mptcp/pm.c | 59 ++++++++++++++++++++++++++++++++++++++++++++ > net/mptcp/protocol.h | 5 ++++ > 3 files changed, 93 insertions(+) > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h > index a41d6c74760f..f51e75d3882d 100644 > --- a/include/net/mptcp.h > +++ b/include/net/mptcp.h > @@ -134,6 +134,35 @@ struct mptcp_pm_param { > struct mptcp_addr_info addr; > }; > > +struct mptcp_pm_ops { > + int (*created)(struct mptcp_sock *msk); > + int (*established)(struct mptcp_sock *msk); > + int (*closed)(struct mptcp_sock *msk); > + int (*address_announced)(struct mptcp_sock *msk, > + struct mptcp_pm_param *param); > + int (*address_removed)(struct mptcp_sock *msk, > + struct mptcp_pm_param *param); > + int (*subflow_established)(struct mptcp_sock *msk, > + struct mptcp_pm_param *param); > + int (*subflow_closed)(struct mptcp_sock *msk, > + struct mptcp_pm_param *param); > + int (*get_local_id)(struct mptcp_sock *msk, > + struct mptcp_pm_param *param); > + bool (*get_priority)(struct mptcp_sock *msk, > + struct mptcp_pm_param *param); > + int (*set_priority)(struct mptcp_sock *msk, > + struct mptcp_pm_param *param); > + int (*listener_created)(struct mptcp_sock *msk); > + int (*listener_closed)(struct mptcp_sock *msk); > + > + u8 type; I guess the type matches net.mptcp.pm_type sysctl knob, right? I wonder if we should not deprecate this sysctl, and use a string like with the scheduler. So instead, we could have: char name[MPTCP_PM_NAME_MAX]; And on ctrl.c, we could map pm_type for the moment with a custom proc_handler (and even remove this sysctl knob in a few releases): - 0 → in-kernel - 1 → userspace - >1 → bpf WDYT? Would it not be clearer for devs and users? Note that if for the implementation, if it is easier to keep this "type" entry for the moment for the sysctl stuff, I'm fine with that. But if we don't need it, let's not introduce it. > + struct module *owner; > + struct list_head list; > + > + void (*init)(struct mptcp_sock *msk); > + void (*release)(struct mptcp_sock *msk); To answer my question from the v3 review: the init/release is done for each MPTCP connection handled by this PM. > +} ____cacheline_aligned_in_smp; > + > #ifdef CONFIG_MPTCP > void mptcp_init(void); > > diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c > index e3457f34621c..f56b2d1e3409 100644 > --- a/net/mptcp/pm.c > +++ b/net/mptcp/pm.c > @@ -6,12 +6,17 @@ > #define pr_fmt(fmt) "MPTCP: " fmt > > #include <linux/kernel.h> > +#include <linux/rculist.h> > +#include <linux/spinlock.h> > #include <net/mptcp.h> > #include "protocol.h" > > #include "mib.h" > #include "mptcp_pm_gen.h" > > +static DEFINE_SPINLOCK(mptcp_pm_list_lock); > +static LIST_HEAD(mptcp_pm_list); > + > /* path manager command handlers */ > > int mptcp_pm_announce_addr(struct mptcp_sock *msk, > @@ -661,3 +666,57 @@ void __init mptcp_pm_init(void) > { > mptcp_pm_nl_init(); > } > + > +/* Must be called with rcu read lock held */ > +struct mptcp_pm_ops *mptcp_pm_find(enum mptcp_pm_type type) > +{ > + struct mptcp_pm_ops *pm; > + > + list_for_each_entry_rcu(pm, &mptcp_pm_list, list) { > + if (pm->type == type) > + return pm; > + } > + > + return NULL; > +} > + > +int mptcp_pm_validate(struct mptcp_pm_ops *pm) > +{ > + if (!pm->created && !pm->established && !pm->closed && > + !pm->address_announced && !pm->address_removed && > + !pm->subflow_established && !pm->subflow_closed && > + !pm->get_local_id && !pm->get_priority && !pm->set_priority && > + !pm->listener_created && !pm->listener_closed) { I'm not sure to understand the purpose of this validation. Why not forcing some or all of them? (...) Cheers, Matt
Hi Matt, Thanks for the review. On Fri, 2025-02-21 at 18:23 +0100, Matthieu Baerts wrote: > Hi Geliang, > > On 20/02/2025 03:57, Geliang Tang wrote: > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > In order to allow users to develop their own BPF-based path > > manager, > > this patch defines a struct ops "mptcp_pm_ops" for a userspace path > > manager, which contains a set of interfaces. > > > > Add a set of functions to register, unregister, find and validate a > > given struct ops. > > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > > --- > > include/net/mptcp.h | 29 ++++++++++++++++++++++ > > net/mptcp/pm.c | 59 > > ++++++++++++++++++++++++++++++++++++++++++++ > > net/mptcp/protocol.h | 5 ++++ > > 3 files changed, 93 insertions(+) > > > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h > > index a41d6c74760f..f51e75d3882d 100644 > > --- a/include/net/mptcp.h > > +++ b/include/net/mptcp.h > > @@ -134,6 +134,35 @@ struct mptcp_pm_param { > > struct mptcp_addr_info addr; > > }; > > > > +struct mptcp_pm_ops { > > + int (*created)(struct mptcp_sock *msk); > > + int (*established)(struct mptcp_sock *msk); > > + int (*closed)(struct mptcp_sock *msk); > > + int (*address_announced)(struct mptcp_sock *msk, > > + struct mptcp_pm_param *param); > > + int (*address_removed)(struct mptcp_sock *msk, > > + struct mptcp_pm_param *param); > > + int (*subflow_established)(struct mptcp_sock *msk, > > + struct mptcp_pm_param *param); > > + int (*subflow_closed)(struct mptcp_sock *msk, > > + struct mptcp_pm_param *param); > > + int (*get_local_id)(struct mptcp_sock *msk, > > + struct mptcp_pm_param *param); > > + bool (*get_priority)(struct mptcp_sock *msk, > > + struct mptcp_pm_param *param); > > + int (*set_priority)(struct mptcp_sock *msk, > > + struct mptcp_pm_param *param); > > + int (*listener_created)(struct mptcp_sock *msk); > > + int (*listener_closed)(struct mptcp_sock *msk); > > + > > + u8 type; > > I guess the type matches net.mptcp.pm_type sysctl knob, right? > > I wonder if we should not deprecate this sysctl, and use a string > like > with the scheduler. So instead, we could have: > > char name[MPTCP_PM_NAME_MAX]; > > And on ctrl.c, we could map pm_type for the moment with a custom > proc_handler (and even remove this sysctl knob in a few releases): > > - 0 → in-kernel > - 1 → userspace > - >1 → bpf Do we need to restrict BPF path manager to only extend the userspace type path manager? That is, for all BPF path managers, mptcp_pm_is_userspace() returns true. If this is the case, there is no need to add "type" field in struct mptcp_pm_ops. Is it necessary to allow BPF to create kernel type path managers, that is, path managers for which mptcp_pm_is_userspace() returns false? Since we registered mptcp_netlink_pm from this version, can the in- kernel type path manager also be extended by BPF? If this is the case, we need to add "type" field in struct mptcp_pm_ops to distinguish whether it is a userspace type path manager. > > WDYT? Would it not be clearer for devs and users? > > Note that if for the implementation, if it is easier to keep this > "type" > entry for the moment for the sysctl stuff, I'm fine with that. But if > we > don't need it, let's not introduce it. > > > + struct module *owner; > > + struct list_head list; > > + > > + void (*init)(struct mptcp_sock *msk); > > + void (*release)(struct mptcp_sock *msk); > > To answer my question from the v3 review: the init/release is done > for > each MPTCP connection handled by this PM. In-kernel pm is for each MPTCP connection handled by this PM while userspace pm is only for this one being initialized. How should I deal with this? Please give me more detailed suggestions. Thanks, -Geliang > > > +} ____cacheline_aligned_in_smp; > > + > > #ifdef CONFIG_MPTCP > > void mptcp_init(void); > > > > diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c > > index e3457f34621c..f56b2d1e3409 100644 > > --- a/net/mptcp/pm.c > > +++ b/net/mptcp/pm.c > > @@ -6,12 +6,17 @@ > > #define pr_fmt(fmt) "MPTCP: " fmt > > > > #include <linux/kernel.h> > > +#include <linux/rculist.h> > > +#include <linux/spinlock.h> > > #include <net/mptcp.h> > > #include "protocol.h" > > > > #include "mib.h" > > #include "mptcp_pm_gen.h" > > > > +static DEFINE_SPINLOCK(mptcp_pm_list_lock); > > +static LIST_HEAD(mptcp_pm_list); > > + > > /* path manager command handlers */ > > > > int mptcp_pm_announce_addr(struct mptcp_sock *msk, > > @@ -661,3 +666,57 @@ void __init mptcp_pm_init(void) > > { > > mptcp_pm_nl_init(); > > } > > + > > +/* Must be called with rcu read lock held */ > > +struct mptcp_pm_ops *mptcp_pm_find(enum mptcp_pm_type type) > > +{ > > + struct mptcp_pm_ops *pm; > > + > > + list_for_each_entry_rcu(pm, &mptcp_pm_list, list) { > > + if (pm->type == type) > > + return pm; > > + } > > + > > + return NULL; > > +} > > + > > +int mptcp_pm_validate(struct mptcp_pm_ops *pm) > > +{ > > + if (!pm->created && !pm->established && !pm->closed && > > + !pm->address_announced && !pm->address_removed && > > + !pm->subflow_established && !pm->subflow_closed && > > + !pm->get_local_id && !pm->get_priority && !pm- > > >set_priority && > > + !pm->listener_created && !pm->listener_closed) { > > I'm not sure to understand the purpose of this validation. > > Why not forcing some or all of them? > > (...) > > Cheers, > Matt
Hi Geliang, On 24/02/2025 07:54, Geliang Tang wrote: > Hi Matt, > > Thanks for the review. > > On Fri, 2025-02-21 at 18:23 +0100, Matthieu Baerts wrote: >> Hi Geliang, >> >> On 20/02/2025 03:57, Geliang Tang wrote: >>> From: Geliang Tang <tanggeliang@kylinos.cn> >>> >>> In order to allow users to develop their own BPF-based path >>> manager, >>> this patch defines a struct ops "mptcp_pm_ops" for a userspace path >>> manager, which contains a set of interfaces. >>> >>> Add a set of functions to register, unregister, find and validate a >>> given struct ops. >>> >>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> >>> --- >>> include/net/mptcp.h | 29 ++++++++++++++++++++++ >>> net/mptcp/pm.c | 59 >>> ++++++++++++++++++++++++++++++++++++++++++++ >>> net/mptcp/protocol.h | 5 ++++ >>> 3 files changed, 93 insertions(+) >>> >>> diff --git a/include/net/mptcp.h b/include/net/mptcp.h >>> index a41d6c74760f..f51e75d3882d 100644 >>> --- a/include/net/mptcp.h >>> +++ b/include/net/mptcp.h >>> @@ -134,6 +134,35 @@ struct mptcp_pm_param { >>> struct mptcp_addr_info addr; >>> }; >>> >>> +struct mptcp_pm_ops { >>> + int (*created)(struct mptcp_sock *msk); >>> + int (*established)(struct mptcp_sock *msk); >>> + int (*closed)(struct mptcp_sock *msk); >>> + int (*address_announced)(struct mptcp_sock *msk, >>> + struct mptcp_pm_param *param); >>> + int (*address_removed)(struct mptcp_sock *msk, >>> + struct mptcp_pm_param *param); >>> + int (*subflow_established)(struct mptcp_sock *msk, >>> + struct mptcp_pm_param *param); >>> + int (*subflow_closed)(struct mptcp_sock *msk, >>> + struct mptcp_pm_param *param); >>> + int (*get_local_id)(struct mptcp_sock *msk, >>> + struct mptcp_pm_param *param); >>> + bool (*get_priority)(struct mptcp_sock *msk, >>> + struct mptcp_pm_param *param); >>> + int (*set_priority)(struct mptcp_sock *msk, >>> + struct mptcp_pm_param *param); >>> + int (*listener_created)(struct mptcp_sock *msk); >>> + int (*listener_closed)(struct mptcp_sock *msk); >>> + >>> + u8 type; >> >> I guess the type matches net.mptcp.pm_type sysctl knob, right? >> >> I wonder if we should not deprecate this sysctl, and use a string >> like >> with the scheduler. So instead, we could have: >> >> char name[MPTCP_PM_NAME_MAX]; >> >> And on ctrl.c, we could map pm_type for the moment with a custom >> proc_handler (and even remove this sysctl knob in a few releases): >> >> - 0 → in-kernel >> - 1 → userspace >> - >1 → bpf > > Do we need to restrict BPF path manager to only extend the userspace > type path manager? That is, for all BPF path managers, > mptcp_pm_is_userspace() returns true. If this is the case, there is no > need to add "type" field in struct mptcp_pm_ops. To me, mptcp_pm_is_userspace() should no longer be needed, because pm->ops will be used instead. So the "type" field should no longer be needed. Only the name should be stored, and the net.mptcp.pm_type sysctl knob should depend only on the name. (I guess a lock will be needed to handle the case where both the "pm_type" and "path_manager" will be set at the same time.) > Is it necessary to allow BPF to create kernel type path managers, that > is, path managers for which mptcp_pm_is_userspace() returns false? > Since we registered mptcp_netlink_pm from this version, can the in- > kernel type path manager also be extended by BPF? If this is the case, > we need to add "type" field in struct mptcp_pm_ops to distinguish > whether it is a userspace type path manager. I guess a BPF path-manager will have its own mptcp_pm_ops structure, and it will not extend anything. Of course, it is possible to add kfunc to share some code. No? >> WDYT? Would it not be clearer for devs and users? >> >> Note that if for the implementation, if it is easier to keep this >> "type" >> entry for the moment for the sysctl stuff, I'm fine with that. But if >> we >> don't need it, let's not introduce it. >> >>> + struct module *owner; >>> + struct list_head list; >>> + >>> + void (*init)(struct mptcp_sock *msk); >>> + void (*release)(struct mptcp_sock *msk); >> >> To answer my question from the v3 review: the init/release is done >> for >> each MPTCP connection handled by this PM. > > In-kernel pm is for each MPTCP connection handled by this PM while > userspace pm is only for this one being initialized. How should I deal > with this? Please give me more detailed suggestions. Sorry, I'm not sure to understand your question. If one PM doesn't need to init and/or release anything, these ops can be set to NULL, no? Cheers, Matt
Hi Matt, On Mon, 2025-02-24 at 09:26 +0100, Matthieu Baerts wrote: > Hi Geliang, > > On 24/02/2025 07:54, Geliang Tang wrote: > > Hi Matt, > > > > Thanks for the review. > > > > On Fri, 2025-02-21 at 18:23 +0100, Matthieu Baerts wrote: > > > Hi Geliang, > > > > > > On 20/02/2025 03:57, Geliang Tang wrote: > > > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > > > > > In order to allow users to develop their own BPF-based path > > > > manager, > > > > this patch defines a struct ops "mptcp_pm_ops" for a userspace > > > > path > > > > manager, which contains a set of interfaces. > > > > > > > > Add a set of functions to register, unregister, find and > > > > validate a > > > > given struct ops. > > > > > > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > > > > --- > > > > include/net/mptcp.h | 29 ++++++++++++++++++++++ > > > > net/mptcp/pm.c | 59 > > > > ++++++++++++++++++++++++++++++++++++++++++++ > > > > net/mptcp/protocol.h | 5 ++++ > > > > 3 files changed, 93 insertions(+) > > > > > > > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h > > > > index a41d6c74760f..f51e75d3882d 100644 > > > > --- a/include/net/mptcp.h > > > > +++ b/include/net/mptcp.h > > > > @@ -134,6 +134,35 @@ struct mptcp_pm_param { > > > > struct mptcp_addr_info addr; > > > > }; > > > > > > > > +struct mptcp_pm_ops { > > > > + int (*created)(struct mptcp_sock *msk); > > > > + int (*established)(struct mptcp_sock *msk); > > > > + int (*closed)(struct mptcp_sock *msk); > > > > + int (*address_announced)(struct mptcp_sock *msk, > > > > + struct mptcp_pm_param > > > > *param); > > > > + int (*address_removed)(struct mptcp_sock *msk, > > > > + struct mptcp_pm_param *param); > > > > + int (*subflow_established)(struct mptcp_sock *msk, > > > > + struct mptcp_pm_param > > > > *param); > > > > + int (*subflow_closed)(struct mptcp_sock *msk, > > > > + struct mptcp_pm_param *param); > > > > + int (*get_local_id)(struct mptcp_sock *msk, > > > > + struct mptcp_pm_param *param); > > > > + bool (*get_priority)(struct mptcp_sock *msk, > > > > + struct mptcp_pm_param *param); > > > > + int (*set_priority)(struct mptcp_sock *msk, > > > > + struct mptcp_pm_param *param); > > > > + int (*listener_created)(struct mptcp_sock *msk); > > > > + int (*listener_closed)(struct mptcp_sock *msk); > > > > + > > > > + u8 type; > > > > > > I guess the type matches net.mptcp.pm_type sysctl knob, right? > > > > > > I wonder if we should not deprecate this sysctl, and use a string > > > like > > > with the scheduler. So instead, we could have: > > > > > > char name[MPTCP_PM_NAME_MAX]; > > > > > > And on ctrl.c, we could map pm_type for the moment with a custom > > > proc_handler (and even remove this sysctl knob in a few > > > releases): > > > > > > - 0 → in-kernel > > > - 1 → userspace > > > - >1 → bpf > > > > Do we need to restrict BPF path manager to only extend the > > userspace > > type path manager? That is, for all BPF path managers, > > mptcp_pm_is_userspace() returns true. If this is the case, there is > > no > > need to add "type" field in struct mptcp_pm_ops. > > To me, mptcp_pm_is_userspace() should no longer be needed, because > pm->ops will be used instead. So the "type" field should no longer be Currently, the path manager must rely on mptcp_pm_is_userspace(). mptcp_pm_is_userspace() is not needed for get_local_id() and get_priority(), but it's useful for address_announced(), address_removed(), subflow_created() and subflow_closed(). Take address_announced() as an example: For userspace pm, address_announced() is invoked in mptcp_pm_nl_announce_doit(), which is the handler of the command MPTCP_PM_CMD_ANNOUNCE [1]. While for in-kernel pm, address_announced() is invoked in mptcp_pm_nl_add_addr_doit(), which is the handler of the command MPTCP_PM_CMD_ADD_ADDR [2]. When we implement a BPF path manager, we must tell the PM core which path to use to call its address_announced() interface. So I added "type" in struct mptcp_pm_ops, and each path manager needs to set it to tell the PM core whether it is a userspace type path manager, and check this type in mptcp_pm_is_userspace(). [1] https://patchwork.kernel.org/project/mptcp/patch/6d39ed9364b41f84b273598f198fa1aa226a2cbc.1740047738.git.tanggeliang@kylinos.cn/ [2] https://patchwork.kernel.org/project/mptcp/patch/5881dc057b4927f30070193bde21703f0079e233.1740047738.git.tanggeliang@kylinos.cn/ > needed. Only the name should be stored, and the net.mptcp.pm_type > sysctl > knob should depend only on the name. (I guess a lock will be needed > to > handle the case where both the "pm_type" and "path_manager" will be > set > at the same time.) > > > Is it necessary to allow BPF to create kernel type path managers, > > that > > is, path managers for which mptcp_pm_is_userspace() returns false? > > Since we registered mptcp_netlink_pm from this version, can the in- > > kernel type path manager also be extended by BPF? If this is the > > case, > > we need to add "type" field in struct mptcp_pm_ops to distinguish > > whether it is a userspace type path manager. > > I guess a BPF path-manager will have its own mptcp_pm_ops structure, > and > it will not extend anything. Of course, it is possible to add kfunc > to > share some code. No? > > > > WDYT? Would it not be clearer for devs and users? > > > > > > Note that if for the implementation, if it is easier to keep this > > > "type" > > > entry for the moment for the sysctl stuff, I'm fine with that. > > > But if > > > we > > > don't need it, let's not introduce it. > > > > > > > + struct module *owner; > > > > + struct list_head list; > > > > + > > > > + void (*init)(struct mptcp_sock *msk); > > > > + void (*release)(struct mptcp_sock *msk); > > > > > > To answer my question from the v3 review: the init/release is > > > done > > > for > > > each MPTCP connection handled by this PM. > > > > In-kernel pm is for each MPTCP connection handled by this PM while > > userspace pm is only for this one being initialized. How should I > > deal > > with this? Please give me more detailed suggestions. > > Sorry, I'm not sure to understand your question. If one PM doesn't > need > to init and/or release anything, these ops can be set to NULL, no? > > Cheers, > Matt
Hi Geliang, On 24/02/2025 10:11, Geliang Tang wrote: > Hi Matt, > > On Mon, 2025-02-24 at 09:26 +0100, Matthieu Baerts wrote: >> Hi Geliang, >> >> On 24/02/2025 07:54, Geliang Tang wrote: >>> Hi Matt, >>> >>> Thanks for the review. >>> >>> On Fri, 2025-02-21 at 18:23 +0100, Matthieu Baerts wrote: >>>> Hi Geliang, >>>> >>>> On 20/02/2025 03:57, Geliang Tang wrote: >>>>> From: Geliang Tang <tanggeliang@kylinos.cn> >>>>> >>>>> In order to allow users to develop their own BPF-based path >>>>> manager, >>>>> this patch defines a struct ops "mptcp_pm_ops" for a userspace >>>>> path >>>>> manager, which contains a set of interfaces. >>>>> >>>>> Add a set of functions to register, unregister, find and >>>>> validate a >>>>> given struct ops. >>>>> >>>>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> >>>>> --- >>>>> include/net/mptcp.h | 29 ++++++++++++++++++++++ >>>>> net/mptcp/pm.c | 59 >>>>> ++++++++++++++++++++++++++++++++++++++++++++ >>>>> net/mptcp/protocol.h | 5 ++++ >>>>> 3 files changed, 93 insertions(+) >>>>> >>>>> diff --git a/include/net/mptcp.h b/include/net/mptcp.h >>>>> index a41d6c74760f..f51e75d3882d 100644 >>>>> --- a/include/net/mptcp.h >>>>> +++ b/include/net/mptcp.h >>>>> @@ -134,6 +134,35 @@ struct mptcp_pm_param { >>>>> struct mptcp_addr_info addr; >>>>> }; >>>>> >>>>> +struct mptcp_pm_ops { >>>>> + int (*created)(struct mptcp_sock *msk); >>>>> + int (*established)(struct mptcp_sock *msk); >>>>> + int (*closed)(struct mptcp_sock *msk); >>>>> + int (*address_announced)(struct mptcp_sock *msk, >>>>> + struct mptcp_pm_param >>>>> *param); >>>>> + int (*address_removed)(struct mptcp_sock *msk, >>>>> + struct mptcp_pm_param *param); >>>>> + int (*subflow_established)(struct mptcp_sock *msk, >>>>> + struct mptcp_pm_param >>>>> *param); >>>>> + int (*subflow_closed)(struct mptcp_sock *msk, >>>>> + struct mptcp_pm_param *param); >>>>> + int (*get_local_id)(struct mptcp_sock *msk, >>>>> + struct mptcp_pm_param *param); >>>>> + bool (*get_priority)(struct mptcp_sock *msk, >>>>> + struct mptcp_pm_param *param); >>>>> + int (*set_priority)(struct mptcp_sock *msk, >>>>> + struct mptcp_pm_param *param); >>>>> + int (*listener_created)(struct mptcp_sock *msk); >>>>> + int (*listener_closed)(struct mptcp_sock *msk); >>>>> + >>>>> + u8 type; >>>> >>>> I guess the type matches net.mptcp.pm_type sysctl knob, right? >>>> >>>> I wonder if we should not deprecate this sysctl, and use a string >>>> like >>>> with the scheduler. So instead, we could have: >>>> >>>> char name[MPTCP_PM_NAME_MAX]; >>>> >>>> And on ctrl.c, we could map pm_type for the moment with a custom >>>> proc_handler (and even remove this sysctl knob in a few >>>> releases): >>>> >>>> - 0 → in-kernel >>>> - 1 → userspace >>>> - >1 → bpf >>> >>> Do we need to restrict BPF path manager to only extend the >>> userspace >>> type path manager? That is, for all BPF path managers, >>> mptcp_pm_is_userspace() returns true. If this is the case, there is >>> no >>> need to add "type" field in struct mptcp_pm_ops. >> >> To me, mptcp_pm_is_userspace() should no longer be needed, because >> pm->ops will be used instead. So the "type" field should no longer be > > Currently, the path manager must rely on mptcp_pm_is_userspace(). > mptcp_pm_is_userspace() is not needed for get_local_id() and > get_priority(), but it's useful for address_announced(), > address_removed(), subflow_created() and subflow_closed(). Yes but once all ops are implemented, it should be fine, no? > Take address_announced() as an example: > > For userspace pm, address_announced() is invoked in > mptcp_pm_nl_announce_doit(), which is the handler of the command > MPTCP_PM_CMD_ANNOUNCE [1]. > > While for in-kernel pm, address_announced() is invoked in > mptcp_pm_nl_add_addr_doit(), which is the handler of the command > MPTCP_PM_CMD_ADD_ADDR [2]. > > When we implement a BPF path manager, we must tell the PM core which > path to use to call its address_announced() interface. So I added > "type" in struct mptcp_pm_ops, and each path manager needs to set it to > tell the PM core whether it is a userspace type path manager, and check > this type in mptcp_pm_is_userspace(). I think there is a bit of confusion here: address_announced() will be called when the other peer has announced an address. In other words, when an ADD_ADDR is received, pm->ops->address_announced() will be called from mptcp_pm_add_addr_received(), replacing: if (mptcp_pm_is_userspace(msk)) { (...) } else (...) { (...) } address_announced() could then either: - return a boolean: handled or drop → the PM will be in charged of sending the ADD_ADDR echo. - Or, if it is easier for the BPF PM, an enum could be returned, and in pm.c, the kernel will either: - send add addr echo - drop - do nothing (the in-kernel PM would schedule the PM worker) WDYT? Similar for address_removed(), subflow_created() and subflow_closed(). If I'm not mistaken, the userspace PM does nothing here, because that's the userspace daemon to react with the events that will be sent. Cheers, Matt
diff --git a/include/net/mptcp.h b/include/net/mptcp.h index a41d6c74760f..f51e75d3882d 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -134,6 +134,35 @@ struct mptcp_pm_param { struct mptcp_addr_info addr; }; +struct mptcp_pm_ops { + int (*created)(struct mptcp_sock *msk); + int (*established)(struct mptcp_sock *msk); + int (*closed)(struct mptcp_sock *msk); + int (*address_announced)(struct mptcp_sock *msk, + struct mptcp_pm_param *param); + int (*address_removed)(struct mptcp_sock *msk, + struct mptcp_pm_param *param); + int (*subflow_established)(struct mptcp_sock *msk, + struct mptcp_pm_param *param); + int (*subflow_closed)(struct mptcp_sock *msk, + struct mptcp_pm_param *param); + int (*get_local_id)(struct mptcp_sock *msk, + struct mptcp_pm_param *param); + bool (*get_priority)(struct mptcp_sock *msk, + struct mptcp_pm_param *param); + int (*set_priority)(struct mptcp_sock *msk, + struct mptcp_pm_param *param); + int (*listener_created)(struct mptcp_sock *msk); + int (*listener_closed)(struct mptcp_sock *msk); + + u8 type; + struct module *owner; + struct list_head list; + + void (*init)(struct mptcp_sock *msk); + void (*release)(struct mptcp_sock *msk); +} ____cacheline_aligned_in_smp; + #ifdef CONFIG_MPTCP void mptcp_init(void); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index e3457f34621c..f56b2d1e3409 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -6,12 +6,17 @@ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> +#include <linux/rculist.h> +#include <linux/spinlock.h> #include <net/mptcp.h> #include "protocol.h" #include "mib.h" #include "mptcp_pm_gen.h" +static DEFINE_SPINLOCK(mptcp_pm_list_lock); +static LIST_HEAD(mptcp_pm_list); + /* path manager command handlers */ int mptcp_pm_announce_addr(struct mptcp_sock *msk, @@ -661,3 +666,57 @@ void __init mptcp_pm_init(void) { mptcp_pm_nl_init(); } + +/* Must be called with rcu read lock held */ +struct mptcp_pm_ops *mptcp_pm_find(enum mptcp_pm_type type) +{ + struct mptcp_pm_ops *pm; + + list_for_each_entry_rcu(pm, &mptcp_pm_list, list) { + if (pm->type == type) + return pm; + } + + return NULL; +} + +int mptcp_pm_validate(struct mptcp_pm_ops *pm) +{ + if (!pm->created && !pm->established && !pm->closed && + !pm->address_announced && !pm->address_removed && + !pm->subflow_established && !pm->subflow_closed && + !pm->get_local_id && !pm->get_priority && !pm->set_priority && + !pm->listener_created && !pm->listener_closed) { + pr_err("%u does not implement required ops\n", pm->type); + return -EINVAL; + } + + return 0; +} + +int mptcp_pm_register(struct mptcp_pm_ops *pm) +{ + int ret; + + ret = mptcp_pm_validate(pm); + if (ret) + return ret; + + spin_lock(&mptcp_pm_list_lock); + if (mptcp_pm_find(pm->type)) { + spin_unlock(&mptcp_pm_list_lock); + return -EEXIST; + } + list_add_tail_rcu(&pm->list, &mptcp_pm_list); + spin_unlock(&mptcp_pm_list_lock); + + pr_debug("userspace_pm type %u registered\n", pm->type); + return 0; +} + +void mptcp_pm_unregister(struct mptcp_pm_ops *pm) +{ + spin_lock(&mptcp_pm_list_lock); + list_del_rcu(&pm->list); + spin_unlock(&mptcp_pm_list_lock); +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7987beaa730e..f3e04927e214 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1039,6 +1039,11 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); +struct mptcp_pm_ops *mptcp_pm_find(enum mptcp_pm_type type); +int mptcp_pm_validate(struct mptcp_pm_ops *pm); +int mptcp_pm_register(struct mptcp_pm_ops *pm); +void mptcp_pm_unregister(struct mptcp_pm_ops *pm); + void mptcp_free_local_addr_list(struct mptcp_sock *msk); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,