diff mbox series

[mptcp-next,v3,1/5] bpf: Add mptcp_subflow bpf_iter

Message ID 026dce3d6903ad189e4b0518a64b60c910e660c0.1725946276.git.tanggeliang@kylinos.cn (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [mptcp-next,v3,1/5] bpf: Add mptcp_subflow bpf_iter | expand

Commit Message

Geliang Tang Sept. 10, 2024, 5:37 a.m. UTC
From: Geliang Tang <tanggeliang@kylinos.cn>

It's necessary to traverse all subflows on the conn_list of an MPTCP
socket and then call kfunc to modify the fields of each subflow. In
kernel space, mptcp_for_each_subflow() helper is used for this:

	mptcp_for_each_subflow(msk, subflow)
		kfunc(subflow);

But in the MPTCP BPF program, this has not yet been implemented. As
Martin suggested recently, this conn_list walking + modify-by-kfunc
usage fits the bpf_iter use case. So this patch adds a new bpf_iter
type named "mptcp_subflow" to do this and implements its helpers
bpf_iter_mptcp_subflow_new()/_next()/_destroy().

Then bpf_for_each() for mptcp_subflow can be used in BPF program like
this:

	bpf_rcu_read_lock();
	bpf_for_each(mptcp_subflow, subflow, msk)
		kfunc(subflow);
	bpf_rcu_read_unlock();

Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/mptcp/bpf.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 4 deletions(-)

Comments

Andrii Nakryiko Sept. 11, 2024, 9 p.m. UTC | #1
On Mon, Sep 9, 2024 at 10:37 PM Geliang Tang <geliang@kernel.org> wrote:
>
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> It's necessary to traverse all subflows on the conn_list of an MPTCP
> socket and then call kfunc to modify the fields of each subflow. In
> kernel space, mptcp_for_each_subflow() helper is used for this:
>
>         mptcp_for_each_subflow(msk, subflow)
>                 kfunc(subflow);
>
> But in the MPTCP BPF program, this has not yet been implemented. As
> Martin suggested recently, this conn_list walking + modify-by-kfunc
> usage fits the bpf_iter use case. So this patch adds a new bpf_iter
> type named "mptcp_subflow" to do this and implements its helpers
> bpf_iter_mptcp_subflow_new()/_next()/_destroy().
>
> Then bpf_for_each() for mptcp_subflow can be used in BPF program like
> this:
>
>         bpf_rcu_read_lock();
>         bpf_for_each(mptcp_subflow, subflow, msk)
>                 kfunc(subflow);
>         bpf_rcu_read_unlock();
>
> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>  net/mptcp/bpf.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 43 insertions(+), 4 deletions(-)
>

Not sure why, but only this patch made it to the BPF mailing list? Did
you cc bpf@vger on all patches?

> diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> index 6414824402e6..350672e24a3d 100644
> --- a/net/mptcp/bpf.c
> +++ b/net/mptcp/bpf.c
> @@ -201,9 +201,48 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
>         .set   = &bpf_mptcp_fmodret_ids,
>  };
>
> -__diag_push();
> -__diag_ignore_all("-Wmissing-prototypes",
> -                 "kfuncs which will be used in BPF programs");
> +struct bpf_iter_mptcp_subflow {
> +       __u64 __opaque[2];
> +} __attribute__((aligned(8)));
> +
> +struct bpf_iter_mptcp_subflow_kern {
> +       struct mptcp_sock *msk;
> +       struct list_head *pos;
> +} __attribute__((aligned(8)));
> +
> +__bpf_kfunc_start_defs();
> +
> +__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
> +                                          struct mptcp_sock *msk)
> +{
> +       struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> +
> +       if (!msk)
> +               return -EINVAL;

you still need to initialize at least kit->msk to NULL to prevent next
implementation below doing the wrong thing

keep in mind, iterator constructor returning error doesn't prevent BPF
program from still calling next() and destroy(), so implementation has
to set iterator state such that next can return NULL if iterator was
never successfully initialized

pw-bot: cr

> +
> +       kit->msk = msk;
> +       kit->pos = &msk->conn_list;
> +       return 0;
> +}
> +
> +__bpf_kfunc struct mptcp_subflow_context *
> +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
> +{
> +       struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> +       struct mptcp_subflow_context *subflow;
> +       struct mptcp_sock *msk = kit->msk;
> +

you should check if (!msk) early here, before accessing kit->pos->next below

> +       subflow = list_entry((kit->pos)->next, struct mptcp_subflow_context, node);

nit: why () around kit->pos?

> +       if (!msk || list_entry_is_head(subflow, &msk->conn_list, node))

as I mentioned, !msk check seems too late. Maybe list_entry_is_head()
is a bit too late as well?

> +               return NULL;
> +
> +       kit->pos = &subflow->node;
> +       return subflow;
> +}
> +
> +__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
> +{
> +}
>
>  __bpf_kfunc struct mptcp_subflow_context *
>  bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos)
> @@ -218,7 +257,7 @@ __bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk)
>         return tcp_rtx_queue_empty(sk);
>  }
>
> -__diag_pop();
> +__bpf_kfunc_end_defs();
>
>  BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids)
>  BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)
> --
> 2.43.0
>
>
Geliang Tang Sept. 12, 2024, 3:06 a.m. UTC | #2
Hi Andrii,

On Wed, 2024-09-11 at 14:00 -0700, Andrii Nakryiko wrote:
> On Mon, Sep 9, 2024 at 10:37 PM Geliang Tang <geliang@kernel.org>
> wrote:
> > 
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > It's necessary to traverse all subflows on the conn_list of an
> > MPTCP
> > socket and then call kfunc to modify the fields of each subflow. In
> > kernel space, mptcp_for_each_subflow() helper is used for this:
> > 
> >         mptcp_for_each_subflow(msk, subflow)
> >                 kfunc(subflow);
> > 
> > But in the MPTCP BPF program, this has not yet been implemented. As
> > Martin suggested recently, this conn_list walking + modify-by-kfunc
> > usage fits the bpf_iter use case. So this patch adds a new bpf_iter
> > type named "mptcp_subflow" to do this and implements its helpers
> > bpf_iter_mptcp_subflow_new()/_next()/_destroy().
> > 
> > Then bpf_for_each() for mptcp_subflow can be used in BPF program
> > like
> > this:
> > 
> >         bpf_rcu_read_lock();
> >         bpf_for_each(mptcp_subflow, subflow, msk)
> >                 kfunc(subflow);
> >         bpf_rcu_read_unlock();
> > 
> > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >  net/mptcp/bpf.c | 47 +++++++++++++++++++++++++++++++++++++++++++--
> > --
> >  1 file changed, 43 insertions(+), 4 deletions(-)
> > 
> 
> Not sure why, but only this patch made it to the BPF mailing list?
> Did
> you cc bpf@vger on all patches?

This patch is for "mptcp-next" [1], it depends on the "new MPTCP
subflow subtest" which is under review on the bpf list. We will send it
to the bpf list very soon.

[1]
https://patchwork.kernel.org/project/mptcp/cover/cover.1726023577.git.tanggeliang@kylinos.cn/

> 
> > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> > index 6414824402e6..350672e24a3d 100644
> > --- a/net/mptcp/bpf.c
> > +++ b/net/mptcp/bpf.c
> > @@ -201,9 +201,48 @@ static const struct btf_kfunc_id_set
> > bpf_mptcp_fmodret_set = {
> >         .set   = &bpf_mptcp_fmodret_ids,
> >  };
> > 
> > -__diag_push();
> > -__diag_ignore_all("-Wmissing-prototypes",
> > -                 "kfuncs which will be used in BPF programs");
> > +struct bpf_iter_mptcp_subflow {
> > +       __u64 __opaque[2];
> > +} __attribute__((aligned(8)));
> > +
> > +struct bpf_iter_mptcp_subflow_kern {
> > +       struct mptcp_sock *msk;
> > +       struct list_head *pos;
> > +} __attribute__((aligned(8)));
> > +
> > +__bpf_kfunc_start_defs();
> > +
> > +__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct
> > bpf_iter_mptcp_subflow *it,
> > +                                          struct mptcp_sock *msk)
> > +{
> > +       struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > +
> > +       if (!msk)
> > +               return -EINVAL;
> 
> you still need to initialize at least kit->msk to NULL to prevent
> next
> implementation below doing the wrong thing
> 
> keep in mind, iterator constructor returning error doesn't prevent
> BPF
> program from still calling next() and destroy(), so implementation
> has
> to set iterator state such that next can return NULL if iterator was
> never successfully initialized
> 

I'll move "kit->msk = msk;" earlier like this:

{
        struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;

        kit->msk = msk;
        if (!msk)
                return -EINVAL;

        kit->pos = &msk->conn_list;
        return 0;
}

> pw-bot: cr
> 
> > +
> > +       kit->msk = msk;
> > +       kit->pos = &msk->conn_list;
> > +       return 0;
> > +}
> > +
> > +__bpf_kfunc struct mptcp_subflow_context *
> > +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
> > +{
> > +       struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > +       struct mptcp_subflow_context *subflow;
> > +       struct mptcp_sock *msk = kit->msk;
> > +
> 
> you should check if (!msk) early here, before accessing kit->pos-
> >next below
> 
> > +       subflow = list_entry((kit->pos)->next, struct
> > mptcp_subflow_context, node);
> 
> nit: why () around kit->pos?
> 
> > +       if (!msk || list_entry_is_head(subflow, &msk->conn_list,
> > node))
> 
> as I mentioned, !msk check seems too late. Maybe list_entry_is_head()
> is a bit too late as well?

We can use list_is_last() to check kit->pos earlier. But here we use
list_entry_is_head(), it should be after list_entry().

I'll move "if (!msk)" check earlier like this:

{
        struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
        struct mptcp_subflow_context *subflow;
        struct mptcp_sock *msk = kit->msk;

        if (!msk)
                return NULL;

        subflow = list_entry(kit->pos->next, struct
mptcp_subflow_context, node);
        if (!subflow || list_entry_is_head(subflow, &msk->conn_list,
node))
                return NULL;

        kit->pos = &subflow->node;
        return subflow;
}

Thanks,
-Geliang

> 
> > +               return NULL;
> > +
> > +       kit->pos = &subflow->node;
> > +       return subflow;
> > +}
> > +
> > +__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct
> > bpf_iter_mptcp_subflow *it)
> > +{
> > +}
> > 
> >  __bpf_kfunc struct mptcp_subflow_context *
> >  bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data,
> > unsigned int pos)
> > @@ -218,7 +257,7 @@ __bpf_kfunc bool
> > bpf_mptcp_subflow_queues_empty(struct sock *sk)
> >         return tcp_rtx_queue_empty(sk);
> >  }
> > 
> > -__diag_pop();
> > +__bpf_kfunc_end_defs();
> > 
> >  BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids)
> >  BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)
> > --
> > 2.43.0
> > 
> >
diff mbox series

Patch

diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 6414824402e6..350672e24a3d 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -201,9 +201,48 @@  static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
 	.set   = &bpf_mptcp_fmodret_ids,
 };
 
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
-		  "kfuncs which will be used in BPF programs");
+struct bpf_iter_mptcp_subflow {
+	__u64 __opaque[2];
+} __attribute__((aligned(8)));
+
+struct bpf_iter_mptcp_subflow_kern {
+	struct mptcp_sock *msk;
+	struct list_head *pos;
+} __attribute__((aligned(8)));
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
+					   struct mptcp_sock *msk)
+{
+	struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+
+	if (!msk)
+		return -EINVAL;
+
+	kit->msk = msk;
+	kit->pos = &msk->conn_list;
+	return 0;
+}
+
+__bpf_kfunc struct mptcp_subflow_context *
+bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
+{
+	struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = kit->msk;
+
+	subflow = list_entry((kit->pos)->next, struct mptcp_subflow_context, node);
+	if (!msk || list_entry_is_head(subflow, &msk->conn_list, node))
+		return NULL;
+
+	kit->pos = &subflow->node;
+	return subflow;
+}
+
+__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
+{
+}
 
 __bpf_kfunc struct mptcp_subflow_context *
 bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos)
@@ -218,7 +257,7 @@  __bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk)
 	return tcp_rtx_queue_empty(sk);
 }
 
-__diag_pop();
+__bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids)
 BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)