diff mbox series

[bpf-next,v4,9/9] selftests/bpf: Test switching TCP Congestion Control algorithms.

Message ID 20230307233307.3626875-10-kuifeng@meta.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Transit between BPF TCP congestion controls. | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail merge-conflict
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next

Commit Message

Kui-Feng Lee March 7, 2023, 11:33 p.m. UTC
Create a pair of sockets that utilize the congestion control algorithm
under a particular name. Then switch up this congestion control
algorithm to another implementation and check whether newly created
connections using the same cc name now run the new implementation.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 .../selftests/bpf/prog_tests/bpf_tcp_ca.c     | 38 ++++++++++++
 .../selftests/bpf/progs/tcp_ca_update.c       | 62 +++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c

Comments

Andrii Nakryiko March 8, 2023, 1:10 a.m. UTC | #1
On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote:
>
> Create a pair of sockets that utilize the congestion control algorithm
> under a particular name. Then switch up this congestion control
> algorithm to another implementation and check whether newly created
> connections using the same cc name now run the new implementation.
>
> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
> ---
>  .../selftests/bpf/prog_tests/bpf_tcp_ca.c     | 38 ++++++++++++
>  .../selftests/bpf/progs/tcp_ca_update.c       | 62 +++++++++++++++++++
>  2 files changed, 100 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> index e980188d4124..caaa9175ee36 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> @@ -8,6 +8,7 @@
>  #include "bpf_dctcp.skel.h"
>  #include "bpf_cubic.skel.h"
>  #include "bpf_tcp_nogpl.skel.h"
> +#include "tcp_ca_update.skel.h"
>  #include "bpf_dctcp_release.skel.h"
>  #include "tcp_ca_write_sk_pacing.skel.h"
>  #include "tcp_ca_incompl_cong_ops.skel.h"
> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void)
>         libbpf_set_print(old_print_fn);
>  }
>
> +static void test_update_ca(void)
> +{
> +       struct tcp_ca_update *skel;
> +       struct bpf_link *link;
> +       int saved_ca1_cnt;
> +       int err;
> +
> +       skel = tcp_ca_update__open();
> +       if (!ASSERT_OK_PTR(skel, "open"))
> +               return;
> +
> +       err = tcp_ca_update__load(skel);

tcp_ca_update__open_and_load()

> +       if (!ASSERT_OK(err, "load")) {
> +               tcp_ca_update__destroy(skel);
> +               return;
> +       }
> +
> +       link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);

I think it's time to generate link holder for each struct_ops map to
the BPF skeleton, and support auto-attach of struct_ops skeleton.
Please do that as a follow up, once this patch set lands.

> +       ASSERT_OK_PTR(link, "attach_struct_ops");
> +
> +       do_test("tcp_ca_update", NULL);
> +       saved_ca1_cnt = skel->bss->ca1_cnt;
> +       ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
> +
> +       err = bpf_link__update_map(link, skel->maps.ca_update_2);
> +       ASSERT_OK(err, "update_struct_ops");
> +
> +       do_test("tcp_ca_update", NULL);
> +       ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
> +       ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");

how do we know that struct_ops programs were triggered? what
guarantees that? if nothing, we are just adding another flaky
networking test

> +
> +       bpf_link__destroy(link);
> +       tcp_ca_update__destroy(skel);
> +}
> +
>  void test_bpf_tcp_ca(void)
>  {
>         if (test__start_subtest("dctcp"))
> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void)
>                 test_incompl_cong_ops();
>         if (test__start_subtest("unsupp_cong_op"))
>                 test_unsupp_cong_op();
> +       if (test__start_subtest("update_ca"))
> +               test_update_ca();
>  }
> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
> new file mode 100644
> index 000000000000..36a04be95df5
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
> @@ -0,0 +1,62 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "vmlinux.h"
> +
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +int ca1_cnt = 0;
> +int ca2_cnt = 0;
> +
> +#define USEC_PER_SEC 1000000UL
> +
> +#define min(a, b) ((a) < (b) ? (a) : (b))
> +
> +static inline struct tcp_sock *tcp_sk(const struct sock *sk)
> +{
> +       return (struct tcp_sock *)sk;
> +}
> +
> +SEC("struct_ops/ca_update_1_cong_control")
> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk,
> +             const struct rate_sample *rs)
> +{
> +       ca1_cnt++;
> +}
> +
> +SEC("struct_ops/ca_update_2_cong_control")
> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk,
> +             const struct rate_sample *rs)
> +{
> +       ca2_cnt++;
> +}
> +
> +SEC("struct_ops/ca_update_ssthresh")
> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
> +{
> +       return tcp_sk(sk)->snd_ssthresh;
> +}
> +
> +SEC("struct_ops/ca_update_undo_cwnd")
> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
> +{
> +       return tcp_sk(sk)->snd_cwnd;
> +}
> +
> +SEC(".struct_ops.link")
> +struct tcp_congestion_ops ca_update_1 = {
> +       .cong_control = (void *)ca_update_1_cong_control,
> +       .ssthresh = (void *)ca_update_ssthresh,
> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
> +       .name = "tcp_ca_update",
> +};
> +
> +SEC(".struct_ops.link")
> +struct tcp_congestion_ops ca_update_2 = {
> +       .cong_control = (void *)ca_update_2_cong_control,
> +       .ssthresh = (void *)ca_update_ssthresh,
> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
> +       .name = "tcp_ca_update",
> +};

please add a test where you combine both .struct_ops and
.struct_ops.link, it's an obvious potentially problematic combination

as I mentioned in previous patches, let's also have a negative test
where bpf_link__update_map() fails

> --
> 2.34.1
>
Kuifeng Lee March 8, 2023, 3:58 p.m. UTC | #2
On 3/7/23 17:10, Andrii Nakryiko wrote:
> On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote:
>>
>> Create a pair of sockets that utilize the congestion control algorithm
>> under a particular name. Then switch up this congestion control
>> algorithm to another implementation and check whether newly created
>> connections using the same cc name now run the new implementation.
>>
>> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
>> ---
>>   .../selftests/bpf/prog_tests/bpf_tcp_ca.c     | 38 ++++++++++++
>>   .../selftests/bpf/progs/tcp_ca_update.c       | 62 +++++++++++++++++++
>>   2 files changed, 100 insertions(+)
>>   create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
>> index e980188d4124..caaa9175ee36 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
>> @@ -8,6 +8,7 @@
>>   #include "bpf_dctcp.skel.h"
>>   #include "bpf_cubic.skel.h"
>>   #include "bpf_tcp_nogpl.skel.h"
>> +#include "tcp_ca_update.skel.h"
>>   #include "bpf_dctcp_release.skel.h"
>>   #include "tcp_ca_write_sk_pacing.skel.h"
>>   #include "tcp_ca_incompl_cong_ops.skel.h"
>> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void)
>>          libbpf_set_print(old_print_fn);
>>   }
>>
>> +static void test_update_ca(void)
>> +{
>> +       struct tcp_ca_update *skel;
>> +       struct bpf_link *link;
>> +       int saved_ca1_cnt;
>> +       int err;
>> +
>> +       skel = tcp_ca_update__open();
>> +       if (!ASSERT_OK_PTR(skel, "open"))
>> +               return;
>> +
>> +       err = tcp_ca_update__load(skel);
> 
> tcp_ca_update__open_and_load()
> 
>> +       if (!ASSERT_OK(err, "load")) {
>> +               tcp_ca_update__destroy(skel);
>> +               return;
>> +       }
>> +
>> +       link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
> 
> I think it's time to generate link holder for each struct_ops map to
> the BPF skeleton, and support auto-attach of struct_ops skeleton.
> Please do that as a follow up, once this patch set lands.

Got it.

> 
>> +       ASSERT_OK_PTR(link, "attach_struct_ops");
>> +
>> +       do_test("tcp_ca_update", NULL);
>> +       saved_ca1_cnt = skel->bss->ca1_cnt;
>> +       ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
>> +
>> +       err = bpf_link__update_map(link, skel->maps.ca_update_2);
>> +       ASSERT_OK(err, "update_struct_ops");
>> +
>> +       do_test("tcp_ca_update", NULL);
>> +       ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
>> +       ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
> 
> how do we know that struct_ops programs were triggered? what
> guarantees that? if nothing, we are just adding another flaky
> networking test

When an ack is received, cong_control of ca_update_1 and ca_update_2
will be called if they are activated.  By checking ca1_cnt & ca2_cnt, we
know which one is activated.  Here, we check if the ca1_cnt keeps the
same and ca2_cnt increase to make that ca_update_2 have replaced
ca_update_1.

> 
>> +
>> +       bpf_link__destroy(link);
>> +       tcp_ca_update__destroy(skel);
>> +}
>> +
>>   void test_bpf_tcp_ca(void)
>>   {
>>          if (test__start_subtest("dctcp"))
>> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void)
>>                  test_incompl_cong_ops();
>>          if (test__start_subtest("unsupp_cong_op"))
>>                  test_unsupp_cong_op();
>> +       if (test__start_subtest("update_ca"))
>> +               test_update_ca();
>>   }
>> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
>> new file mode 100644
>> index 000000000000..36a04be95df5
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
>> @@ -0,0 +1,62 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +#include "vmlinux.h"
>> +
>> +#include <bpf/bpf_helpers.h>
>> +#include <bpf/bpf_tracing.h>
>> +
>> +char _license[] SEC("license") = "GPL";
>> +
>> +int ca1_cnt = 0;
>> +int ca2_cnt = 0;
>> +
>> +#define USEC_PER_SEC 1000000UL
>> +
>> +#define min(a, b) ((a) < (b) ? (a) : (b))
>> +
>> +static inline struct tcp_sock *tcp_sk(const struct sock *sk)
>> +{
>> +       return (struct tcp_sock *)sk;
>> +}
>> +
>> +SEC("struct_ops/ca_update_1_cong_control")
>> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk,
>> +             const struct rate_sample *rs)
>> +{
>> +       ca1_cnt++;
>> +}
>> +
>> +SEC("struct_ops/ca_update_2_cong_control")
>> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk,
>> +             const struct rate_sample *rs)
>> +{
>> +       ca2_cnt++;
>> +}
>> +
>> +SEC("struct_ops/ca_update_ssthresh")
>> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
>> +{
>> +       return tcp_sk(sk)->snd_ssthresh;
>> +}
>> +
>> +SEC("struct_ops/ca_update_undo_cwnd")
>> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
>> +{
>> +       return tcp_sk(sk)->snd_cwnd;
>> +}
>> +
>> +SEC(".struct_ops.link")
>> +struct tcp_congestion_ops ca_update_1 = {
>> +       .cong_control = (void *)ca_update_1_cong_control,
>> +       .ssthresh = (void *)ca_update_ssthresh,
>> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
>> +       .name = "tcp_ca_update",
>> +};
>> +
>> +SEC(".struct_ops.link")
>> +struct tcp_congestion_ops ca_update_2 = {
>> +       .cong_control = (void *)ca_update_2_cong_control,
>> +       .ssthresh = (void *)ca_update_ssthresh,
>> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
>> +       .name = "tcp_ca_update",
>> +};
> 
> please add a test where you combine both .struct_ops and
> .struct_ops.link, it's an obvious potentially problematic combination
> 
> as I mentioned in previous patches, let's also have a negative test
> where bpf_link__update_map() fails

Sure

> 
>> --
>> 2.34.1
>>
Andrii Nakryiko March 8, 2023, 5:18 p.m. UTC | #3
On Wed, Mar 8, 2023 at 7:58 AM Kui-Feng Lee <sinquersw@gmail.com> wrote:
>
>
>
> On 3/7/23 17:10, Andrii Nakryiko wrote:
> > On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote:
> >>
> >> Create a pair of sockets that utilize the congestion control algorithm
> >> under a particular name. Then switch up this congestion control
> >> algorithm to another implementation and check whether newly created
> >> connections using the same cc name now run the new implementation.
> >>
> >> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
> >> ---
> >>   .../selftests/bpf/prog_tests/bpf_tcp_ca.c     | 38 ++++++++++++
> >>   .../selftests/bpf/progs/tcp_ca_update.c       | 62 +++++++++++++++++++
> >>   2 files changed, 100 insertions(+)
> >>   create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c
> >>
> >> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> >> index e980188d4124..caaa9175ee36 100644
> >> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> >> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> >> @@ -8,6 +8,7 @@
> >>   #include "bpf_dctcp.skel.h"
> >>   #include "bpf_cubic.skel.h"
> >>   #include "bpf_tcp_nogpl.skel.h"
> >> +#include "tcp_ca_update.skel.h"
> >>   #include "bpf_dctcp_release.skel.h"
> >>   #include "tcp_ca_write_sk_pacing.skel.h"
> >>   #include "tcp_ca_incompl_cong_ops.skel.h"
> >> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void)
> >>          libbpf_set_print(old_print_fn);
> >>   }
> >>
> >> +static void test_update_ca(void)
> >> +{
> >> +       struct tcp_ca_update *skel;
> >> +       struct bpf_link *link;
> >> +       int saved_ca1_cnt;
> >> +       int err;
> >> +
> >> +       skel = tcp_ca_update__open();
> >> +       if (!ASSERT_OK_PTR(skel, "open"))
> >> +               return;
> >> +
> >> +       err = tcp_ca_update__load(skel);
> >
> > tcp_ca_update__open_and_load()
> >
> >> +       if (!ASSERT_OK(err, "load")) {
> >> +               tcp_ca_update__destroy(skel);
> >> +               return;
> >> +       }
> >> +
> >> +       link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
> >
> > I think it's time to generate link holder for each struct_ops map to
> > the BPF skeleton, and support auto-attach of struct_ops skeleton.
> > Please do that as a follow up, once this patch set lands.
>
> Got it.
>
> >
> >> +       ASSERT_OK_PTR(link, "attach_struct_ops");
> >> +
> >> +       do_test("tcp_ca_update", NULL);
> >> +       saved_ca1_cnt = skel->bss->ca1_cnt;
> >> +       ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
> >> +
> >> +       err = bpf_link__update_map(link, skel->maps.ca_update_2);
> >> +       ASSERT_OK(err, "update_struct_ops");
> >> +
> >> +       do_test("tcp_ca_update", NULL);
> >> +       ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
> >> +       ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
> >
> > how do we know that struct_ops programs were triggered? what
> > guarantees that? if nothing, we are just adding another flaky
> > networking test
>
> When an ack is received, cong_control of ca_update_1 and ca_update_2
> will be called if they are activated.  By checking ca1_cnt & ca2_cnt, we
> know which one is activated.  Here, we check if the ca1_cnt keeps the
> same and ca2_cnt increase to make that ca_update_2 have replaced
> ca_update_1.

I just don't see anything in the test ensuring that ack is
sent/received, so it seems like we are relying on some background
system activity and proper timing (unless I miss something, which is
why I'm asking), so this is fragile, as in CI environment timings and
background activity would be very different and unpredictable, causing
flakiness of the test

>
> >
> >> +
> >> +       bpf_link__destroy(link);
> >> +       tcp_ca_update__destroy(skel);
> >> +}
> >> +
> >>   void test_bpf_tcp_ca(void)
> >>   {
> >>          if (test__start_subtest("dctcp"))
> >> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void)
> >>                  test_incompl_cong_ops();
> >>          if (test__start_subtest("unsupp_cong_op"))
> >>                  test_unsupp_cong_op();
> >> +       if (test__start_subtest("update_ca"))
> >> +               test_update_ca();
> >>   }
> >> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
> >> new file mode 100644
> >> index 000000000000..36a04be95df5
> >> --- /dev/null
> >> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
> >> @@ -0,0 +1,62 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +
> >> +#include "vmlinux.h"
> >> +
> >> +#include <bpf/bpf_helpers.h>
> >> +#include <bpf/bpf_tracing.h>
> >> +
> >> +char _license[] SEC("license") = "GPL";
> >> +
> >> +int ca1_cnt = 0;
> >> +int ca2_cnt = 0;
> >> +
> >> +#define USEC_PER_SEC 1000000UL
> >> +
> >> +#define min(a, b) ((a) < (b) ? (a) : (b))
> >> +
> >> +static inline struct tcp_sock *tcp_sk(const struct sock *sk)
> >> +{
> >> +       return (struct tcp_sock *)sk;
> >> +}
> >> +
> >> +SEC("struct_ops/ca_update_1_cong_control")
> >> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk,
> >> +             const struct rate_sample *rs)
> >> +{
> >> +       ca1_cnt++;
> >> +}
> >> +
> >> +SEC("struct_ops/ca_update_2_cong_control")
> >> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk,
> >> +             const struct rate_sample *rs)
> >> +{
> >> +       ca2_cnt++;
> >> +}
> >> +
> >> +SEC("struct_ops/ca_update_ssthresh")
> >> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
> >> +{
> >> +       return tcp_sk(sk)->snd_ssthresh;
> >> +}
> >> +
> >> +SEC("struct_ops/ca_update_undo_cwnd")
> >> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
> >> +{
> >> +       return tcp_sk(sk)->snd_cwnd;
> >> +}
> >> +
> >> +SEC(".struct_ops.link")
> >> +struct tcp_congestion_ops ca_update_1 = {
> >> +       .cong_control = (void *)ca_update_1_cong_control,
> >> +       .ssthresh = (void *)ca_update_ssthresh,
> >> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
> >> +       .name = "tcp_ca_update",
> >> +};
> >> +
> >> +SEC(".struct_ops.link")
> >> +struct tcp_congestion_ops ca_update_2 = {
> >> +       .cong_control = (void *)ca_update_2_cong_control,
> >> +       .ssthresh = (void *)ca_update_ssthresh,
> >> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
> >> +       .name = "tcp_ca_update",
> >> +};
> >
> > please add a test where you combine both .struct_ops and
> > .struct_ops.link, it's an obvious potentially problematic combination
> >
> > as I mentioned in previous patches, let's also have a negative test
> > where bpf_link__update_map() fails
>
> Sure
>
> >
> >> --
> >> 2.34.1
> >>
Kuifeng Lee March 8, 2023, 6:10 p.m. UTC | #4
On 3/8/23 09:18, Andrii Nakryiko wrote:
> On Wed, Mar 8, 2023 at 7:58 AM Kui-Feng Lee <sinquersw@gmail.com> wrote:
>>
>>
>>
>> On 3/7/23 17:10, Andrii Nakryiko wrote:
>>> On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote:
>>>>
>>>> Create a pair of sockets that utilize the congestion control algorithm
>>>> under a particular name. Then switch up this congestion control
>>>> algorithm to another implementation and check whether newly created
>>>> connections using the same cc name now run the new implementation.
>>>>
>>>> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
>>>> ---
>>>>    .../selftests/bpf/prog_tests/bpf_tcp_ca.c     | 38 ++++++++++++
>>>>    .../selftests/bpf/progs/tcp_ca_update.c       | 62 +++++++++++++++++++
>>>>    2 files changed, 100 insertions(+)
>>>>    create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c
>>>>
>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
>>>> index e980188d4124..caaa9175ee36 100644
>>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
>>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
>>>> @@ -8,6 +8,7 @@
>>>>    #include "bpf_dctcp.skel.h"
>>>>    #include "bpf_cubic.skel.h"
>>>>    #include "bpf_tcp_nogpl.skel.h"
>>>> +#include "tcp_ca_update.skel.h"
>>>>    #include "bpf_dctcp_release.skel.h"
>>>>    #include "tcp_ca_write_sk_pacing.skel.h"
>>>>    #include "tcp_ca_incompl_cong_ops.skel.h"
>>>> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void)
>>>>           libbpf_set_print(old_print_fn);
>>>>    }
>>>>
>>>> +static void test_update_ca(void)
>>>> +{
>>>> +       struct tcp_ca_update *skel;
>>>> +       struct bpf_link *link;
>>>> +       int saved_ca1_cnt;
>>>> +       int err;
>>>> +
>>>> +       skel = tcp_ca_update__open();
>>>> +       if (!ASSERT_OK_PTR(skel, "open"))
>>>> +               return;
>>>> +
>>>> +       err = tcp_ca_update__load(skel);
>>>
>>> tcp_ca_update__open_and_load()
>>>
>>>> +       if (!ASSERT_OK(err, "load")) {
>>>> +               tcp_ca_update__destroy(skel);
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
>>>
>>> I think it's time to generate link holder for each struct_ops map to
>>> the BPF skeleton, and support auto-attach of struct_ops skeleton.
>>> Please do that as a follow up, once this patch set lands.
>>
>> Got it.
>>
>>>
>>>> +       ASSERT_OK_PTR(link, "attach_struct_ops");
>>>> +
>>>> +       do_test("tcp_ca_update", NULL);
>>>> +       saved_ca1_cnt = skel->bss->ca1_cnt;
>>>> +       ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
>>>> +
>>>> +       err = bpf_link__update_map(link, skel->maps.ca_update_2);
>>>> +       ASSERT_OK(err, "update_struct_ops");
>>>> +
>>>> +       do_test("tcp_ca_update", NULL);
>>>> +       ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
>>>> +       ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
>>>
>>> how do we know that struct_ops programs were triggered? what
>>> guarantees that? if nothing, we are just adding another flaky
>>> networking test
>>
>> When an ack is received, cong_control of ca_update_1 and ca_update_2
>> will be called if they are activated.  By checking ca1_cnt & ca2_cnt, we
>> know which one is activated.  Here, we check if the ca1_cnt keeps the
>> same and ca2_cnt increase to make that ca_update_2 have replaced
>> ca_update_1.
> 
> I just don't see anything in the test ensuring that ack is
> sent/received, so it seems like we are relying on some background
> system activity and proper timing (unless I miss something, which is
> why I'm asking), so this is fragile, as in CI environment timings and
> background activity would be very different and unpredictable, causing
> flakiness of the test


The do_test() function creates two sockets to form a direct connection
that must receive at least one acknowledgment packet for the sockets to
progress into an ESTABLISHED state.  If they don't, that means it fails
to establish a connection.

> 
>>
>>>
>>>> +
>>>> +       bpf_link__destroy(link);
>>>> +       tcp_ca_update__destroy(skel);
>>>> +}
>>>> +
>>>>    void test_bpf_tcp_ca(void)
>>>>    {
>>>>           if (test__start_subtest("dctcp"))
>>>> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void)
>>>>                   test_incompl_cong_ops();
>>>>           if (test__start_subtest("unsupp_cong_op"))
>>>>                   test_unsupp_cong_op();
>>>> +       if (test__start_subtest("update_ca"))
>>>> +               test_update_ca();
>>>>    }
>>>> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
>>>> new file mode 100644
>>>> index 000000000000..36a04be95df5
>>>> --- /dev/null
>>>> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
>>>> @@ -0,0 +1,62 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +
>>>> +#include "vmlinux.h"
>>>> +
>>>> +#include <bpf/bpf_helpers.h>
>>>> +#include <bpf/bpf_tracing.h>
>>>> +
>>>> +char _license[] SEC("license") = "GPL";
>>>> +
>>>> +int ca1_cnt = 0;
>>>> +int ca2_cnt = 0;
>>>> +
>>>> +#define USEC_PER_SEC 1000000UL
>>>> +
>>>> +#define min(a, b) ((a) < (b) ? (a) : (b))
>>>> +
>>>> +static inline struct tcp_sock *tcp_sk(const struct sock *sk)
>>>> +{
>>>> +       return (struct tcp_sock *)sk;
>>>> +}
>>>> +
>>>> +SEC("struct_ops/ca_update_1_cong_control")
>>>> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk,
>>>> +             const struct rate_sample *rs)
>>>> +{
>>>> +       ca1_cnt++;
>>>> +}
>>>> +
>>>> +SEC("struct_ops/ca_update_2_cong_control")
>>>> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk,
>>>> +             const struct rate_sample *rs)
>>>> +{
>>>> +       ca2_cnt++;
>>>> +}
>>>> +
>>>> +SEC("struct_ops/ca_update_ssthresh")
>>>> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
>>>> +{
>>>> +       return tcp_sk(sk)->snd_ssthresh;
>>>> +}
>>>> +
>>>> +SEC("struct_ops/ca_update_undo_cwnd")
>>>> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
>>>> +{
>>>> +       return tcp_sk(sk)->snd_cwnd;
>>>> +}
>>>> +
>>>> +SEC(".struct_ops.link")
>>>> +struct tcp_congestion_ops ca_update_1 = {
>>>> +       .cong_control = (void *)ca_update_1_cong_control,
>>>> +       .ssthresh = (void *)ca_update_ssthresh,
>>>> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
>>>> +       .name = "tcp_ca_update",
>>>> +};
>>>> +
>>>> +SEC(".struct_ops.link")
>>>> +struct tcp_congestion_ops ca_update_2 = {
>>>> +       .cong_control = (void *)ca_update_2_cong_control,
>>>> +       .ssthresh = (void *)ca_update_ssthresh,
>>>> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
>>>> +       .name = "tcp_ca_update",
>>>> +};
>>>
>>> please add a test where you combine both .struct_ops and
>>> .struct_ops.link, it's an obvious potentially problematic combination
>>>
>>> as I mentioned in previous patches, let's also have a negative test
>>> where bpf_link__update_map() fails
>>
>> Sure
>>
>>>
>>>> --
>>>> 2.34.1
>>>>
Andrii Nakryiko March 8, 2023, 6:43 p.m. UTC | #5
On Wed, Mar 8, 2023 at 10:10 AM Kui-Feng Lee <sinquersw@gmail.com> wrote:
>
>
>
> On 3/8/23 09:18, Andrii Nakryiko wrote:
> > On Wed, Mar 8, 2023 at 7:58 AM Kui-Feng Lee <sinquersw@gmail.com> wrote:
> >>
> >>
> >>
> >> On 3/7/23 17:10, Andrii Nakryiko wrote:
> >>> On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote:
> >>>>
> >>>> Create a pair of sockets that utilize the congestion control algorithm
> >>>> under a particular name. Then switch up this congestion control
> >>>> algorithm to another implementation and check whether newly created
> >>>> connections using the same cc name now run the new implementation.
> >>>>
> >>>> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
> >>>> ---
> >>>>    .../selftests/bpf/prog_tests/bpf_tcp_ca.c     | 38 ++++++++++++
> >>>>    .../selftests/bpf/progs/tcp_ca_update.c       | 62 +++++++++++++++++++
> >>>>    2 files changed, 100 insertions(+)
> >>>>    create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c
> >>>>
> >>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> >>>> index e980188d4124..caaa9175ee36 100644
> >>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> >>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
> >>>> @@ -8,6 +8,7 @@
> >>>>    #include "bpf_dctcp.skel.h"
> >>>>    #include "bpf_cubic.skel.h"
> >>>>    #include "bpf_tcp_nogpl.skel.h"
> >>>> +#include "tcp_ca_update.skel.h"
> >>>>    #include "bpf_dctcp_release.skel.h"
> >>>>    #include "tcp_ca_write_sk_pacing.skel.h"
> >>>>    #include "tcp_ca_incompl_cong_ops.skel.h"
> >>>> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void)
> >>>>           libbpf_set_print(old_print_fn);
> >>>>    }
> >>>>
> >>>> +static void test_update_ca(void)
> >>>> +{
> >>>> +       struct tcp_ca_update *skel;
> >>>> +       struct bpf_link *link;
> >>>> +       int saved_ca1_cnt;
> >>>> +       int err;
> >>>> +
> >>>> +       skel = tcp_ca_update__open();
> >>>> +       if (!ASSERT_OK_PTR(skel, "open"))
> >>>> +               return;
> >>>> +
> >>>> +       err = tcp_ca_update__load(skel);
> >>>
> >>> tcp_ca_update__open_and_load()
> >>>
> >>>> +       if (!ASSERT_OK(err, "load")) {
> >>>> +               tcp_ca_update__destroy(skel);
> >>>> +               return;
> >>>> +       }
> >>>> +
> >>>> +       link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
> >>>
> >>> I think it's time to generate link holder for each struct_ops map to
> >>> the BPF skeleton, and support auto-attach of struct_ops skeleton.
> >>> Please do that as a follow up, once this patch set lands.
> >>
> >> Got it.
> >>
> >>>
> >>>> +       ASSERT_OK_PTR(link, "attach_struct_ops");
> >>>> +
> >>>> +       do_test("tcp_ca_update", NULL);
> >>>> +       saved_ca1_cnt = skel->bss->ca1_cnt;
> >>>> +       ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
> >>>> +
> >>>> +       err = bpf_link__update_map(link, skel->maps.ca_update_2);
> >>>> +       ASSERT_OK(err, "update_struct_ops");
> >>>> +
> >>>> +       do_test("tcp_ca_update", NULL);
> >>>> +       ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
> >>>> +       ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
> >>>
> >>> how do we know that struct_ops programs were triggered? what
> >>> guarantees that? if nothing, we are just adding another flaky
> >>> networking test
> >>
> >> When an ack is received, cong_control of ca_update_1 and ca_update_2
> >> will be called if they are activated.  By checking ca1_cnt & ca2_cnt, we
> >> know which one is activated.  Here, we check if the ca1_cnt keeps the
> >> same and ca2_cnt increase to make that ca_update_2 have replaced
> >> ca_update_1.
> >
> > I just don't see anything in the test ensuring that ack is
> > sent/received, so it seems like we are relying on some background
> > system activity and proper timing (unless I miss something, which is
> > why I'm asking), so this is fragile, as in CI environment timings and
> > background activity would be very different and unpredictable, causing
> > flakiness of the test
>
>
> The do_test() function creates two sockets to form a direct connection
> that must receive at least one acknowledgment packet for the sockets to
> progress into an ESTABLISHED state.  If they don't, that means it fails
> to establish a connection.

yeah, my bad, I *completely* missed `do_test("tcp_ca_update", NULL)`
(even though I went specifically looking for something like that).
It's all good here.


>
> >
> >>
> >>>
> >>>> +
> >>>> +       bpf_link__destroy(link);
> >>>> +       tcp_ca_update__destroy(skel);
> >>>> +}
> >>>> +
> >>>>    void test_bpf_tcp_ca(void)
> >>>>    {
> >>>>           if (test__start_subtest("dctcp"))
> >>>> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void)
> >>>>                   test_incompl_cong_ops();
> >>>>           if (test__start_subtest("unsupp_cong_op"))
> >>>>                   test_unsupp_cong_op();
> >>>> +       if (test__start_subtest("update_ca"))
> >>>> +               test_update_ca();
> >>>>    }
> >>>> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
> >>>> new file mode 100644
> >>>> index 000000000000..36a04be95df5
> >>>> --- /dev/null
> >>>> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
> >>>> @@ -0,0 +1,62 @@
> >>>> +// SPDX-License-Identifier: GPL-2.0
> >>>> +
> >>>> +#include "vmlinux.h"
> >>>> +
> >>>> +#include <bpf/bpf_helpers.h>
> >>>> +#include <bpf/bpf_tracing.h>
> >>>> +
> >>>> +char _license[] SEC("license") = "GPL";
> >>>> +
> >>>> +int ca1_cnt = 0;
> >>>> +int ca2_cnt = 0;
> >>>> +
> >>>> +#define USEC_PER_SEC 1000000UL
> >>>> +
> >>>> +#define min(a, b) ((a) < (b) ? (a) : (b))
> >>>> +
> >>>> +static inline struct tcp_sock *tcp_sk(const struct sock *sk)
> >>>> +{
> >>>> +       return (struct tcp_sock *)sk;
> >>>> +}
> >>>> +
> >>>> +SEC("struct_ops/ca_update_1_cong_control")
> >>>> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk,
> >>>> +             const struct rate_sample *rs)
> >>>> +{
> >>>> +       ca1_cnt++;
> >>>> +}
> >>>> +
> >>>> +SEC("struct_ops/ca_update_2_cong_control")
> >>>> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk,
> >>>> +             const struct rate_sample *rs)
> >>>> +{
> >>>> +       ca2_cnt++;
> >>>> +}
> >>>> +
> >>>> +SEC("struct_ops/ca_update_ssthresh")
> >>>> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
> >>>> +{
> >>>> +       return tcp_sk(sk)->snd_ssthresh;
> >>>> +}
> >>>> +
> >>>> +SEC("struct_ops/ca_update_undo_cwnd")
> >>>> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
> >>>> +{
> >>>> +       return tcp_sk(sk)->snd_cwnd;
> >>>> +}
> >>>> +
> >>>> +SEC(".struct_ops.link")
> >>>> +struct tcp_congestion_ops ca_update_1 = {
> >>>> +       .cong_control = (void *)ca_update_1_cong_control,
> >>>> +       .ssthresh = (void *)ca_update_ssthresh,
> >>>> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
> >>>> +       .name = "tcp_ca_update",
> >>>> +};
> >>>> +
> >>>> +SEC(".struct_ops.link")
> >>>> +struct tcp_congestion_ops ca_update_2 = {
> >>>> +       .cong_control = (void *)ca_update_2_cong_control,
> >>>> +       .ssthresh = (void *)ca_update_ssthresh,
> >>>> +       .undo_cwnd = (void *)ca_update_undo_cwnd,
> >>>> +       .name = "tcp_ca_update",
> >>>> +};
> >>>
> >>> please add a test where you combine both .struct_ops and
> >>> .struct_ops.link, it's an obvious potentially problematic combination
> >>>
> >>> as I mentioned in previous patches, let's also have a negative test
> >>> where bpf_link__update_map() fails
> >>
> >> Sure
> >>
> >>>
> >>>> --
> >>>> 2.34.1
> >>>>
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e980188d4124..caaa9175ee36 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -8,6 +8,7 @@ 
 #include "bpf_dctcp.skel.h"
 #include "bpf_cubic.skel.h"
 #include "bpf_tcp_nogpl.skel.h"
+#include "tcp_ca_update.skel.h"
 #include "bpf_dctcp_release.skel.h"
 #include "tcp_ca_write_sk_pacing.skel.h"
 #include "tcp_ca_incompl_cong_ops.skel.h"
@@ -381,6 +382,41 @@  static void test_unsupp_cong_op(void)
 	libbpf_set_print(old_print_fn);
 }
 
+static void test_update_ca(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+	int saved_ca1_cnt;
+	int err;
+
+	skel = tcp_ca_update__open();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	err = tcp_ca_update__load(skel);
+	if (!ASSERT_OK(err, "load")) {
+		tcp_ca_update__destroy(skel);
+		return;
+	}
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops");
+
+	do_test("tcp_ca_update", NULL);
+	saved_ca1_cnt = skel->bss->ca1_cnt;
+	ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+	err = bpf_link__update_map(link, skel->maps.ca_update_2);
+	ASSERT_OK(err, "update_struct_ops");
+
+	do_test("tcp_ca_update", NULL);
+	ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+	ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
+
+	bpf_link__destroy(link);
+	tcp_ca_update__destroy(skel);
+}
+
 void test_bpf_tcp_ca(void)
 {
 	if (test__start_subtest("dctcp"))
@@ -399,4 +435,6 @@  void test_bpf_tcp_ca(void)
 		test_incompl_cong_ops();
 	if (test__start_subtest("unsupp_cong_op"))
 		test_unsupp_cong_op();
+	if (test__start_subtest("update_ca"))
+		test_update_ca();
 }
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
new file mode 100644
index 000000000000..36a04be95df5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
@@ -0,0 +1,62 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int ca1_cnt = 0;
+int ca2_cnt = 0;
+
+#define USEC_PER_SEC 1000000UL
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+	return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/ca_update_1_cong_control")
+void BPF_PROG(ca_update_1_cong_control, struct sock *sk,
+	      const struct rate_sample *rs)
+{
+	ca1_cnt++;
+}
+
+SEC("struct_ops/ca_update_2_cong_control")
+void BPF_PROG(ca_update_2_cong_control, struct sock *sk,
+	      const struct rate_sample *rs)
+{
+	ca2_cnt++;
+}
+
+SEC("struct_ops/ca_update_ssthresh")
+__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/ca_update_undo_cwnd")
+__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_1 = {
+	.cong_control = (void *)ca_update_1_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_update",
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_2 = {
+	.cong_control = (void *)ca_update_2_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_update",
+};