Message ID | 20230307233307.3626875-10-kuifeng@meta.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | Transit between BPF TCP congestion controls. | expand |
Context | Check | Description |
---|---|---|
bpf/vmtest-bpf-next-PR | fail | merge-conflict |
netdev/tree_selection | success | Clearly marked for bpf-next, async |
netdev/apply | fail | Patch does not apply to bpf-next |
On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote: > > Create a pair of sockets that utilize the congestion control algorithm > under a particular name. Then switch up this congestion control > algorithm to another implementation and check whether newly created > connections using the same cc name now run the new implementation. > > Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> > --- > .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 38 ++++++++++++ > .../selftests/bpf/progs/tcp_ca_update.c | 62 +++++++++++++++++++ > 2 files changed, 100 insertions(+) > create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > index e980188d4124..caaa9175ee36 100644 > --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > @@ -8,6 +8,7 @@ > #include "bpf_dctcp.skel.h" > #include "bpf_cubic.skel.h" > #include "bpf_tcp_nogpl.skel.h" > +#include "tcp_ca_update.skel.h" > #include "bpf_dctcp_release.skel.h" > #include "tcp_ca_write_sk_pacing.skel.h" > #include "tcp_ca_incompl_cong_ops.skel.h" > @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void) > libbpf_set_print(old_print_fn); > } > > +static void test_update_ca(void) > +{ > + struct tcp_ca_update *skel; > + struct bpf_link *link; > + int saved_ca1_cnt; > + int err; > + > + skel = tcp_ca_update__open(); > + if (!ASSERT_OK_PTR(skel, "open")) > + return; > + > + err = tcp_ca_update__load(skel); tcp_ca_update__open_and_load() > + if (!ASSERT_OK(err, "load")) { > + tcp_ca_update__destroy(skel); > + return; > + } > + > + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); I think it's time to generate link holder for each struct_ops map to the BPF skeleton, and support auto-attach of struct_ops skeleton. Please do that as a follow up, once this patch set lands. > + ASSERT_OK_PTR(link, "attach_struct_ops"); > + > + do_test("tcp_ca_update", NULL); > + saved_ca1_cnt = skel->bss->ca1_cnt; > + ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); > + > + err = bpf_link__update_map(link, skel->maps.ca_update_2); > + ASSERT_OK(err, "update_struct_ops"); > + > + do_test("tcp_ca_update", NULL); > + ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); > + ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); how do we know that struct_ops programs were triggered? what guarantees that? if nothing, we are just adding another flaky networking test > + > + bpf_link__destroy(link); > + tcp_ca_update__destroy(skel); > +} > + > void test_bpf_tcp_ca(void) > { > if (test__start_subtest("dctcp")) > @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void) > test_incompl_cong_ops(); > if (test__start_subtest("unsupp_cong_op")) > test_unsupp_cong_op(); > + if (test__start_subtest("update_ca")) > + test_update_ca(); > } > diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c > new file mode 100644 > index 000000000000..36a04be95df5 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c > @@ -0,0 +1,62 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +#include "vmlinux.h" > + > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > + > +char _license[] SEC("license") = "GPL"; > + > +int ca1_cnt = 0; > +int ca2_cnt = 0; > + > +#define USEC_PER_SEC 1000000UL > + > +#define min(a, b) ((a) < (b) ? (a) : (b)) > + > +static inline struct tcp_sock *tcp_sk(const struct sock *sk) > +{ > + return (struct tcp_sock *)sk; > +} > + > +SEC("struct_ops/ca_update_1_cong_control") > +void BPF_PROG(ca_update_1_cong_control, struct sock *sk, > + const struct rate_sample *rs) > +{ > + ca1_cnt++; > +} > + > +SEC("struct_ops/ca_update_2_cong_control") > +void BPF_PROG(ca_update_2_cong_control, struct sock *sk, > + const struct rate_sample *rs) > +{ > + ca2_cnt++; > +} > + > +SEC("struct_ops/ca_update_ssthresh") > +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk) > +{ > + return tcp_sk(sk)->snd_ssthresh; > +} > + > +SEC("struct_ops/ca_update_undo_cwnd") > +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk) > +{ > + return tcp_sk(sk)->snd_cwnd; > +} > + > +SEC(".struct_ops.link") > +struct tcp_congestion_ops ca_update_1 = { > + .cong_control = (void *)ca_update_1_cong_control, > + .ssthresh = (void *)ca_update_ssthresh, > + .undo_cwnd = (void *)ca_update_undo_cwnd, > + .name = "tcp_ca_update", > +}; > + > +SEC(".struct_ops.link") > +struct tcp_congestion_ops ca_update_2 = { > + .cong_control = (void *)ca_update_2_cong_control, > + .ssthresh = (void *)ca_update_ssthresh, > + .undo_cwnd = (void *)ca_update_undo_cwnd, > + .name = "tcp_ca_update", > +}; please add a test where you combine both .struct_ops and .struct_ops.link, it's an obvious potentially problematic combination as I mentioned in previous patches, let's also have a negative test where bpf_link__update_map() fails > -- > 2.34.1 >
On 3/7/23 17:10, Andrii Nakryiko wrote: > On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote: >> >> Create a pair of sockets that utilize the congestion control algorithm >> under a particular name. Then switch up this congestion control >> algorithm to another implementation and check whether newly created >> connections using the same cc name now run the new implementation. >> >> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> >> --- >> .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 38 ++++++++++++ >> .../selftests/bpf/progs/tcp_ca_update.c | 62 +++++++++++++++++++ >> 2 files changed, 100 insertions(+) >> create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c >> >> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c >> index e980188d4124..caaa9175ee36 100644 >> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c >> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c >> @@ -8,6 +8,7 @@ >> #include "bpf_dctcp.skel.h" >> #include "bpf_cubic.skel.h" >> #include "bpf_tcp_nogpl.skel.h" >> +#include "tcp_ca_update.skel.h" >> #include "bpf_dctcp_release.skel.h" >> #include "tcp_ca_write_sk_pacing.skel.h" >> #include "tcp_ca_incompl_cong_ops.skel.h" >> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void) >> libbpf_set_print(old_print_fn); >> } >> >> +static void test_update_ca(void) >> +{ >> + struct tcp_ca_update *skel; >> + struct bpf_link *link; >> + int saved_ca1_cnt; >> + int err; >> + >> + skel = tcp_ca_update__open(); >> + if (!ASSERT_OK_PTR(skel, "open")) >> + return; >> + >> + err = tcp_ca_update__load(skel); > > tcp_ca_update__open_and_load() > >> + if (!ASSERT_OK(err, "load")) { >> + tcp_ca_update__destroy(skel); >> + return; >> + } >> + >> + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); > > I think it's time to generate link holder for each struct_ops map to > the BPF skeleton, and support auto-attach of struct_ops skeleton. > Please do that as a follow up, once this patch set lands. Got it. > >> + ASSERT_OK_PTR(link, "attach_struct_ops"); >> + >> + do_test("tcp_ca_update", NULL); >> + saved_ca1_cnt = skel->bss->ca1_cnt; >> + ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); >> + >> + err = bpf_link__update_map(link, skel->maps.ca_update_2); >> + ASSERT_OK(err, "update_struct_ops"); >> + >> + do_test("tcp_ca_update", NULL); >> + ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); >> + ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); > > how do we know that struct_ops programs were triggered? what > guarantees that? if nothing, we are just adding another flaky > networking test When an ack is received, cong_control of ca_update_1 and ca_update_2 will be called if they are activated. By checking ca1_cnt & ca2_cnt, we know which one is activated. Here, we check if the ca1_cnt keeps the same and ca2_cnt increase to make that ca_update_2 have replaced ca_update_1. > >> + >> + bpf_link__destroy(link); >> + tcp_ca_update__destroy(skel); >> +} >> + >> void test_bpf_tcp_ca(void) >> { >> if (test__start_subtest("dctcp")) >> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void) >> test_incompl_cong_ops(); >> if (test__start_subtest("unsupp_cong_op")) >> test_unsupp_cong_op(); >> + if (test__start_subtest("update_ca")) >> + test_update_ca(); >> } >> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c >> new file mode 100644 >> index 000000000000..36a04be95df5 >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c >> @@ -0,0 +1,62 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> + >> +#include "vmlinux.h" >> + >> +#include <bpf/bpf_helpers.h> >> +#include <bpf/bpf_tracing.h> >> + >> +char _license[] SEC("license") = "GPL"; >> + >> +int ca1_cnt = 0; >> +int ca2_cnt = 0; >> + >> +#define USEC_PER_SEC 1000000UL >> + >> +#define min(a, b) ((a) < (b) ? (a) : (b)) >> + >> +static inline struct tcp_sock *tcp_sk(const struct sock *sk) >> +{ >> + return (struct tcp_sock *)sk; >> +} >> + >> +SEC("struct_ops/ca_update_1_cong_control") >> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk, >> + const struct rate_sample *rs) >> +{ >> + ca1_cnt++; >> +} >> + >> +SEC("struct_ops/ca_update_2_cong_control") >> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk, >> + const struct rate_sample *rs) >> +{ >> + ca2_cnt++; >> +} >> + >> +SEC("struct_ops/ca_update_ssthresh") >> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk) >> +{ >> + return tcp_sk(sk)->snd_ssthresh; >> +} >> + >> +SEC("struct_ops/ca_update_undo_cwnd") >> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk) >> +{ >> + return tcp_sk(sk)->snd_cwnd; >> +} >> + >> +SEC(".struct_ops.link") >> +struct tcp_congestion_ops ca_update_1 = { >> + .cong_control = (void *)ca_update_1_cong_control, >> + .ssthresh = (void *)ca_update_ssthresh, >> + .undo_cwnd = (void *)ca_update_undo_cwnd, >> + .name = "tcp_ca_update", >> +}; >> + >> +SEC(".struct_ops.link") >> +struct tcp_congestion_ops ca_update_2 = { >> + .cong_control = (void *)ca_update_2_cong_control, >> + .ssthresh = (void *)ca_update_ssthresh, >> + .undo_cwnd = (void *)ca_update_undo_cwnd, >> + .name = "tcp_ca_update", >> +}; > > please add a test where you combine both .struct_ops and > .struct_ops.link, it's an obvious potentially problematic combination > > as I mentioned in previous patches, let's also have a negative test > where bpf_link__update_map() fails Sure > >> -- >> 2.34.1 >>
On Wed, Mar 8, 2023 at 7:58 AM Kui-Feng Lee <sinquersw@gmail.com> wrote: > > > > On 3/7/23 17:10, Andrii Nakryiko wrote: > > On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote: > >> > >> Create a pair of sockets that utilize the congestion control algorithm > >> under a particular name. Then switch up this congestion control > >> algorithm to another implementation and check whether newly created > >> connections using the same cc name now run the new implementation. > >> > >> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> > >> --- > >> .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 38 ++++++++++++ > >> .../selftests/bpf/progs/tcp_ca_update.c | 62 +++++++++++++++++++ > >> 2 files changed, 100 insertions(+) > >> create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c > >> > >> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > >> index e980188d4124..caaa9175ee36 100644 > >> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > >> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > >> @@ -8,6 +8,7 @@ > >> #include "bpf_dctcp.skel.h" > >> #include "bpf_cubic.skel.h" > >> #include "bpf_tcp_nogpl.skel.h" > >> +#include "tcp_ca_update.skel.h" > >> #include "bpf_dctcp_release.skel.h" > >> #include "tcp_ca_write_sk_pacing.skel.h" > >> #include "tcp_ca_incompl_cong_ops.skel.h" > >> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void) > >> libbpf_set_print(old_print_fn); > >> } > >> > >> +static void test_update_ca(void) > >> +{ > >> + struct tcp_ca_update *skel; > >> + struct bpf_link *link; > >> + int saved_ca1_cnt; > >> + int err; > >> + > >> + skel = tcp_ca_update__open(); > >> + if (!ASSERT_OK_PTR(skel, "open")) > >> + return; > >> + > >> + err = tcp_ca_update__load(skel); > > > > tcp_ca_update__open_and_load() > > > >> + if (!ASSERT_OK(err, "load")) { > >> + tcp_ca_update__destroy(skel); > >> + return; > >> + } > >> + > >> + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); > > > > I think it's time to generate link holder for each struct_ops map to > > the BPF skeleton, and support auto-attach of struct_ops skeleton. > > Please do that as a follow up, once this patch set lands. > > Got it. > > > > >> + ASSERT_OK_PTR(link, "attach_struct_ops"); > >> + > >> + do_test("tcp_ca_update", NULL); > >> + saved_ca1_cnt = skel->bss->ca1_cnt; > >> + ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); > >> + > >> + err = bpf_link__update_map(link, skel->maps.ca_update_2); > >> + ASSERT_OK(err, "update_struct_ops"); > >> + > >> + do_test("tcp_ca_update", NULL); > >> + ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); > >> + ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); > > > > how do we know that struct_ops programs were triggered? what > > guarantees that? if nothing, we are just adding another flaky > > networking test > > When an ack is received, cong_control of ca_update_1 and ca_update_2 > will be called if they are activated. By checking ca1_cnt & ca2_cnt, we > know which one is activated. Here, we check if the ca1_cnt keeps the > same and ca2_cnt increase to make that ca_update_2 have replaced > ca_update_1. I just don't see anything in the test ensuring that ack is sent/received, so it seems like we are relying on some background system activity and proper timing (unless I miss something, which is why I'm asking), so this is fragile, as in CI environment timings and background activity would be very different and unpredictable, causing flakiness of the test > > > > >> + > >> + bpf_link__destroy(link); > >> + tcp_ca_update__destroy(skel); > >> +} > >> + > >> void test_bpf_tcp_ca(void) > >> { > >> if (test__start_subtest("dctcp")) > >> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void) > >> test_incompl_cong_ops(); > >> if (test__start_subtest("unsupp_cong_op")) > >> test_unsupp_cong_op(); > >> + if (test__start_subtest("update_ca")) > >> + test_update_ca(); > >> } > >> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c > >> new file mode 100644 > >> index 000000000000..36a04be95df5 > >> --- /dev/null > >> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c > >> @@ -0,0 +1,62 @@ > >> +// SPDX-License-Identifier: GPL-2.0 > >> + > >> +#include "vmlinux.h" > >> + > >> +#include <bpf/bpf_helpers.h> > >> +#include <bpf/bpf_tracing.h> > >> + > >> +char _license[] SEC("license") = "GPL"; > >> + > >> +int ca1_cnt = 0; > >> +int ca2_cnt = 0; > >> + > >> +#define USEC_PER_SEC 1000000UL > >> + > >> +#define min(a, b) ((a) < (b) ? (a) : (b)) > >> + > >> +static inline struct tcp_sock *tcp_sk(const struct sock *sk) > >> +{ > >> + return (struct tcp_sock *)sk; > >> +} > >> + > >> +SEC("struct_ops/ca_update_1_cong_control") > >> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk, > >> + const struct rate_sample *rs) > >> +{ > >> + ca1_cnt++; > >> +} > >> + > >> +SEC("struct_ops/ca_update_2_cong_control") > >> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk, > >> + const struct rate_sample *rs) > >> +{ > >> + ca2_cnt++; > >> +} > >> + > >> +SEC("struct_ops/ca_update_ssthresh") > >> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk) > >> +{ > >> + return tcp_sk(sk)->snd_ssthresh; > >> +} > >> + > >> +SEC("struct_ops/ca_update_undo_cwnd") > >> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk) > >> +{ > >> + return tcp_sk(sk)->snd_cwnd; > >> +} > >> + > >> +SEC(".struct_ops.link") > >> +struct tcp_congestion_ops ca_update_1 = { > >> + .cong_control = (void *)ca_update_1_cong_control, > >> + .ssthresh = (void *)ca_update_ssthresh, > >> + .undo_cwnd = (void *)ca_update_undo_cwnd, > >> + .name = "tcp_ca_update", > >> +}; > >> + > >> +SEC(".struct_ops.link") > >> +struct tcp_congestion_ops ca_update_2 = { > >> + .cong_control = (void *)ca_update_2_cong_control, > >> + .ssthresh = (void *)ca_update_ssthresh, > >> + .undo_cwnd = (void *)ca_update_undo_cwnd, > >> + .name = "tcp_ca_update", > >> +}; > > > > please add a test where you combine both .struct_ops and > > .struct_ops.link, it's an obvious potentially problematic combination > > > > as I mentioned in previous patches, let's also have a negative test > > where bpf_link__update_map() fails > > Sure > > > > >> -- > >> 2.34.1 > >>
On 3/8/23 09:18, Andrii Nakryiko wrote: > On Wed, Mar 8, 2023 at 7:58 AM Kui-Feng Lee <sinquersw@gmail.com> wrote: >> >> >> >> On 3/7/23 17:10, Andrii Nakryiko wrote: >>> On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote: >>>> >>>> Create a pair of sockets that utilize the congestion control algorithm >>>> under a particular name. Then switch up this congestion control >>>> algorithm to another implementation and check whether newly created >>>> connections using the same cc name now run the new implementation. >>>> >>>> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> >>>> --- >>>> .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 38 ++++++++++++ >>>> .../selftests/bpf/progs/tcp_ca_update.c | 62 +++++++++++++++++++ >>>> 2 files changed, 100 insertions(+) >>>> create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c >>>> >>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c >>>> index e980188d4124..caaa9175ee36 100644 >>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c >>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c >>>> @@ -8,6 +8,7 @@ >>>> #include "bpf_dctcp.skel.h" >>>> #include "bpf_cubic.skel.h" >>>> #include "bpf_tcp_nogpl.skel.h" >>>> +#include "tcp_ca_update.skel.h" >>>> #include "bpf_dctcp_release.skel.h" >>>> #include "tcp_ca_write_sk_pacing.skel.h" >>>> #include "tcp_ca_incompl_cong_ops.skel.h" >>>> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void) >>>> libbpf_set_print(old_print_fn); >>>> } >>>> >>>> +static void test_update_ca(void) >>>> +{ >>>> + struct tcp_ca_update *skel; >>>> + struct bpf_link *link; >>>> + int saved_ca1_cnt; >>>> + int err; >>>> + >>>> + skel = tcp_ca_update__open(); >>>> + if (!ASSERT_OK_PTR(skel, "open")) >>>> + return; >>>> + >>>> + err = tcp_ca_update__load(skel); >>> >>> tcp_ca_update__open_and_load() >>> >>>> + if (!ASSERT_OK(err, "load")) { >>>> + tcp_ca_update__destroy(skel); >>>> + return; >>>> + } >>>> + >>>> + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); >>> >>> I think it's time to generate link holder for each struct_ops map to >>> the BPF skeleton, and support auto-attach of struct_ops skeleton. >>> Please do that as a follow up, once this patch set lands. >> >> Got it. >> >>> >>>> + ASSERT_OK_PTR(link, "attach_struct_ops"); >>>> + >>>> + do_test("tcp_ca_update", NULL); >>>> + saved_ca1_cnt = skel->bss->ca1_cnt; >>>> + ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); >>>> + >>>> + err = bpf_link__update_map(link, skel->maps.ca_update_2); >>>> + ASSERT_OK(err, "update_struct_ops"); >>>> + >>>> + do_test("tcp_ca_update", NULL); >>>> + ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); >>>> + ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); >>> >>> how do we know that struct_ops programs were triggered? what >>> guarantees that? if nothing, we are just adding another flaky >>> networking test >> >> When an ack is received, cong_control of ca_update_1 and ca_update_2 >> will be called if they are activated. By checking ca1_cnt & ca2_cnt, we >> know which one is activated. Here, we check if the ca1_cnt keeps the >> same and ca2_cnt increase to make that ca_update_2 have replaced >> ca_update_1. > > I just don't see anything in the test ensuring that ack is > sent/received, so it seems like we are relying on some background > system activity and proper timing (unless I miss something, which is > why I'm asking), so this is fragile, as in CI environment timings and > background activity would be very different and unpredictable, causing > flakiness of the test The do_test() function creates two sockets to form a direct connection that must receive at least one acknowledgment packet for the sockets to progress into an ESTABLISHED state. If they don't, that means it fails to establish a connection. > >> >>> >>>> + >>>> + bpf_link__destroy(link); >>>> + tcp_ca_update__destroy(skel); >>>> +} >>>> + >>>> void test_bpf_tcp_ca(void) >>>> { >>>> if (test__start_subtest("dctcp")) >>>> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void) >>>> test_incompl_cong_ops(); >>>> if (test__start_subtest("unsupp_cong_op")) >>>> test_unsupp_cong_op(); >>>> + if (test__start_subtest("update_ca")) >>>> + test_update_ca(); >>>> } >>>> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c >>>> new file mode 100644 >>>> index 000000000000..36a04be95df5 >>>> --- /dev/null >>>> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c >>>> @@ -0,0 +1,62 @@ >>>> +// SPDX-License-Identifier: GPL-2.0 >>>> + >>>> +#include "vmlinux.h" >>>> + >>>> +#include <bpf/bpf_helpers.h> >>>> +#include <bpf/bpf_tracing.h> >>>> + >>>> +char _license[] SEC("license") = "GPL"; >>>> + >>>> +int ca1_cnt = 0; >>>> +int ca2_cnt = 0; >>>> + >>>> +#define USEC_PER_SEC 1000000UL >>>> + >>>> +#define min(a, b) ((a) < (b) ? (a) : (b)) >>>> + >>>> +static inline struct tcp_sock *tcp_sk(const struct sock *sk) >>>> +{ >>>> + return (struct tcp_sock *)sk; >>>> +} >>>> + >>>> +SEC("struct_ops/ca_update_1_cong_control") >>>> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk, >>>> + const struct rate_sample *rs) >>>> +{ >>>> + ca1_cnt++; >>>> +} >>>> + >>>> +SEC("struct_ops/ca_update_2_cong_control") >>>> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk, >>>> + const struct rate_sample *rs) >>>> +{ >>>> + ca2_cnt++; >>>> +} >>>> + >>>> +SEC("struct_ops/ca_update_ssthresh") >>>> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk) >>>> +{ >>>> + return tcp_sk(sk)->snd_ssthresh; >>>> +} >>>> + >>>> +SEC("struct_ops/ca_update_undo_cwnd") >>>> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk) >>>> +{ >>>> + return tcp_sk(sk)->snd_cwnd; >>>> +} >>>> + >>>> +SEC(".struct_ops.link") >>>> +struct tcp_congestion_ops ca_update_1 = { >>>> + .cong_control = (void *)ca_update_1_cong_control, >>>> + .ssthresh = (void *)ca_update_ssthresh, >>>> + .undo_cwnd = (void *)ca_update_undo_cwnd, >>>> + .name = "tcp_ca_update", >>>> +}; >>>> + >>>> +SEC(".struct_ops.link") >>>> +struct tcp_congestion_ops ca_update_2 = { >>>> + .cong_control = (void *)ca_update_2_cong_control, >>>> + .ssthresh = (void *)ca_update_ssthresh, >>>> + .undo_cwnd = (void *)ca_update_undo_cwnd, >>>> + .name = "tcp_ca_update", >>>> +}; >>> >>> please add a test where you combine both .struct_ops and >>> .struct_ops.link, it's an obvious potentially problematic combination >>> >>> as I mentioned in previous patches, let's also have a negative test >>> where bpf_link__update_map() fails >> >> Sure >> >>> >>>> -- >>>> 2.34.1 >>>>
On Wed, Mar 8, 2023 at 10:10 AM Kui-Feng Lee <sinquersw@gmail.com> wrote: > > > > On 3/8/23 09:18, Andrii Nakryiko wrote: > > On Wed, Mar 8, 2023 at 7:58 AM Kui-Feng Lee <sinquersw@gmail.com> wrote: > >> > >> > >> > >> On 3/7/23 17:10, Andrii Nakryiko wrote: > >>> On Tue, Mar 7, 2023 at 3:34 PM Kui-Feng Lee <kuifeng@meta.com> wrote: > >>>> > >>>> Create a pair of sockets that utilize the congestion control algorithm > >>>> under a particular name. Then switch up this congestion control > >>>> algorithm to another implementation and check whether newly created > >>>> connections using the same cc name now run the new implementation. > >>>> > >>>> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> > >>>> --- > >>>> .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 38 ++++++++++++ > >>>> .../selftests/bpf/progs/tcp_ca_update.c | 62 +++++++++++++++++++ > >>>> 2 files changed, 100 insertions(+) > >>>> create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c > >>>> > >>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > >>>> index e980188d4124..caaa9175ee36 100644 > >>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > >>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c > >>>> @@ -8,6 +8,7 @@ > >>>> #include "bpf_dctcp.skel.h" > >>>> #include "bpf_cubic.skel.h" > >>>> #include "bpf_tcp_nogpl.skel.h" > >>>> +#include "tcp_ca_update.skel.h" > >>>> #include "bpf_dctcp_release.skel.h" > >>>> #include "tcp_ca_write_sk_pacing.skel.h" > >>>> #include "tcp_ca_incompl_cong_ops.skel.h" > >>>> @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void) > >>>> libbpf_set_print(old_print_fn); > >>>> } > >>>> > >>>> +static void test_update_ca(void) > >>>> +{ > >>>> + struct tcp_ca_update *skel; > >>>> + struct bpf_link *link; > >>>> + int saved_ca1_cnt; > >>>> + int err; > >>>> + > >>>> + skel = tcp_ca_update__open(); > >>>> + if (!ASSERT_OK_PTR(skel, "open")) > >>>> + return; > >>>> + > >>>> + err = tcp_ca_update__load(skel); > >>> > >>> tcp_ca_update__open_and_load() > >>> > >>>> + if (!ASSERT_OK(err, "load")) { > >>>> + tcp_ca_update__destroy(skel); > >>>> + return; > >>>> + } > >>>> + > >>>> + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); > >>> > >>> I think it's time to generate link holder for each struct_ops map to > >>> the BPF skeleton, and support auto-attach of struct_ops skeleton. > >>> Please do that as a follow up, once this patch set lands. > >> > >> Got it. > >> > >>> > >>>> + ASSERT_OK_PTR(link, "attach_struct_ops"); > >>>> + > >>>> + do_test("tcp_ca_update", NULL); > >>>> + saved_ca1_cnt = skel->bss->ca1_cnt; > >>>> + ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); > >>>> + > >>>> + err = bpf_link__update_map(link, skel->maps.ca_update_2); > >>>> + ASSERT_OK(err, "update_struct_ops"); > >>>> + > >>>> + do_test("tcp_ca_update", NULL); > >>>> + ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); > >>>> + ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); > >>> > >>> how do we know that struct_ops programs were triggered? what > >>> guarantees that? if nothing, we are just adding another flaky > >>> networking test > >> > >> When an ack is received, cong_control of ca_update_1 and ca_update_2 > >> will be called if they are activated. By checking ca1_cnt & ca2_cnt, we > >> know which one is activated. Here, we check if the ca1_cnt keeps the > >> same and ca2_cnt increase to make that ca_update_2 have replaced > >> ca_update_1. > > > > I just don't see anything in the test ensuring that ack is > > sent/received, so it seems like we are relying on some background > > system activity and proper timing (unless I miss something, which is > > why I'm asking), so this is fragile, as in CI environment timings and > > background activity would be very different and unpredictable, causing > > flakiness of the test > > > The do_test() function creates two sockets to form a direct connection > that must receive at least one acknowledgment packet for the sockets to > progress into an ESTABLISHED state. If they don't, that means it fails > to establish a connection. yeah, my bad, I *completely* missed `do_test("tcp_ca_update", NULL)` (even though I went specifically looking for something like that). It's all good here. > > > > >> > >>> > >>>> + > >>>> + bpf_link__destroy(link); > >>>> + tcp_ca_update__destroy(skel); > >>>> +} > >>>> + > >>>> void test_bpf_tcp_ca(void) > >>>> { > >>>> if (test__start_subtest("dctcp")) > >>>> @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void) > >>>> test_incompl_cong_ops(); > >>>> if (test__start_subtest("unsupp_cong_op")) > >>>> test_unsupp_cong_op(); > >>>> + if (test__start_subtest("update_ca")) > >>>> + test_update_ca(); > >>>> } > >>>> diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c > >>>> new file mode 100644 > >>>> index 000000000000..36a04be95df5 > >>>> --- /dev/null > >>>> +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c > >>>> @@ -0,0 +1,62 @@ > >>>> +// SPDX-License-Identifier: GPL-2.0 > >>>> + > >>>> +#include "vmlinux.h" > >>>> + > >>>> +#include <bpf/bpf_helpers.h> > >>>> +#include <bpf/bpf_tracing.h> > >>>> + > >>>> +char _license[] SEC("license") = "GPL"; > >>>> + > >>>> +int ca1_cnt = 0; > >>>> +int ca2_cnt = 0; > >>>> + > >>>> +#define USEC_PER_SEC 1000000UL > >>>> + > >>>> +#define min(a, b) ((a) < (b) ? (a) : (b)) > >>>> + > >>>> +static inline struct tcp_sock *tcp_sk(const struct sock *sk) > >>>> +{ > >>>> + return (struct tcp_sock *)sk; > >>>> +} > >>>> + > >>>> +SEC("struct_ops/ca_update_1_cong_control") > >>>> +void BPF_PROG(ca_update_1_cong_control, struct sock *sk, > >>>> + const struct rate_sample *rs) > >>>> +{ > >>>> + ca1_cnt++; > >>>> +} > >>>> + > >>>> +SEC("struct_ops/ca_update_2_cong_control") > >>>> +void BPF_PROG(ca_update_2_cong_control, struct sock *sk, > >>>> + const struct rate_sample *rs) > >>>> +{ > >>>> + ca2_cnt++; > >>>> +} > >>>> + > >>>> +SEC("struct_ops/ca_update_ssthresh") > >>>> +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk) > >>>> +{ > >>>> + return tcp_sk(sk)->snd_ssthresh; > >>>> +} > >>>> + > >>>> +SEC("struct_ops/ca_update_undo_cwnd") > >>>> +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk) > >>>> +{ > >>>> + return tcp_sk(sk)->snd_cwnd; > >>>> +} > >>>> + > >>>> +SEC(".struct_ops.link") > >>>> +struct tcp_congestion_ops ca_update_1 = { > >>>> + .cong_control = (void *)ca_update_1_cong_control, > >>>> + .ssthresh = (void *)ca_update_ssthresh, > >>>> + .undo_cwnd = (void *)ca_update_undo_cwnd, > >>>> + .name = "tcp_ca_update", > >>>> +}; > >>>> + > >>>> +SEC(".struct_ops.link") > >>>> +struct tcp_congestion_ops ca_update_2 = { > >>>> + .cong_control = (void *)ca_update_2_cong_control, > >>>> + .ssthresh = (void *)ca_update_ssthresh, > >>>> + .undo_cwnd = (void *)ca_update_undo_cwnd, > >>>> + .name = "tcp_ca_update", > >>>> +}; > >>> > >>> please add a test where you combine both .struct_ops and > >>> .struct_ops.link, it's an obvious potentially problematic combination > >>> > >>> as I mentioned in previous patches, let's also have a negative test > >>> where bpf_link__update_map() fails > >> > >> Sure > >> > >>> > >>>> -- > >>>> 2.34.1 > >>>>
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e980188d4124..caaa9175ee36 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -8,6 +8,7 @@ #include "bpf_dctcp.skel.h" #include "bpf_cubic.skel.h" #include "bpf_tcp_nogpl.skel.h" +#include "tcp_ca_update.skel.h" #include "bpf_dctcp_release.skel.h" #include "tcp_ca_write_sk_pacing.skel.h" #include "tcp_ca_incompl_cong_ops.skel.h" @@ -381,6 +382,41 @@ static void test_unsupp_cong_op(void) libbpf_set_print(old_print_fn); } +static void test_update_ca(void) +{ + struct tcp_ca_update *skel; + struct bpf_link *link; + int saved_ca1_cnt; + int err; + + skel = tcp_ca_update__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + err = tcp_ca_update__load(skel); + if (!ASSERT_OK(err, "load")) { + tcp_ca_update__destroy(skel); + return; + } + + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); + ASSERT_OK_PTR(link, "attach_struct_ops"); + + do_test("tcp_ca_update", NULL); + saved_ca1_cnt = skel->bss->ca1_cnt; + ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); + + err = bpf_link__update_map(link, skel->maps.ca_update_2); + ASSERT_OK(err, "update_struct_ops"); + + do_test("tcp_ca_update", NULL); + ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); + ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); + + bpf_link__destroy(link); + tcp_ca_update__destroy(skel); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -399,4 +435,6 @@ void test_bpf_tcp_ca(void) test_incompl_cong_ops(); if (test__start_subtest("unsupp_cong_op")) test_unsupp_cong_op(); + if (test__start_subtest("update_ca")) + test_update_ca(); } diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c new file mode 100644 index 000000000000..36a04be95df5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +int ca1_cnt = 0; +int ca2_cnt = 0; + +#define USEC_PER_SEC 1000000UL + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/ca_update_1_cong_control") +void BPF_PROG(ca_update_1_cong_control, struct sock *sk, + const struct rate_sample *rs) +{ + ca1_cnt++; +} + +SEC("struct_ops/ca_update_2_cong_control") +void BPF_PROG(ca_update_2_cong_control, struct sock *sk, + const struct rate_sample *rs) +{ + ca2_cnt++; +} + +SEC("struct_ops/ca_update_ssthresh") +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/ca_update_undo_cwnd") +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops.link") +struct tcp_congestion_ops ca_update_1 = { + .cong_control = (void *)ca_update_1_cong_control, + .ssthresh = (void *)ca_update_ssthresh, + .undo_cwnd = (void *)ca_update_undo_cwnd, + .name = "tcp_ca_update", +}; + +SEC(".struct_ops.link") +struct tcp_congestion_ops ca_update_2 = { + .cong_control = (void *)ca_update_2_cong_control, + .ssthresh = (void *)ca_update_ssthresh, + .undo_cwnd = (void *)ca_update_undo_cwnd, + .name = "tcp_ca_update", +};
Create a pair of sockets that utilize the congestion control algorithm under a particular name. Then switch up this congestion control algorithm to another implementation and check whether newly created connections using the same cc name now run the new implementation. Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> --- .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 38 ++++++++++++ .../selftests/bpf/progs/tcp_ca_update.c | 62 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c