Message ID | 20220806074019.2756957-8-houtao@huaweicloud.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | fixes for bpf map iterator | expand |
On 8/6/22 12:40 AM, Hou Tao wrote: > From: Hou Tao <houtao1@huawei.com> > > After closing both related link fd and map fd, reading the map > iterator fd to ensure it is OK to do so. > > Signed-off-by: Hou Tao <houtao1@huawei.com> > --- > .../selftests/bpf/prog_tests/bpf_iter.c | 90 +++++++++++++++++++ > 1 file changed, 90 insertions(+) > > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > index a33874b081b6..94c2c8df3fe4 100644 > --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > @@ -28,6 +28,7 @@ > #include "bpf_iter_test_kern6.skel.h" > #include "bpf_iter_bpf_link.skel.h" > #include "bpf_iter_ksym.skel.h" > +#include "bpf_iter_sockmap.skel.h" > > static int duration; > > @@ -67,6 +68,48 @@ static void do_dummy_read(struct bpf_program *prog) > bpf_link__destroy(link); > } > > +static void do_read_map_iter_fd(struct bpf_object_skeleton **skel, struct bpf_program *prog, > + struct bpf_map *map) > +{ > + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); > + union bpf_iter_link_info linfo; > + struct bpf_link *link; > + char buf[16] = {}; > + int iter_fd, len; > + > + memset(&linfo, 0, sizeof(linfo)); > + linfo.map.map_fd = bpf_map__fd(map); > + opts.link_info = &linfo; > + opts.link_info_len = sizeof(linfo); > + link = bpf_program__attach_iter(prog, &opts); > + if (!ASSERT_OK_PTR(link, "attach_map_iter")) > + return; > + > + iter_fd = bpf_iter_create(bpf_link__fd(link)); > + if (!ASSERT_GE(iter_fd, 0, "create_map_iter")) { > + bpf_link__destroy(link); > + return; > + } > + > + /* Close link and map fd prematurely */ > + bpf_link__destroy(link); > + bpf_object__destroy_skeleton(*skel); > + *skel = NULL; > + > + /* Let kworker to run first */ Which kworker? 
> + usleep(100); > + /* Sock map is freed after two synchronize_rcu() calls, so wait */ > + kern_sync_rcu(); > + kern_sync_rcu(); In btf_map_in_map.c, the comment mentions two kern_sync_rcu() is needed for 5.8 and earlier kernel. Other cases in prog_tests/ directory only has one kern_sync_rcu(). Why we need two kern_sync_rcu() for the current kernel? > + > + /* Read after both map fd and link fd are closed */ > + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) > + ; > + ASSERT_GE(len, 0, "read_iterator"); > + > + close(iter_fd); > +} > + > static int read_fd_into_buffer(int fd, char *buf, int size) > { > int bufleft = size; > @@ -827,6 +870,20 @@ static void test_bpf_array_map(void) > bpf_iter_bpf_array_map__destroy(skel); > } > > +static void test_bpf_array_map_iter_fd(void) > +{ > + struct bpf_iter_bpf_array_map *skel; > + > + skel = bpf_iter_bpf_array_map__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load")) > + return; > + > + do_read_map_iter_fd(&skel->skeleton, skel->progs.dump_bpf_array_map, > + skel->maps.arraymap1); > + > + bpf_iter_bpf_array_map__destroy(skel); > +} > + [...]
Hi, On 8/8/2022 11:15 PM, Yonghong Song wrote: > > > On 8/6/22 12:40 AM, Hou Tao wrote: >> From: Hou Tao <houtao1@huawei.com> >> >> After closing both related link fd and map fd, reading the map >> iterator fd to ensure it is OK to do so. >> >> Signed-off-by: Hou Tao <houtao1@huawei.com> >> --- >> .../selftests/bpf/prog_tests/bpf_iter.c | 90 +++++++++++++++++++ >> 1 file changed, 90 insertions(+) SNIP >> + /* Close link and map fd prematurely */ >> + bpf_link__destroy(link); >> + bpf_object__destroy_skeleton(*skel); >> + *skel = NULL; >> + >> + /* Let kworker to run first */ > > Which kworker? Now bpf map is freed through bpf_map_free_deferred() and it is running in the kworker context. Will be more specific in v2. > >> + usleep(100); >> + /* Sock map is freed after two synchronize_rcu() calls, so wait */ >> + kern_sync_rcu(); >> + kern_sync_rcu(); > > In btf_map_in_map.c, the comment mentions two kern_sync_rcu() > is needed for 5.8 and earlier kernel. Other cases in prog_tests/ > directory only has one kern_sync_rcu(). Why we need two > kern_sync_rcu() for the current kernel? As tried to explain in the comment, for both sock map and sock storage map, the used memory is freed after two synchronize_rcu() calls, so if there are not two kern_sync_rcu() in the test prog, reading the iterator fd will not be able to trigger the Use-After-Free problem and it will end normally. 
> >> + >> + /* Read after both map fd and link fd are closed */ >> + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) >> + ; >> + ASSERT_GE(len, 0, "read_iterator"); >> + >> + close(iter_fd); >> +} >> + >> static int read_fd_into_buffer(int fd, char *buf, int size) >> { >> int bufleft = size; >> @@ -827,6 +870,20 @@ static void test_bpf_array_map(void) >> bpf_iter_bpf_array_map__destroy(skel); >> } >> +static void test_bpf_array_map_iter_fd(void) >> +{ >> + struct bpf_iter_bpf_array_map *skel; >> + >> + skel = bpf_iter_bpf_array_map__open_and_load(); >> + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load")) >> + return; >> + >> + do_read_map_iter_fd(&skel->skeleton, skel->progs.dump_bpf_array_map, >> + skel->maps.arraymap1); >> + >> + bpf_iter_bpf_array_map__destroy(skel); >> +} >> + > [...]
On Tue, Aug 09, 2022 at 09:23:39AM +0800, houtao wrote: > >> + /* Sock map is freed after two synchronize_rcu() calls, so wait */ > >> + kern_sync_rcu(); > >> + kern_sync_rcu(); > > > > In btf_map_in_map.c, the comment mentions two kern_sync_rcu() > > is needed for 5.8 and earlier kernel. Other cases in prog_tests/ > > directory only has one kern_sync_rcu(). Why we need two > > kern_sync_rcu() for the current kernel? > As tried to explain in the comment, for both sock map and sock storage map, the > used memory is freed two synchronize_rcu(), so if there are not two > kern_sync_rcu() in the test prog, reading the iterator fd will not be able to > trigger the Use-After-Free problem and it will end normally. For sk storage map, the map can also be used by the kernel sk_clone_lock() code path. The deferred prog and map free is not going to help since it only ensures no bpf prog is still using it but cannot ensure no kernel rcu reader is using it. There is more details comment in bpf_local_storage_map_free() to explain for both synchronize_rcu()s.
On 8/9/22 12:13 PM, Martin KaFai Lau wrote: > On Tue, Aug 09, 2022 at 09:23:39AM +0800, houtao wrote: >>>> + /* Sock map is freed after two synchronize_rcu() calls, so wait */ >>>> + kern_sync_rcu(); >>>> + kern_sync_rcu(); >>> >>> In btf_map_in_map.c, the comment mentions two kern_sync_rcu() >>> is needed for 5.8 and earlier kernel. Other cases in prog_tests/ >>> directory only has one kern_sync_rcu(). Why we need two >>> kern_sync_rcu() for the current kernel? >> As tried to explain in the comment, for both sock map and sock storage map, the >> used memory is freed two synchronize_rcu(), so if there are not two >> kern_sync_rcu() in the test prog, reading the iterator fd will not be able to >> trigger the Use-After-Free problem and it will end normally. > For sk storage map, the map can also be used by the > kernel sk_clone_lock() code path. The deferred prog and map > free is not going to help since it only ensures no bpf prog is > still using it but cannot ensure no kernel rcu reader is using it. > There is more details comment in bpf_local_storage_map_free() to > explain for both synchronize_rcu()s. Thanks for explanation!
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index a33874b081b6..94c2c8df3fe4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -28,6 +28,7 @@ #include "bpf_iter_test_kern6.skel.h" #include "bpf_iter_bpf_link.skel.h" #include "bpf_iter_ksym.skel.h" +#include "bpf_iter_sockmap.skel.h" static int duration; @@ -67,6 +68,48 @@ static void do_dummy_read(struct bpf_program *prog) bpf_link__destroy(link); } +static void do_read_map_iter_fd(struct bpf_object_skeleton **skel, struct bpf_program *prog, + struct bpf_map *map) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct bpf_link *link; + char buf[16] = {}; + int iter_fd, len; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(map); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(prog, &opts); + if (!ASSERT_OK_PTR(link, "attach_map_iter")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "create_map_iter")) { + bpf_link__destroy(link); + return; + } + + /* Close link and map fd prematurely */ + bpf_link__destroy(link); + bpf_object__destroy_skeleton(*skel); + *skel = NULL; + + /* Let kworker to run first */ + usleep(100); + /* Sock map is freed after two synchronize_rcu() calls, so wait */ + kern_sync_rcu(); + kern_sync_rcu(); + + /* Read after both map fd and link fd are closed */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + ASSERT_GE(len, 0, "read_iterator"); + + close(iter_fd); +} + static int read_fd_into_buffer(int fd, char *buf, int size) { int bufleft = size; @@ -827,6 +870,20 @@ static void test_bpf_array_map(void) bpf_iter_bpf_array_map__destroy(skel); } +static void test_bpf_array_map_iter_fd(void) +{ + struct bpf_iter_bpf_array_map *skel; + + skel = bpf_iter_bpf_array_map__open_and_load(); + if 
(!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load")) + return; + + do_read_map_iter_fd(&skel->skeleton, skel->progs.dump_bpf_array_map, + skel->maps.arraymap1); + + bpf_iter_bpf_array_map__destroy(skel); +} + static void test_bpf_percpu_array_map(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -1009,6 +1066,20 @@ static void test_bpf_sk_storage_get(void) bpf_iter_bpf_sk_storage_helpers__destroy(skel); } +static void test_bpf_sk_stoarge_map_iter_fd(void) +{ + struct bpf_iter_bpf_sk_storage_map *skel; + + skel = bpf_iter_bpf_sk_storage_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load")) + return; + + do_read_map_iter_fd(&skel->skeleton, skel->progs.dump_bpf_sk_storage_map, + skel->maps.sk_stg_map); + + bpf_iter_bpf_sk_storage_map__destroy(skel); +} + static void test_bpf_sk_storage_map(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -1217,6 +1288,19 @@ static void test_task_vma(void) bpf_iter_task_vma__destroy(skel); } +void test_bpf_sockmap_map_iter_fd(void) +{ + struct bpf_iter_sockmap *skel; + + skel = bpf_iter_sockmap__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_sockmap__open_and_load")) + return; + + do_read_map_iter_fd(&skel->skeleton, skel->progs.copy, skel->maps.sockmap); + + bpf_iter_sockmap__destroy(skel); +} + void test_bpf_iter(void) { if (test__start_subtest("btf_id_or_null")) @@ -1267,10 +1351,14 @@ void test_bpf_iter(void) test_bpf_percpu_hash_map(); if (test__start_subtest("bpf_array_map")) test_bpf_array_map(); + if (test__start_subtest("bpf_array_map_iter_fd")) + test_bpf_array_map_iter_fd(); if (test__start_subtest("bpf_percpu_array_map")) test_bpf_percpu_array_map(); if (test__start_subtest("bpf_sk_storage_map")) test_bpf_sk_storage_map(); + if (test__start_subtest("bpf_sk_storage_map_iter_fd")) + test_bpf_sk_stoarge_map_iter_fd(); if (test__start_subtest("bpf_sk_storage_delete")) test_bpf_sk_storage_delete(); if (test__start_subtest("bpf_sk_storage_get")) @@ 
-1283,4 +1371,6 @@ void test_bpf_iter(void) test_link_iter(); if (test__start_subtest("ksym")) test_ksym_iter(); + if (test__start_subtest("bpf_sockmap_map_iter_fd")) + test_bpf_sockmap_map_iter_fd(); }