Message ID | 20221214103857.69082-2-xiangxia.m.yue@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | [bpf-next,1/2] bpf: hash map, avoid deadlock with suitable hash mask | expand |
On 12/14/22 2:38 AM, xiangxia.m.yue@gmail.com wrote: > From: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > This testing show how to reproduce deadlock in special case. > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> > Cc: Alexei Starovoitov <ast@kernel.org> > Cc: Daniel Borkmann <daniel@iogearbox.net> > Cc: Andrii Nakryiko <andrii@kernel.org> > Cc: Martin KaFai Lau <martin.lau@linux.dev> > Cc: Song Liu <song@kernel.org> > Cc: Yonghong Song <yhs@fb.com> > Cc: John Fastabend <john.fastabend@gmail.com> > Cc: KP Singh <kpsingh@kernel.org> > Cc: Stanislav Fomichev <sdf@google.com> > Cc: Hao Luo <haoluo@google.com> > Cc: Jiri Olsa <jolsa@kernel.org> > Cc: Hou Tao <houtao1@huawei.com> > --- > .../selftests/bpf/prog_tests/htab_deadlock.c | 74 +++++++++++++++++++ > .../selftests/bpf/progs/htab_deadlock.c | 30 ++++++++ > 2 files changed, 104 insertions(+) > create mode 100644 tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > create mode 100644 tools/testing/selftests/bpf/progs/htab_deadlock.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > new file mode 100644 > index 000000000000..7dce4c2fe4f5 > --- /dev/null > +++ b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > @@ -0,0 +1,74 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2022 DiDi Global Inc. */ > +#define _GNU_SOURCE > +#include <pthread.h> > +#include <sched.h> > +#include <test_progs.h> > + > +#include "htab_deadlock.skel.h" > + > +static int perf_event_open(void) > +{ > + struct perf_event_attr attr = {0}; > + int pfd; > + > + /* create perf event */ > + attr.size = sizeof(attr); > + attr.type = PERF_TYPE_HARDWARE; > + attr.config = PERF_COUNT_HW_CPU_CYCLES; > + attr.freq = 1; > + attr.sample_freq = 1000; > + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); > + > + return pfd >= 0 ? pfd : -errno; > +} > + > +void test_htab_deadlock(void) > +{ > + unsigned int val = 0, key = 20; > + struct bpf_link *link = NULL; > + struct htab_deadlock *skel; > + cpu_set_t cpus; > + int err; > + int pfd; > + int i; No need to have three lines for type 'int' variables. One line is enough to hold all three variables. > + > + skel = htab_deadlock__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) > + return; > + > + err = htab_deadlock__attach(skel); > + if (!ASSERT_OK(err, "skel_attach")) > + goto clean_skel; > + > + /* NMI events. */ > + pfd = perf_event_open(); > + if (pfd < 0) { > + if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { > + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); > + test__skip(); > + goto clean_skel; > + } > + if (!ASSERT_GE(pfd, 0, "perf_event_open")) > + goto clean_skel; > + } > + > + link = bpf_program__attach_perf_event(skel->progs.bpf_perf_event, pfd); > + if (!ASSERT_OK_PTR(link, "attach_perf_event")) > + goto clean_pfd; > + > + /* Pinned on CPU 0 */ > + CPU_ZERO(&cpus); > + CPU_SET(0, &cpus); > + pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus); > + > + for (i = 0; i < 100000; i++) Please add some comments in the above loop to mention the test expects (hopefully) duriing one of bpf_map_update_elem(), one perf event might kick to trigger prog bpf_nmi_handle run. > + bpf_map_update_elem(bpf_map__fd(skel->maps.htab), > + &key, &val, BPF_ANY); > + > + bpf_link__destroy(link); > +clean_pfd: > + close(pfd); > +clean_skel: > + htab_deadlock__destroy(skel); > +} > diff --git a/tools/testing/selftests/bpf/progs/htab_deadlock.c b/tools/testing/selftests/bpf/progs/htab_deadlock.c > new file mode 100644 > index 000000000000..c4bd1567f882 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/htab_deadlock.c > @@ -0,0 +1,30 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2022 DiDi Global Inc. */ > +#include <linux/bpf.h> > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > + > +char _license[] SEC("license") = "GPL"; > + > +struct { > + __uint(type, BPF_MAP_TYPE_HASH); > + __uint(max_entries, 2); > + __uint(map_flags, BPF_F_ZERO_SEED); > + __uint(key_size, sizeof(unsigned int)); > + __uint(value_size, sizeof(unsigned int)); > +} htab SEC(".maps"); You can use __type(key, unsigned int); __type(value, unsigned int); This is more expressive. > + > +SEC("fentry/nmi_handle") > +int bpf_nmi_handle(struct pt_regs *regs) Do we need this fentry function? Can be just put bpf_map_update_elem() into bpf_perf_event program? Also s390x and aarch64 failed the test due to none/incomplete trampoline support. See bpf ci https://github.com/kernel-patches/bpf/pull/4211. You need to add them in their corresponding deny list if this fentry bpf program is used. > +{ > + unsigned int val = 0, key = 4; > + > + bpf_map_update_elem(&htab, &key, &val, BPF_ANY); > + return 0; > +} > + > +SEC("perf_event") > +int bpf_perf_event(struct pt_regs *regs) > +{ > + return 0; > +}
On Fri, Dec 16, 2022 at 12:10 PM Yonghong Song <yhs@meta.com> wrote: > > > > On 12/14/22 2:38 AM, xiangxia.m.yue@gmail.com wrote: > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > > > This testing show how to reproduce deadlock in special case. > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > Cc: Alexei Starovoitov <ast@kernel.org> > > Cc: Daniel Borkmann <daniel@iogearbox.net> > > Cc: Andrii Nakryiko <andrii@kernel.org> > > Cc: Martin KaFai Lau <martin.lau@linux.dev> > > Cc: Song Liu <song@kernel.org> > > Cc: Yonghong Song <yhs@fb.com> > > Cc: John Fastabend <john.fastabend@gmail.com> > > Cc: KP Singh <kpsingh@kernel.org> > > Cc: Stanislav Fomichev <sdf@google.com> > > Cc: Hao Luo <haoluo@google.com> > > Cc: Jiri Olsa <jolsa@kernel.org> > > Cc: Hou Tao <houtao1@huawei.com> > > --- > > .../selftests/bpf/prog_tests/htab_deadlock.c | 74 +++++++++++++++++++ > > .../selftests/bpf/progs/htab_deadlock.c | 30 ++++++++ > > 2 files changed, 104 insertions(+) > > create mode 100644 tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > > create mode 100644 tools/testing/selftests/bpf/progs/htab_deadlock.c > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > > new file mode 100644 > > index 000000000000..7dce4c2fe4f5 > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > > @@ -0,0 +1,74 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* Copyright (c) 2022 DiDi Global Inc. */ > > +#define _GNU_SOURCE > > +#include <pthread.h> > > +#include <sched.h> > > +#include <test_progs.h> > > + > > +#include "htab_deadlock.skel.h" > > + > > +static int perf_event_open(void) > > +{ > > + struct perf_event_attr attr = {0}; > > + int pfd; > > + > > + /* create perf event */ > > + attr.size = sizeof(attr); > > + attr.type = PERF_TYPE_HARDWARE; > > + attr.config = PERF_COUNT_HW_CPU_CYCLES; > > + attr.freq = 1; > > + attr.sample_freq = 1000; > > + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); > > + > > + return pfd >= 0 ? pfd : -errno; > > +} > > + > > +void test_htab_deadlock(void) > > +{ > > + unsigned int val = 0, key = 20; > > + struct bpf_link *link = NULL; > > + struct htab_deadlock *skel; > > + cpu_set_t cpus; > > + int err; > > + int pfd; > > + int i; > > No need to have three lines for type 'int' variables. One line > is enough to hold all three variables. > > > + > > + skel = htab_deadlock__open_and_load(); > > + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) > > + return; > > + > > + err = htab_deadlock__attach(skel); > > + if (!ASSERT_OK(err, "skel_attach")) > > + goto clean_skel; > > + > > + /* NMI events. */ > > + pfd = perf_event_open(); > > + if (pfd < 0) { > > + if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { > > + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); > > + test__skip(); > > + goto clean_skel; > > + } > > + if (!ASSERT_GE(pfd, 0, "perf_event_open")) > > + goto clean_skel; > > + } > > + > > + link = bpf_program__attach_perf_event(skel->progs.bpf_perf_event, pfd); > > + if (!ASSERT_OK_PTR(link, "attach_perf_event")) > > + goto clean_pfd; > > + > > + /* Pinned on CPU 0 */ > > + CPU_ZERO(&cpus); > > + CPU_SET(0, &cpus); > > + pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus); > > + > > + for (i = 0; i < 100000; i++) > > Please add some comments in the above loop to mention the test > expects (hopefully) duriing one of bpf_map_update_elem(), one > perf event might kick to trigger prog bpf_nmi_handle run. > > > + bpf_map_update_elem(bpf_map__fd(skel->maps.htab), > > + &key, &val, BPF_ANY); > > + > > + bpf_link__destroy(link); > > +clean_pfd: > > + close(pfd); > > +clean_skel: > > + htab_deadlock__destroy(skel); > > +} > > diff --git a/tools/testing/selftests/bpf/progs/htab_deadlock.c b/tools/testing/selftests/bpf/progs/htab_deadlock.c > > new file mode 100644 > > index 000000000000..c4bd1567f882 > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/progs/htab_deadlock.c > > @@ -0,0 +1,30 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* Copyright (c) 2022 DiDi Global Inc. */ > > +#include <linux/bpf.h> > > +#include <bpf/bpf_helpers.h> > > +#include <bpf/bpf_tracing.h> > > + > > +char _license[] SEC("license") = "GPL"; > > + > > +struct { > > + __uint(type, BPF_MAP_TYPE_HASH); > > + __uint(max_entries, 2); > > + __uint(map_flags, BPF_F_ZERO_SEED); > > + __uint(key_size, sizeof(unsigned int)); > > + __uint(value_size, sizeof(unsigned int)); > > +} htab SEC(".maps"); > > You can use > __type(key, unsigned int); > __type(value, unsigned int); > This is more expressive. > > > + > > +SEC("fentry/nmi_handle") > > +int bpf_nmi_handle(struct pt_regs *regs) > > Do we need this fentry function? Can be just put > bpf_map_update_elem() into bpf_perf_event program? Hi bpf_overflow_handler will check the bpf_prog_active, and bpf_map_update_value invokes bpf_disable_instrumentation, so the deadlock will not occur. In fentry/nmi_handle, bpf does not check the bpf_prog_active. Other comments look good to me, I will send v2 soon. > Also s390x and aarch64 failed the test due to none/incomplete trampoline > support. See bpf ci https://github.com/kernel-patches/bpf/pull/4211. > You need to add them in their corresponding deny list if this fentry > bpf program is used. > > > +{ > > + unsigned int val = 0, key = 4; > > + > > + bpf_map_update_elem(&htab, &key, &val, BPF_ANY); > > + return 0; > > +} > > + > > +SEC("perf_event") > > +int bpf_perf_event(struct pt_regs *regs) > > +{ > > + return 0; > > +}
Hi, On 12/16/2022 12:10 PM, Yonghong Song wrote: > > > On 12/14/22 2:38 AM, xiangxia.m.yue@gmail.com wrote: >> From: Tonghao Zhang <xiangxia.m.yue@gmail.com> >> >> This testing show how to reproduce deadlock in special case. Could you elaborate the commit message to show >> >> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> >> Cc: Alexei Starovoitov <ast@kernel.org> >> Cc: Daniel Borkmann <daniel@iogearbox.net> >> Cc: Andrii Nakryiko <andrii@kernel.org> >> Cc: Martin KaFai Lau <martin.lau@linux.dev> >> Cc: Song Liu <song@kernel.org> >> Cc: Yonghong Song <yhs@fb.com> >> Cc: John Fastabend <john.fastabend@gmail.com> >> Cc: KP Singh <kpsingh@kernel.org> >> Cc: Stanislav Fomichev <sdf@google.com> >> Cc: Hao Luo <haoluo@google.com> >> Cc: Jiri Olsa <jolsa@kernel.org> >> Cc: Hou Tao <houtao1@huawei.com> >> --- >> .../selftests/bpf/prog_tests/htab_deadlock.c | 74 +++++++++++++++++++ >> .../selftests/bpf/progs/htab_deadlock.c | 30 ++++++++ >> 2 files changed, 104 insertions(+) >> create mode 100644 tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >> create mode 100644 tools/testing/selftests/bpf/progs/htab_deadlock.c >> >> diff --git a/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >> b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >> new file mode 100644 >> index 000000000000..7dce4c2fe4f5 >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >> @@ -0,0 +1,74 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2022 DiDi Global Inc. */ >> +#define _GNU_SOURCE >> +#include <pthread.h> >> +#include <sched.h> >> +#include <test_progs.h> >> + >> +#include "htab_deadlock.skel.h" >> + >> +static int perf_event_open(void) >> +{ >> + struct perf_event_attr attr = {0}; >> + int pfd; >> + >> + /* create perf event */ >> + attr.size = sizeof(attr); >> + attr.type = PERF_TYPE_HARDWARE; >> + attr.config = PERF_COUNT_HW_CPU_CYCLES; >> + attr.freq = 1; >> + attr.sample_freq = 1000; >> + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, >> PERF_FLAG_FD_CLOEXEC); >> + >> + return pfd >= 0 ? pfd : -errno; >> +} >> + >> +void test_htab_deadlock(void) >> +{ >> + unsigned int val = 0, key = 20; >> + struct bpf_link *link = NULL; >> + struct htab_deadlock *skel; >> + cpu_set_t cpus; >> + int err; >> + int pfd; >> + int i; > > No need to have three lines for type 'int' variables. One line > is enough to hold all three variables. > >> + >> + skel = htab_deadlock__open_and_load(); >> + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) >> + return; >> + >> + err = htab_deadlock__attach(skel); >> + if (!ASSERT_OK(err, "skel_attach")) >> + goto clean_skel; >> + >> + /* NMI events. */ >> + pfd = perf_event_open(); >> + if (pfd < 0) { >> + if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { >> + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); >> + test__skip(); >> + goto clean_skel; >> + } >> + if (!ASSERT_GE(pfd, 0, "perf_event_open")) >> + goto clean_skel; >> + } >> + >> + link = bpf_program__attach_perf_event(skel->progs.bpf_perf_event, pfd); >> + if (!ASSERT_OK_PTR(link, "attach_perf_event")) >> + goto clean_pfd; >> + >> + /* Pinned on CPU 0 */ >> + CPU_ZERO(&cpus); >> + CPU_SET(0, &cpus); >> + pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus); The test will run in process context, so use sched_setaffinity() will better than _np API. >> + >> + for (i = 0; i < 100000; i++) > > Please add some comments in the above loop to mention the test > expects (hopefully) duriing one of bpf_map_update_elem(), one > perf event might kick to trigger prog bpf_nmi_handle run. > >> + bpf_map_update_elem(bpf_map__fd(skel->maps.htab), >> + &key, &val, BPF_ANY); It would be better if we can check that bpf_nmi_handle is being called and the return value of bpf_map_update_elem() in bpf_nmi_handle is expected here. >> + >> + bpf_link__destroy(link); >> +clean_pfd: >> + close(pfd); >> +clean_skel: >> + htab_deadlock__destroy(skel); >> +} >> diff --git a/tools/testing/selftests/bpf/progs/htab_deadlock.c >> b/tools/testing/selftests/bpf/progs/htab_deadlock.c >> new file mode 100644 >> index 000000000000..c4bd1567f882 >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/progs/htab_deadlock.c >> @@ -0,0 +1,30 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2022 DiDi Global Inc. */ >> +#include <linux/bpf.h> >> +#include <bpf/bpf_helpers.h> >> +#include <bpf/bpf_tracing.h> >> + >> +char _license[] SEC("license") = "GPL"; >> + >> +struct { >> + __uint(type, BPF_MAP_TYPE_HASH); >> + __uint(max_entries, 2); >> + __uint(map_flags, BPF_F_ZERO_SEED); >> + __uint(key_size, sizeof(unsigned int)); >> + __uint(value_size, sizeof(unsigned int)); >> +} htab SEC(".maps"); > > You can use > __type(key, unsigned int); > __type(value, unsigned int); > This is more expressive. > >> + >> +SEC("fentry/nmi_handle") >> +int bpf_nmi_handle(struct pt_regs *regs) > > Do we need this fentry function? Can be just put > bpf_map_update_elem() into bpf_perf_event program? IIRC bpf_perf_event program will check bpf_prog_active, and bpf_map_update_value() will increase it, so fentry is needed here. > > Also s390x and aarch64 failed the test due to none/incomplete trampoline > support. See bpf ci https://github.com/kernel-patches/bpf/pull/4211. > You need to add them in their corresponding deny list if this fentry > bpf program is used. > >> +{ >> + unsigned int val = 0, key = 4; >> + >> + bpf_map_update_elem(&htab, &key, &val, BPF_ANY); >> + return 0; >> +} >> + >> +SEC("perf_event") >> +int bpf_perf_event(struct pt_regs *regs) >> +{ >> + return 0; >> +} > .
Hi, On 12/16/2022 6:41 PM, Hou Tao wrote: > Hi, > > On 12/16/2022 12:10 PM, Yonghong Song wrote: >> >> On 12/14/22 2:38 AM, xiangxia.m.yue@gmail.com wrote: >>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com> >>> >>> This testing show how to reproduce deadlock in special case. > Could you elaborate the commit message to show Sorry about that. Just hit the send button too soon. It would be better if you can describe the steps to reproduce the deadlock problem in commit message. .
On Fri, Dec 16, 2022 at 6:45 PM Hou Tao <houtao1@huawei.com> wrote: > > Hi, > > On 12/16/2022 6:41 PM, Hou Tao wrote: > > Hi, > > > > On 12/16/2022 12:10 PM, Yonghong Song wrote: > >> > >> On 12/14/22 2:38 AM, xiangxia.m.yue@gmail.com wrote: > >>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com> > >>> > >>> This testing show how to reproduce deadlock in special case. > > Could you elaborate the commit message to show > Sorry about that. Just hit the send button too soon. It would be better if you > can describe the steps to reproduce the deadlock problem in commit message. Ok, I will update commit message v2. > . >
On 12/16/22 2:36 AM, Tonghao Zhang wrote: > On Fri, Dec 16, 2022 at 12:10 PM Yonghong Song <yhs@meta.com> wrote: >> >> >> >> On 12/14/22 2:38 AM, xiangxia.m.yue@gmail.com wrote: >>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com> >>> >>> This testing show how to reproduce deadlock in special case. >>> >>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> >>> Cc: Alexei Starovoitov <ast@kernel.org> >>> Cc: Daniel Borkmann <daniel@iogearbox.net> >>> Cc: Andrii Nakryiko <andrii@kernel.org> >>> Cc: Martin KaFai Lau <martin.lau@linux.dev> >>> Cc: Song Liu <song@kernel.org> >>> Cc: Yonghong Song <yhs@fb.com> >>> Cc: John Fastabend <john.fastabend@gmail.com> >>> Cc: KP Singh <kpsingh@kernel.org> >>> Cc: Stanislav Fomichev <sdf@google.com> >>> Cc: Hao Luo <haoluo@google.com> >>> Cc: Jiri Olsa <jolsa@kernel.org> >>> Cc: Hou Tao <houtao1@huawei.com> >>> --- >>> .../selftests/bpf/prog_tests/htab_deadlock.c | 74 +++++++++++++++++++ >>> .../selftests/bpf/progs/htab_deadlock.c | 30 ++++++++ >>> 2 files changed, 104 insertions(+) >>> create mode 100644 tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >>> create mode 100644 tools/testing/selftests/bpf/progs/htab_deadlock.c >>> >>> diff --git a/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >>> new file mode 100644 >>> index 000000000000..7dce4c2fe4f5 >>> --- /dev/null >>> +++ b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >>> @@ -0,0 +1,74 @@ >>> +// SPDX-License-Identifier: GPL-2.0 >>> +/* Copyright (c) 2022 DiDi Global Inc. */ >>> +#define _GNU_SOURCE >>> +#include <pthread.h> >>> +#include <sched.h> >>> +#include <test_progs.h> >>> + >>> +#include "htab_deadlock.skel.h" >>> + >>> +static int perf_event_open(void) >>> +{ >>> + struct perf_event_attr attr = {0}; >>> + int pfd; >>> + >>> + /* create perf event */ >>> + attr.size = sizeof(attr); >>> + attr.type = PERF_TYPE_HARDWARE; >>> + attr.config = PERF_COUNT_HW_CPU_CYCLES; >>> + attr.freq = 1; >>> + attr.sample_freq = 1000; >>> + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); >>> + >>> + return pfd >= 0 ? pfd : -errno; >>> +} >>> + >>> +void test_htab_deadlock(void) >>> +{ >>> + unsigned int val = 0, key = 20; >>> + struct bpf_link *link = NULL; >>> + struct htab_deadlock *skel; >>> + cpu_set_t cpus; >>> + int err; >>> + int pfd; >>> + int i; >> >> No need to have three lines for type 'int' variables. One line >> is enough to hold all three variables. >> >>> + >>> + skel = htab_deadlock__open_and_load(); >>> + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) >>> + return; >>> + >>> + err = htab_deadlock__attach(skel); >>> + if (!ASSERT_OK(err, "skel_attach")) >>> + goto clean_skel; >>> + >>> + /* NMI events. */ >>> + pfd = perf_event_open(); >>> + if (pfd < 0) { >>> + if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { >>> + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); >>> + test__skip(); >>> + goto clean_skel; >>> + } >>> + if (!ASSERT_GE(pfd, 0, "perf_event_open")) >>> + goto clean_skel; >>> + } >>> + >>> + link = bpf_program__attach_perf_event(skel->progs.bpf_perf_event, pfd); >>> + if (!ASSERT_OK_PTR(link, "attach_perf_event")) >>> + goto clean_pfd; >>> + >>> + /* Pinned on CPU 0 */ >>> + CPU_ZERO(&cpus); >>> + CPU_SET(0, &cpus); >>> + pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus); >>> + >>> + for (i = 0; i < 100000; i++) >> >> Please add some comments in the above loop to mention the test >> expects (hopefully) duriing one of bpf_map_update_elem(), one >> perf event might kick to trigger prog bpf_nmi_handle run. >> >>> + bpf_map_update_elem(bpf_map__fd(skel->maps.htab), >>> + &key, &val, BPF_ANY); >>> + >>> + bpf_link__destroy(link); >>> +clean_pfd: >>> + close(pfd); >>> +clean_skel: >>> + htab_deadlock__destroy(skel); >>> +} >>> diff --git a/tools/testing/selftests/bpf/progs/htab_deadlock.c b/tools/testing/selftests/bpf/progs/htab_deadlock.c >>> new file mode 100644 >>> index 000000000000..c4bd1567f882 >>> --- /dev/null >>> +++ b/tools/testing/selftests/bpf/progs/htab_deadlock.c >>> @@ -0,0 +1,30 @@ >>> +// SPDX-License-Identifier: GPL-2.0 >>> +/* Copyright (c) 2022 DiDi Global Inc. */ >>> +#include <linux/bpf.h> >>> +#include <bpf/bpf_helpers.h> >>> +#include <bpf/bpf_tracing.h> >>> + >>> +char _license[] SEC("license") = "GPL"; >>> + >>> +struct { >>> + __uint(type, BPF_MAP_TYPE_HASH); >>> + __uint(max_entries, 2); >>> + __uint(map_flags, BPF_F_ZERO_SEED); >>> + __uint(key_size, sizeof(unsigned int)); >>> + __uint(value_size, sizeof(unsigned int)); >>> +} htab SEC(".maps"); >> >> You can use >> __type(key, unsigned int); >> __type(value, unsigned int); >> This is more expressive. >> >>> + >>> +SEC("fentry/nmi_handle") >>> +int bpf_nmi_handle(struct pt_regs *regs) >> >> Do we need this fentry function? Can be just put >> bpf_map_update_elem() into bpf_perf_event program? > Hi > bpf_overflow_handler will check the bpf_prog_active, and > bpf_map_update_value invokes bpf_disable_instrumentation, > so the deadlock will not occur. In fentry/nmi_handle, bpf does not > check the bpf_prog_active. I see. Yes, fentry program does per prog recursion checking instead of global bpf_prog_active. > > Other comments look good to me, I will send v2 soon. >> Also s390x and aarch64 failed the test due to none/incomplete trampoline >> support. See bpf ci https://github.com/kernel-patches/bpf/pull/4211. >> You need to add them in their corresponding deny list if this fentry >> bpf program is used. >> >>> +{ >>> + unsigned int val = 0, key = 4; >>> + >>> + bpf_map_update_elem(&htab, &key, &val, BPF_ANY); >>> + return 0; >>> +} >>> + >>> +SEC("perf_event") >>> +int bpf_perf_event(struct pt_regs *regs) >>> +{ >>> + return 0; >>> +} > > >
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c new file mode 100644 index 000000000000..7dce4c2fe4f5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 DiDi Global Inc. */ +#define _GNU_SOURCE +#include <pthread.h> +#include <sched.h> +#include <test_progs.h> + +#include "htab_deadlock.skel.h" + +static int perf_event_open(void) +{ + struct perf_event_attr attr = {0}; + int pfd; + + /* create perf event */ + attr.size = sizeof(attr); + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; + attr.freq = 1; + attr.sample_freq = 1000; + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + + return pfd >= 0 ? pfd : -errno; +} + +void test_htab_deadlock(void) +{ + unsigned int val = 0, key = 20; + struct bpf_link *link = NULL; + struct htab_deadlock *skel; + cpu_set_t cpus; + int err; + int pfd; + int i; + + skel = htab_deadlock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = htab_deadlock__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto clean_skel; + + /* NMI events. */ + pfd = perf_event_open(); + if (pfd < 0) { + if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto clean_skel; + } + if (!ASSERT_GE(pfd, 0, "perf_event_open")) + goto clean_skel; + } + + link = bpf_program__attach_perf_event(skel->progs.bpf_perf_event, pfd); + if (!ASSERT_OK_PTR(link, "attach_perf_event")) + goto clean_pfd; + + /* Pinned on CPU 0 */ + CPU_ZERO(&cpus); + CPU_SET(0, &cpus); + pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus); + + for (i = 0; i < 100000; i++) + bpf_map_update_elem(bpf_map__fd(skel->maps.htab), + &key, &val, BPF_ANY); + + bpf_link__destroy(link); +clean_pfd: + close(pfd); +clean_skel: + htab_deadlock__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/htab_deadlock.c b/tools/testing/selftests/bpf/progs/htab_deadlock.c new file mode 100644 index 000000000000..c4bd1567f882 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/htab_deadlock.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 DiDi Global Inc. */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __uint(map_flags, BPF_F_ZERO_SEED); + __uint(key_size, sizeof(unsigned int)); + __uint(value_size, sizeof(unsigned int)); +} htab SEC(".maps"); + +SEC("fentry/nmi_handle") +int bpf_nmi_handle(struct pt_regs *regs) +{ + unsigned int val = 0, key = 4; + + bpf_map_update_elem(&htab, &key, &val, BPF_ANY); + return 0; +} + +SEC("perf_event") +int bpf_perf_event(struct pt_regs *regs) +{ + return 0; +}