Message ID | 20220219003004.1085072-1-mykolal@fb.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | [bpf-next] Improve BPF test stability (related to perf events and scheduling) | expand |
On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: > > In send_signal, replace sleep with dummy cpu intensive computation > to increase probability of child process being scheduled. Add few > more asserts. How often do we see the test fail because the child process is not scheduled? [...] > static void sigusr1_handler(int signum) > { > - sigusr1_received++; > + sigusr1_received = 1; > } > > static void test_send_signal_common(struct perf_event_attr *attr, > @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, > int old_prio; > > /* install signal handler and notify parent */ > + errno = 0; > signal(SIGUSR1, sigusr1_handler); > + ASSERT_OK(errno, "signal"); > > close(pipe_c2p[0]); /* close read */ > close(pipe_p2c[1]); /* close write */ > @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, > ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); > > /* wait a little for signal handler */ > - sleep(1); > + for (int i = 0; i < 1000000000; i++) > + volatile_variable++; > > buf[0] = sigusr1_received ? '2' : '0'; ^^^^ this "? :" seems useless as we assert sigusr1_received == 1. Let's fix it. > + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); > + > ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); > > /* wait for parent notification and exit */ > @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, > ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); > > /* trigger the bpf send_signal */ > + skel->bss->signal_thread = signal_thread; > skel->bss->pid = pid; > skel->bss->sig = SIGUSR1; > - skel->bss->signal_thread = signal_thread; Does the order matter here?
On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: > > In send_signal, replace sleep with dummy cpu intensive computation > to increase probability of child process being scheduled. Add few > more asserts. > > In find_vma, reduce sample_freq as higher values may be rejected in > some qemu setups, remove usleep and increase length of cpu intensive > computation. > > In bpf_cookie, perf_link and perf_branches, reduce sample_freq as > higher values may be rejected in some qemu setups > > Signed-off-by: Mykola Lysenko <mykolal@fb.com> > --- > .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- > tools/testing/selftests/bpf/prog_tests/find_vma.c | 5 ++--- > .../selftests/bpf/prog_tests/perf_branches.c | 4 ++-- > tools/testing/selftests/bpf/prog_tests/perf_link.c | 2 +- > .../testing/selftests/bpf/prog_tests/send_signal.c | 14 ++++++++++---- > 5 files changed, 16 insertions(+), 11 deletions(-) > > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c > index cd10df6cd0fc..0612e79a9281 100644 > --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c > @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) > attr.type = PERF_TYPE_SOFTWARE; > attr.config = PERF_COUNT_SW_CPU_CLOCK; > attr.freq = 1; > - attr.sample_freq = 4000; > + attr.sample_freq = 1000; > pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); > if (!ASSERT_GE(pfd, 0, "perf_fd")) > goto cleanup; > diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c > index b74b3c0c555a..acc41223a112 100644 > --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c > +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c > @@ -30,7 +30,7 @@ static int open_pe(void) > attr.type = PERF_TYPE_HARDWARE; > attr.config = PERF_COUNT_HW_CPU_CYCLES; > attr.freq = 1; > - 
attr.sample_freq = 4000; > + attr.sample_freq = 1000; > pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); > > return pfd >= 0 ? pfd : -errno; > @@ -57,7 +57,7 @@ static void test_find_vma_pe(struct find_vma *skel) > if (!ASSERT_OK_PTR(link, "attach_perf_event")) > goto cleanup; > > - for (i = 0; i < 1000000; ++i) > + for (i = 0; i < 1000000000; ++i) 1bln seems excessive... maybe 10mln would be enough? > ++j; > > test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); [...] > diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c > index 776916b61c40..841217bd1df6 100644 > --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c > +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c > @@ -4,11 +4,12 @@ > #include <sys/resource.h> > #include "test_send_signal_kern.skel.h" > > -int sigusr1_received = 0; > +int sigusr1_received; > +volatile int volatile_variable; please make them static > > static void sigusr1_handler(int signum) > { > - sigusr1_received++; > + sigusr1_received = 1; > } > > static void test_send_signal_common(struct perf_event_attr *attr, > @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, > int old_prio; > > /* install signal handler and notify parent */ > + errno = 0; > signal(SIGUSR1, sigusr1_handler); > + ASSERT_OK(errno, "signal"); just ASSERT_OK(signal(...), "signal"); > > close(pipe_c2p[0]); /* close read */ > close(pipe_p2c[1]); /* close write */ > @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, > ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); > > /* wait a little for signal handler */ > - sleep(1); > + for (int i = 0; i < 1000000000; i++) same about 1bln > + volatile_variable++; > > buf[0] = sigusr1_received ? 
'2' : '0'; > + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); > + > ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); > > /* wait for parent notification and exit */ > @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, > ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); > > /* trigger the bpf send_signal */ > + skel->bss->signal_thread = signal_thread; > skel->bss->pid = pid; > skel->bss->sig = SIGUSR1; > - skel->bss->signal_thread = signal_thread; > > /* notify child that bpf program can send_signal now */ > ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); > -- > 2.30.2 >
Thanks for your comments Song! > On Feb 18, 2022, at 5:47 PM, Song Liu <song@kernel.org> wrote: > > On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: >> >> In send_signal, replace sleep with dummy cpu intensive computation >> to increase probability of child process being scheduled. Add few >> more asserts. > > How often do we see the test fail because the child process is not > scheduled? I just ran this test, non-modified, 100 times in a loop and got 94 failures. However, it is on my setup when using qemu for testing. > > [...] > >> static void sigusr1_handler(int signum) >> { >> - sigusr1_received++; >> + sigusr1_received = 1; >> } >> >> static void test_send_signal_common(struct perf_event_attr *attr, >> @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >> int old_prio; >> >> /* install signal handler and notify parent */ >> + errno = 0; >> signal(SIGUSR1, sigusr1_handler); >> + ASSERT_OK(errno, "signal"); >> >> close(pipe_c2p[0]); /* close read */ >> close(pipe_p2c[1]); /* close write */ >> @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, >> ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); >> >> /* wait a little for signal handler */ >> - sleep(1); >> + for (int i = 0; i < 1000000000; i++) >> + volatile_variable++; >> >> buf[0] = sigusr1_received ? '2' : '0'; > > ^^^^ this "? :" seems useless as we assert sigusr1_received == 1. Let's fix it. In this branch (true for "pid == 0” condition), we execute in the child process. Just asserting here would not fail the test unfortunately. Child process will die (after exit(0)) and test will succeed. However, you then may ask why do we need assert at all. It is useful if we want to debug what happens in the child process. 
Right now, child process does not print anything and I will fix it in the second version of this patch by substituting exit(0) with return; > >> + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); >> + >> ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); >> >> /* wait for parent notification and exit */ >> @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >> ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); >> >> /* trigger the bpf send_signal */ >> + skel->bss->signal_thread = signal_thread; >> skel->bss->pid = pid; >> skel->bss->sig = SIGUSR1; >> - skel->bss->signal_thread = signal_thread; > > Does the order matter here? Unfortunately, yes. The bpf logic will start executing after sig or pid becomes non-zero (see /d/u/m/l/t/t/s/b/p/test_send_signal_kern.c, line 13). I am trying to remove ambiguity here with this small change. Although pid and sig may still conflict. Let me think about whether it would be better to fix the bpf code itself.
Thanks for the review Andrii! > On Feb 19, 2022, at 8:39 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: >> >> In send_signal, replace sleep with dummy cpu intensive computation >> to increase probability of child process being scheduled. Add few >> more asserts. >> >> In find_vma, reduce sample_freq as higher values may be rejected in >> some qemu setups, remove usleep and increase length of cpu intensive >> computation. >> >> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as >> higher values may be rejected in some qemu setups >> >> Signed-off-by: Mykola Lysenko <mykolal@fb.com> >> --- >> .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- >> tools/testing/selftests/bpf/prog_tests/find_vma.c | 5 ++--- >> .../selftests/bpf/prog_tests/perf_branches.c | 4 ++-- >> tools/testing/selftests/bpf/prog_tests/perf_link.c | 2 +- >> .../testing/selftests/bpf/prog_tests/send_signal.c | 14 ++++++++++---- >> 5 files changed, 16 insertions(+), 11 deletions(-) >> >> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >> index cd10df6cd0fc..0612e79a9281 100644 >> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) >> attr.type = PERF_TYPE_SOFTWARE; >> attr.config = PERF_COUNT_SW_CPU_CLOCK; >> attr.freq = 1; >> - attr.sample_freq = 4000; >> + attr.sample_freq = 1000; >> pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); >> if (!ASSERT_GE(pfd, 0, "perf_fd")) >> goto cleanup; >> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c >> index b74b3c0c555a..acc41223a112 100644 >> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c >> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c >> 
@@ -30,7 +30,7 @@ static int open_pe(void) >> attr.type = PERF_TYPE_HARDWARE; >> attr.config = PERF_COUNT_HW_CPU_CYCLES; >> attr.freq = 1; >> - attr.sample_freq = 4000; >> + attr.sample_freq = 1000; >> pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); >> >> return pfd >= 0 ? pfd : -errno; >> @@ -57,7 +57,7 @@ static void test_find_vma_pe(struct find_vma *skel) >> if (!ASSERT_OK_PTR(link, "attach_perf_event")) >> goto cleanup; >> >> - for (i = 0; i < 1000000; ++i) >> + for (i = 0; i < 1000000000; ++i) > > 1bln seems excessive... maybe 10mln would be enough? See explanation for send_signal test case below > >> ++j; >> >> test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); > > [...] > >> diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c >> index 776916b61c40..841217bd1df6 100644 >> --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c >> +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c >> @@ -4,11 +4,12 @@ >> #include <sys/resource.h> >> #include "test_send_signal_kern.skel.h" >> >> -int sigusr1_received = 0; >> +int sigusr1_received; >> +volatile int volatile_variable; > > please make them static sure > >> >> static void sigusr1_handler(int signum) >> { >> - sigusr1_received++; >> + sigusr1_received = 1; >> } >> >> static void test_send_signal_common(struct perf_event_attr *attr, >> @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >> int old_prio; >> >> /* install signal handler and notify parent */ >> + errno = 0; >> signal(SIGUSR1, sigusr1_handler); >> + ASSERT_OK(errno, "signal"); > > just ASSERT_OK(signal(...), "signal"); I am fine to merge signal and ASSERT lines, but will substitute with condition "signal(SIGUSR1, sigusr1_handler) != SIG_ERR”, sounds good? 
> >> >> close(pipe_c2p[0]); /* close read */ >> close(pipe_p2c[1]); /* close write */ >> @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, >> ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); >> >> /* wait a little for signal handler */ >> - sleep(1); >> + for (int i = 0; i < 1000000000; i++) > > same about 1bln With 10mln and 100 test runs I got 86 failures 100mln - 63 failures 1bln - 0 failures on 100 runs Now, there is performance concern for this test. Running time sudo ./test_progs -t send_signal/send_signal_nmi_thread With 1bln takes ~4s 100mln - 1s. Unchanged test with sleep(1); takes ~2s. On the other hand 300mln runs ~2s, and only fails 1 time per 100 runs. As 300mln does not regress performance comparing to the current “sleep(1)” implementation, I propose to go with it. What do you think? > >> + volatile_variable++; >> >> buf[0] = sigusr1_received ? '2' : '0'; >> + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); >> + >> ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); >> >> /* wait for parent notification and exit */ >> @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >> ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); >> >> /* trigger the bpf send_signal */ >> + skel->bss->signal_thread = signal_thread; >> skel->bss->pid = pid; >> skel->bss->sig = SIGUSR1; >> - skel->bss->signal_thread = signal_thread; >> >> /* notify child that bpf program can send_signal now */ >> ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); >> -- >> 2.30.2
On Tue, Feb 22, 2022 at 12:35 PM Mykola Lysenko <mykolal@fb.com> wrote: > > Thanks for the review Andrii! > > > On Feb 19, 2022, at 8:39 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > > > On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: > >> > >> In send_signal, replace sleep with dummy cpu intensive computation > >> to increase probability of child process being scheduled. Add few > >> more asserts. > >> > >> In find_vma, reduce sample_freq as higher values may be rejected in > >> some qemu setups, remove usleep and increase length of cpu intensive > >> computation. > >> > >> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as > >> higher values may be rejected in some qemu setups > >> > >> Signed-off-by: Mykola Lysenko <mykolal@fb.com> > >> --- > >> .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- > >> tools/testing/selftests/bpf/prog_tests/find_vma.c | 5 ++--- > >> .../selftests/bpf/prog_tests/perf_branches.c | 4 ++-- > >> tools/testing/selftests/bpf/prog_tests/perf_link.c | 2 +- > >> .../testing/selftests/bpf/prog_tests/send_signal.c | 14 ++++++++++---- > >> 5 files changed, 16 insertions(+), 11 deletions(-) > >> > >> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c > >> index cd10df6cd0fc..0612e79a9281 100644 > >> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c > >> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c > >> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) > >> attr.type = PERF_TYPE_SOFTWARE; > >> attr.config = PERF_COUNT_SW_CPU_CLOCK; > >> attr.freq = 1; > >> - attr.sample_freq = 4000; > >> + attr.sample_freq = 1000; > >> pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); > >> if (!ASSERT_GE(pfd, 0, "perf_fd")) > >> goto cleanup; > >> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c > >> 
index b74b3c0c555a..acc41223a112 100644 > >> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c > >> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c > >> @@ -30,7 +30,7 @@ static int open_pe(void) > >> attr.type = PERF_TYPE_HARDWARE; > >> attr.config = PERF_COUNT_HW_CPU_CYCLES; > >> attr.freq = 1; > >> - attr.sample_freq = 4000; > >> + attr.sample_freq = 1000; > >> pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); > >> > >> return pfd >= 0 ? pfd : -errno; > >> @@ -57,7 +57,7 @@ static void test_find_vma_pe(struct find_vma *skel) > >> if (!ASSERT_OK_PTR(link, "attach_perf_event")) > >> goto cleanup; > >> > >> - for (i = 0; i < 1000000; ++i) > >> + for (i = 0; i < 1000000000; ++i) > > > > 1bln seems excessive... maybe 10mln would be enough? > > See explanation for send_signal test case below > > > > >> ++j; > >> > >> test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); > > > > [...] > > > >> diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c > >> index 776916b61c40..841217bd1df6 100644 > >> --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c > >> +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c > >> @@ -4,11 +4,12 @@ > >> #include <sys/resource.h> > >> #include "test_send_signal_kern.skel.h" > >> > >> -int sigusr1_received = 0; > >> +int sigusr1_received; > >> +volatile int volatile_variable; > > > > please make them static > > sure > > > > >> > >> static void sigusr1_handler(int signum) > >> { > >> - sigusr1_received++; > >> + sigusr1_received = 1; > >> } > >> > >> static void test_send_signal_common(struct perf_event_attr *attr, > >> @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, > >> int old_prio; > >> > >> /* install signal handler and notify parent */ > >> + errno = 0; > >> signal(SIGUSR1, sigusr1_handler); > >> + ASSERT_OK(errno, "signal"); > > > > just ASSERT_OK(signal(...), "signal"); 
> > I am fine to merge signal and ASSERT lines, but will substitute with condition "signal(SIGUSR1, sigusr1_handler) != SIG_ERR”, sounds good? > Ah, signal is a bit special with return values. Yeah, ASSERT_NEQ(signal(...), SIG_ERR, "signal") sounds good. > > > >> > >> close(pipe_c2p[0]); /* close read */ > >> close(pipe_p2c[1]); /* close write */ > >> @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, > >> ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); > >> > >> /* wait a little for signal handler */ > >> - sleep(1); > >> + for (int i = 0; i < 1000000000; i++) > > > > same about 1bln > > With 10mln and 100 test runs I got 86 failures > 100mln - 63 failures > 1bln - 0 failures on 100 runs > > Now, there is performance concern for this test. Running > > time sudo ./test_progs -t send_signal/send_signal_nmi_thread > > With 1bln takes ~4s > 100mln - 1s. > Unchanged test with sleep(1); takes ~2s. > > On the other hand 300mln runs ~2s, and only fails 1 time per 100 runs. As 300mln does not regress performance comparing to the current “sleep(1)” implementation, I propose to go with it. What do you think? I think if we need to burn multiple seconds of CPU to make the test reliable, then we should either rework or disable/remove the test. In CI those billions of iterations will be much slower. And even waiting for 4 seconds for just one test is painful. Yonghong, WDYT? Should we just drop this test? It has caused us a bunch of flakiness and maintenance burden without actually catching any issues. Maybe it's better to just get rid of it? > > > > >> + volatile_variable++; > >> > >> buf[0] = sigusr1_received ? 
'2' : '0'; > >> + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); > >> + > >> ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); > >> > >> /* wait for parent notification and exit */ > >> @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, > >> ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); > >> > >> /* trigger the bpf send_signal */ > >> + skel->bss->signal_thread = signal_thread; > >> skel->bss->pid = pid; > >> skel->bss->sig = SIGUSR1; > >> - skel->bss->signal_thread = signal_thread; > >> > >> /* notify child that bpf program can send_signal now */ > >> ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); > >> -- > >> 2.30.2 >
On 2/22/22 7:13 PM, Andrii Nakryiko wrote: > On Tue, Feb 22, 2022 at 12:35 PM Mykola Lysenko <mykolal@fb.com> wrote: >> >> Thanks for the review Andrii! >> >>> On Feb 19, 2022, at 8:39 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: >>> >>> On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: >>>> >>>> In send_signal, replace sleep with dummy cpu intensive computation >>>> to increase probability of child process being scheduled. Add few >>>> more asserts. >>>> >>>> In find_vma, reduce sample_freq as higher values may be rejected in >>>> some qemu setups, remove usleep and increase length of cpu intensive >>>> computation. >>>> >>>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as >>>> higher values may be rejected in some qemu setups >>>> >>>> Signed-off-by: Mykola Lysenko <mykolal@fb.com> >>>> --- >>>> .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- >>>> tools/testing/selftests/bpf/prog_tests/find_vma.c | 5 ++--- >>>> .../selftests/bpf/prog_tests/perf_branches.c | 4 ++-- >>>> tools/testing/selftests/bpf/prog_tests/perf_link.c | 2 +- >>>> .../testing/selftests/bpf/prog_tests/send_signal.c | 14 ++++++++++---- >>>> 5 files changed, 16 insertions(+), 11 deletions(-) >>>> >>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>> index cd10df6cd0fc..0612e79a9281 100644 >>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) >>>> attr.type = PERF_TYPE_SOFTWARE; >>>> attr.config = PERF_COUNT_SW_CPU_CLOCK; >>>> attr.freq = 1; >>>> - attr.sample_freq = 4000; >>>> + attr.sample_freq = 1000; >>>> pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); >>>> if (!ASSERT_GE(pfd, 0, "perf_fd")) >>>> goto cleanup; >>>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c 
b/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>> index b74b3c0c555a..acc41223a112 100644 >>>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>> @@ -30,7 +30,7 @@ static int open_pe(void) >>>> attr.type = PERF_TYPE_HARDWARE; >>>> attr.config = PERF_COUNT_HW_CPU_CYCLES; >>>> attr.freq = 1; >>>> - attr.sample_freq = 4000; >>>> + attr.sample_freq = 1000; >>>> pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); >>>> >>>> return pfd >= 0 ? pfd : -errno; >>>> @@ -57,7 +57,7 @@ static void test_find_vma_pe(struct find_vma *skel) >>>> if (!ASSERT_OK_PTR(link, "attach_perf_event")) >>>> goto cleanup; >>>> >>>> - for (i = 0; i < 1000000; ++i) >>>> + for (i = 0; i < 1000000000; ++i) >>> >>> 1bln seems excessive... maybe 10mln would be enough? >> >> See explanation for send_signal test case below >> >>> >>>> ++j; >>>> >>>> test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); >>> >>> [...] 
>>> >>>> diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>> index 776916b61c40..841217bd1df6 100644 >>>> --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>> +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>> @@ -4,11 +4,12 @@ >>>> #include <sys/resource.h> >>>> #include "test_send_signal_kern.skel.h" >>>> >>>> -int sigusr1_received = 0; >>>> +int sigusr1_received; >>>> +volatile int volatile_variable; >>> >>> please make them static >> >> sure >> >>> >>>> >>>> static void sigusr1_handler(int signum) >>>> { >>>> - sigusr1_received++; >>>> + sigusr1_received = 1; >>>> } >>>> >>>> static void test_send_signal_common(struct perf_event_attr *attr, >>>> @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>> int old_prio; >>>> >>>> /* install signal handler and notify parent */ >>>> + errno = 0; >>>> signal(SIGUSR1, sigusr1_handler); >>>> + ASSERT_OK(errno, "signal"); >>> >>> just ASSERT_OK(signal(...), "signal"); >> >> I am fine to merge signal and ASSERT lines, but will substitute with condition "signal(SIGUSR1, sigusr1_handler) != SIG_ERR”, sounds good? >> > > Ah, signal is a bit special with return values. Yeah, > ASSERT_NEQ(signal(...), SIG_ERR, "signal") sounds good. > >>> >>>> >>>> close(pipe_c2p[0]); /* close read */ >>>> close(pipe_p2c[1]); /* close write */ >>>> @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>> ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); >>>> >>>> /* wait a little for signal handler */ >>>> - sleep(1); >>>> + for (int i = 0; i < 1000000000; i++) >>> >>> same about 1bln >> >> With 10mln and 100 test runs I got 86 failures >> 100mln - 63 failures >> 1bln - 0 failures on 100 runs >> >> Now, there is performance concern for this test. Running >> >> time sudo ./test_progs -t send_signal/send_signal_nmi_thread >> >> With 1bln takes ~4s >> 100mln - 1s. 
>> Unchanged test with sleep(1); takes ~2s. >> >> On the other hand 300mln runs ~2s, and only fails 1 time per 100 runs. As 300mln does not regress performance comparing to the current “sleep(1)” implementation, I propose to go with it. What do you think? > > > I think if we need to burn multiple seconds of CPU to make the test > reliable, then we should either rework or disable/remove the test. In > CI those billions of iterations will be much slower. And even waiting > for 4 seconds for just one test is painful. > > Yonghong, WDYT? Should we just drop thi test? It has caused us a bunch > of flakiness and maintenance burden without actually catching any > issues. Maybe it's better to just get rid of it? Could we try to set affinity for the child process here? See perf_branches.c: ... /* generate some branches on cpu 0 */ CPU_ZERO(&cpu_set); CPU_SET(0, &cpu_set); err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err)) goto out_destroy; /* spin the loop for a while (random high number) */ for (i = 0; i < 1000000; ++i) ++j; ... Binding the process (single thread) to a particular cpu can prevent other non-binding processes from migrating to this cpu and boost the chance for NMI triggered on this cpu. This could be the reason perf_branches.c (and a few other tests) does. In send_signal case, the cpu affinity probably should set to cpu 1 as cpu 0 has been pinned by previous tests for the main process and I didn't see it 'unpinned' (by setaffinity to ALL cpus). This is inconvenient. So the following is my suggestion: 1. abstract the above 'pthread_setaffinity_np to a helper to set affinity to a particular cpu as this function has been used in several cases. 2. create a new helper to undo setaffinity (set cpu mask to all available cpus) so we can pair it with pthread_setaffinity_np helper in prog_tests/... files. 3. clean up prog_tests/... files which have pthread_setaffinity_np. 4. 
use helpers 1/2 with loop bound 1000000 for send_signal test. The implementation here will be consistent with other NMI tests. Hopefully the test can consistent pass similar to other NMI tests. WDYT? > >> >>> >>>> + volatile_variable++; >>>> >>>> buf[0] = sigusr1_received ? '2' : '0'; >>>> + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); >>>> + >>>> ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); >>>> >>>> /* wait for parent notification and exit */ >>>> @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>> ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); >>>> >>>> /* trigger the bpf send_signal */ >>>> + skel->bss->signal_thread = signal_thread; >>>> skel->bss->pid = pid; >>>> skel->bss->sig = SIGUSR1; >>>> - skel->bss->signal_thread = signal_thread; >>>> >>>> /* notify child that bpf program can send_signal now */ >>>> ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); >>>> -- >>>> 2.30.2 >>
> On Feb 22, 2022, at 8:32 PM, Yonghong Song <yhs@fb.com> wrote: > > > > On 2/22/22 7:13 PM, Andrii Nakryiko wrote: >> On Tue, Feb 22, 2022 at 12:35 PM Mykola Lysenko <mykolal@fb.com> wrote: >>> >>> Thanks for the review Andrii! >>> >>>> On Feb 19, 2022, at 8:39 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: >>>> >>>> On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: >>>>> >>>>> In send_signal, replace sleep with dummy cpu intensive computation >>>>> to increase probability of child process being scheduled. Add few >>>>> more asserts. >>>>> >>>>> In find_vma, reduce sample_freq as higher values may be rejected in >>>>> some qemu setups, remove usleep and increase length of cpu intensive >>>>> computation. >>>>> >>>>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as >>>>> higher values may be rejected in some qemu setups >>>>> >>>>> Signed-off-by: Mykola Lysenko <mykolal@fb.com> >>>>> --- >>>>> .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- >>>>> tools/testing/selftests/bpf/prog_tests/find_vma.c | 5 ++--- >>>>> .../selftests/bpf/prog_tests/perf_branches.c | 4 ++-- >>>>> tools/testing/selftests/bpf/prog_tests/perf_link.c | 2 +- >>>>> .../testing/selftests/bpf/prog_tests/send_signal.c | 14 ++++++++++---- >>>>> 5 files changed, 16 insertions(+), 11 deletions(-) >>>>> >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>> index cd10df6cd0fc..0612e79a9281 100644 >>>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) >>>>> attr.type = PERF_TYPE_SOFTWARE; >>>>> attr.config = PERF_COUNT_SW_CPU_CLOCK; >>>>> attr.freq = 1; >>>>> - attr.sample_freq = 4000; >>>>> + attr.sample_freq = 1000; >>>>> pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); >>>>> if (!ASSERT_GE(pfd, 0, 
"perf_fd")) >>>>> goto cleanup; >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>> index b74b3c0c555a..acc41223a112 100644 >>>>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>> @@ -30,7 +30,7 @@ static int open_pe(void) >>>>> attr.type = PERF_TYPE_HARDWARE; >>>>> attr.config = PERF_COUNT_HW_CPU_CYCLES; >>>>> attr.freq = 1; >>>>> - attr.sample_freq = 4000; >>>>> + attr.sample_freq = 1000; >>>>> pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); >>>>> >>>>> return pfd >= 0 ? pfd : -errno; >>>>> @@ -57,7 +57,7 @@ static void test_find_vma_pe(struct find_vma *skel) >>>>> if (!ASSERT_OK_PTR(link, "attach_perf_event")) >>>>> goto cleanup; >>>>> >>>>> - for (i = 0; i < 1000000; ++i) >>>>> + for (i = 0; i < 1000000000; ++i) >>>> >>>> 1bln seems excessive... maybe 10mln would be enough? >>> >>> See explanation for send_signal test case below >>> >>>> >>>>> ++j; >>>>> >>>>> test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); >>>> >>>> [...] 
>>>> >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>> index 776916b61c40..841217bd1df6 100644 >>>>> --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>> @@ -4,11 +4,12 @@ >>>>> #include <sys/resource.h> >>>>> #include "test_send_signal_kern.skel.h" >>>>> >>>>> -int sigusr1_received = 0; >>>>> +int sigusr1_received; >>>>> +volatile int volatile_variable; >>>> >>>> please make them static >>> >>> sure >>> >>>> >>>>> >>>>> static void sigusr1_handler(int signum) >>>>> { >>>>> - sigusr1_received++; >>>>> + sigusr1_received = 1; >>>>> } >>>>> >>>>> static void test_send_signal_common(struct perf_event_attr *attr, >>>>> @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>> int old_prio; >>>>> >>>>> /* install signal handler and notify parent */ >>>>> + errno = 0; >>>>> signal(SIGUSR1, sigusr1_handler); >>>>> + ASSERT_OK(errno, "signal"); >>>> >>>> just ASSERT_OK(signal(...), "signal"); >>> >>> I am fine to merge signal and ASSERT lines, but will substitute with condition "signal(SIGUSR1, sigusr1_handler) != SIG_ERR”, sounds good? >>> >> Ah, signal is a bit special with return values. Yeah, >> ASSERT_NEQ(signal(...), SIG_ERR, "signal") sounds good. >>>> >>>>> >>>>> close(pipe_c2p[0]); /* close read */ >>>>> close(pipe_p2c[1]); /* close write */ >>>>> @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>> ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); >>>>> >>>>> /* wait a little for signal handler */ >>>>> - sleep(1); >>>>> + for (int i = 0; i < 1000000000; i++) >>>> >>>> same about 1bln >>> >>> With 10mln and 100 test runs I got 86 failures >>> 100mln - 63 failures >>> 1bln - 0 failures on 100 runs >>> >>> Now, there is performance concern for this test. 
Running >>> >>> time sudo ./test_progs -t send_signal/send_signal_nmi_thread >>> >>> With 1bln takes ~4s >>> 100mln - 1s. >>> Unchanged test with sleep(1); takes ~2s. >>> >>> On the other hand 300mln runs ~2s, and only fails 1 time per 100 runs. As 300mln does not regress performance comparing to the current “sleep(1)” implementation, I propose to go with it. What do you think? >> I think if we need to burn multiple seconds of CPU to make the test >> reliable, then we should either rework or disable/remove the test. In >> CI those billions of iterations will be much slower. And even waiting >> for 4 seconds for just one test is painful. >> Yonghong, WDYT? Should we just drop thi test? It has caused us a bunch >> of flakiness and maintenance burden without actually catching any >> issues. Maybe it's better to just get rid of it? > > Could we try to set affinity for the child process here? > See perf_branches.c: > > ... > /* generate some branches on cpu 0 */ > CPU_ZERO(&cpu_set); > CPU_SET(0, &cpu_set); > err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); > if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err)) > goto out_destroy; > /* spin the loop for a while (random high number) */ > for (i = 0; i < 1000000; ++i) > ++j; > ... > > Binding the process (single thread) to a particular cpu can > prevent other non-binding processes from migrating to this > cpu and boost the chance for NMI triggered on this cpu. > This could be the reason perf_branches.c (and a few other tests) > does. > > In send_signal case, the cpu affinity probably should > set to cpu 1 as cpu 0 has been pinned by previous tests for > the main process and I didn't see it 'unpinned' > (by setaffinity to ALL cpus). > This is inconvenient. > > So the following is my suggestion: > 1. abstract the above 'pthread_setaffinity_np to > a helper to set affinity to a particular cpu as > this function has been used in several cases. > 2. 
create a new helper to undo setaffinity (set cpu > mask to all available cpus) so we can pair it > with pthread_setaffinity_np helper in prog_tests/... > files. > 3. clean up prog_tests/... files which have pthread_setaffinity_np. > 4. use helpers 1/2 with loop bound 1000000 for send_signal test. > The implementation here will be consistent with > other NMI tests. Hopefully the test can consistent > pass similar to other NMI tests. > > WDYT? Thanks Yonghong, let me test this. > >>> >>>> >>>>> + volatile_variable++; >>>>> >>>>> buf[0] = sigusr1_received ? '2' : '0'; >>>>> + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); >>>>> + >>>>> ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); >>>>> >>>>> /* wait for parent notification and exit */ >>>>> @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>> ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); >>>>> >>>>> /* trigger the bpf send_signal */ >>>>> + skel->bss->signal_thread = signal_thread; >>>>> skel->bss->pid = pid; >>>>> skel->bss->sig = SIGUSR1; >>>>> - skel->bss->signal_thread = signal_thread; >>>>> >>>>> /* notify child that bpf program can send_signal now */ >>>>> ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); >>>>> -- >>>>> 2.30.2
> On Feb 22, 2022, at 8:32 PM, Yonghong Song <yhs@fb.com> wrote: > > > > On 2/22/22 7:13 PM, Andrii Nakryiko wrote: >> On Tue, Feb 22, 2022 at 12:35 PM Mykola Lysenko <mykolal@fb.com> wrote: >>> >>> Thanks for the review Andrii! >>> >>>> On Feb 19, 2022, at 8:39 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: >>>> >>>> On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: >>>>> >>>>> In send_signal, replace sleep with dummy cpu intensive computation >>>>> to increase probability of child process being scheduled. Add few >>>>> more asserts. >>>>> >>>>> In find_vma, reduce sample_freq as higher values may be rejected in >>>>> some qemu setups, remove usleep and increase length of cpu intensive >>>>> computation. >>>>> >>>>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as >>>>> higher values may be rejected in some qemu setups >>>>> >>>>> Signed-off-by: Mykola Lysenko <mykolal@fb.com> >>>>> --- >>>>> .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- >>>>> tools/testing/selftests/bpf/prog_tests/find_vma.c | 5 ++--- >>>>> .../selftests/bpf/prog_tests/perf_branches.c | 4 ++-- >>>>> tools/testing/selftests/bpf/prog_tests/perf_link.c | 2 +- >>>>> .../testing/selftests/bpf/prog_tests/send_signal.c | 14 ++++++++++---- >>>>> 5 files changed, 16 insertions(+), 11 deletions(-) >>>>> >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>> index cd10df6cd0fc..0612e79a9281 100644 >>>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) >>>>> attr.type = PERF_TYPE_SOFTWARE; >>>>> attr.config = PERF_COUNT_SW_CPU_CLOCK; >>>>> attr.freq = 1; >>>>> - attr.sample_freq = 4000; >>>>> + attr.sample_freq = 1000; >>>>> pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); >>>>> if (!ASSERT_GE(pfd, 0, 
"perf_fd")) >>>>> goto cleanup; >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>> index b74b3c0c555a..acc41223a112 100644 >>>>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>> @@ -30,7 +30,7 @@ static int open_pe(void) >>>>> attr.type = PERF_TYPE_HARDWARE; >>>>> attr.config = PERF_COUNT_HW_CPU_CYCLES; >>>>> attr.freq = 1; >>>>> - attr.sample_freq = 4000; >>>>> + attr.sample_freq = 1000; >>>>> pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); >>>>> >>>>> return pfd >= 0 ? pfd : -errno; >>>>> @@ -57,7 +57,7 @@ static void test_find_vma_pe(struct find_vma *skel) >>>>> if (!ASSERT_OK_PTR(link, "attach_perf_event")) >>>>> goto cleanup; >>>>> >>>>> - for (i = 0; i < 1000000; ++i) >>>>> + for (i = 0; i < 1000000000; ++i) >>>> >>>> 1bln seems excessive... maybe 10mln would be enough? >>> >>> See explanation for send_signal test case below >>> >>>> >>>>> ++j; >>>>> >>>>> test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); >>>> >>>> [...] 
>>>> >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>> index 776916b61c40..841217bd1df6 100644 >>>>> --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>> @@ -4,11 +4,12 @@ >>>>> #include <sys/resource.h> >>>>> #include "test_send_signal_kern.skel.h" >>>>> >>>>> -int sigusr1_received = 0; >>>>> +int sigusr1_received; >>>>> +volatile int volatile_variable; >>>> >>>> please make them static >>> >>> sure >>> >>>> >>>>> >>>>> static void sigusr1_handler(int signum) >>>>> { >>>>> - sigusr1_received++; >>>>> + sigusr1_received = 1; >>>>> } >>>>> >>>>> static void test_send_signal_common(struct perf_event_attr *attr, >>>>> @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>> int old_prio; >>>>> >>>>> /* install signal handler and notify parent */ >>>>> + errno = 0; >>>>> signal(SIGUSR1, sigusr1_handler); >>>>> + ASSERT_OK(errno, "signal"); >>>> >>>> just ASSERT_OK(signal(...), "signal"); >>> >>> I am fine to merge signal and ASSERT lines, but will substitute with condition "signal(SIGUSR1, sigusr1_handler) != SIG_ERR”, sounds good? >>> >> Ah, signal is a bit special with return values. Yeah, >> ASSERT_NEQ(signal(...), SIG_ERR, "signal") sounds good. >>>> >>>>> >>>>> close(pipe_c2p[0]); /* close read */ >>>>> close(pipe_p2c[1]); /* close write */ >>>>> @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>> ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); >>>>> >>>>> /* wait a little for signal handler */ >>>>> - sleep(1); >>>>> + for (int i = 0; i < 1000000000; i++) >>>> >>>> same about 1bln >>> >>> With 10mln and 100 test runs I got 86 failures >>> 100mln - 63 failures >>> 1bln - 0 failures on 100 runs >>> >>> Now, there is performance concern for this test. 
Running >>> >>> time sudo ./test_progs -t send_signal/send_signal_nmi_thread >>> >>> With 1bln takes ~4s >>> 100mln - 1s. >>> Unchanged test with sleep(1); takes ~2s. >>> >>> On the other hand 300mln runs ~2s, and only fails 1 time per 100 runs. As 300mln does not regress performance comparing to the current “sleep(1)” implementation, I propose to go with it. What do you think? >> I think if we need to burn multiple seconds of CPU to make the test >> reliable, then we should either rework or disable/remove the test. In >> CI those billions of iterations will be much slower. And even waiting >> for 4 seconds for just one test is painful. >> Yonghong, WDYT? Should we just drop thi test? It has caused us a bunch >> of flakiness and maintenance burden without actually catching any >> issues. Maybe it's better to just get rid of it? > > Could we try to set affinity for the child process here? > See perf_branches.c: > > ... > /* generate some branches on cpu 0 */ > CPU_ZERO(&cpu_set); > CPU_SET(0, &cpu_set); > err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); > if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err)) > goto out_destroy; > /* spin the loop for a while (random high number) */ > for (i = 0; i < 1000000; ++i) > ++j; > ... > > Binding the process (single thread) to a particular cpu can > prevent other non-binding processes from migrating to this > cpu and boost the chance for NMI triggered on this cpu. > This could be the reason perf_branches.c (and a few other tests) > does. > > In send_signal case, the cpu affinity probably should > set to cpu 1 as cpu 0 has been pinned by previous tests for > the main process and I didn't see it 'unpinned' > (by setaffinity to ALL cpus). > This is inconvenient. > > So the following is my suggestion: > 1. abstract the above 'pthread_setaffinity_np to > a helper to set affinity to a particular cpu as > this function has been used in several cases. > 2. 
create a new helper to undo setaffinity (set cpu > mask to all available cpus) so we can pair it > with pthread_setaffinity_np helper in prog_tests/... > files. > 3. clean up prog_tests/... files which have pthread_setaffinity_np. > 4. use helpers 1/2 with loop bound 1000000 for send_signal test. > The implementation here will be consistent with > other NMI tests. Hopefully the test can consistent > pass similar to other NMI tests. > > WDYT? Hi Yonghong, I have tried this approach in the send_signal test without much success unfortunately (different CPUs and configurations options). It is required though for perf_branches test, yet to understand why. In the V2 of this patch, I used modified approach when we will stop crunching volatile variable when needed condition became true. I hope this will be an acceptable middle ground in this case. Thanks! > >>> >>>> >>>>> + volatile_variable++; >>>>> >>>>> buf[0] = sigusr1_received ? '2' : '0'; >>>>> + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); >>>>> + >>>>> ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); >>>>> >>>>> /* wait for parent notification and exit */ >>>>> @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>> ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); >>>>> >>>>> /* trigger the bpf send_signal */ >>>>> + skel->bss->signal_thread = signal_thread; >>>>> skel->bss->pid = pid; >>>>> skel->bss->sig = SIGUSR1; >>>>> - skel->bss->signal_thread = signal_thread; >>>>> >>>>> /* notify child that bpf program can send_signal now */ >>>>> ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); >>>>> -- >>>>> 2.30.2
On 2/28/22 7:45 PM, Mykola Lysenko wrote: > > >> On Feb 22, 2022, at 8:32 PM, Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 2/22/22 7:13 PM, Andrii Nakryiko wrote: >>> On Tue, Feb 22, 2022 at 12:35 PM Mykola Lysenko <mykolal@fb.com> wrote: >>>> >>>> Thanks for the review Andrii! >>>> >>>>> On Feb 19, 2022, at 8:39 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: >>>>> >>>>> On Fri, Feb 18, 2022 at 4:30 PM Mykola Lysenko <mykolal@fb.com> wrote: >>>>>> >>>>>> In send_signal, replace sleep with dummy cpu intensive computation >>>>>> to increase probability of child process being scheduled. Add few >>>>>> more asserts. >>>>>> >>>>>> In find_vma, reduce sample_freq as higher values may be rejected in >>>>>> some qemu setups, remove usleep and increase length of cpu intensive >>>>>> computation. >>>>>> >>>>>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as >>>>>> higher values may be rejected in some qemu setups >>>>>> >>>>>> Signed-off-by: Mykola Lysenko <mykolal@fb.com> >>>>>> --- >>>>>> .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- >>>>>> tools/testing/selftests/bpf/prog_tests/find_vma.c | 5 ++--- >>>>>> .../selftests/bpf/prog_tests/perf_branches.c | 4 ++-- >>>>>> tools/testing/selftests/bpf/prog_tests/perf_link.c | 2 +- >>>>>> .../testing/selftests/bpf/prog_tests/send_signal.c | 14 ++++++++++---- >>>>>> 5 files changed, 16 insertions(+), 11 deletions(-) >>>>>> >>>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>>> index cd10df6cd0fc..0612e79a9281 100644 >>>>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c >>>>>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) >>>>>> attr.type = PERF_TYPE_SOFTWARE; >>>>>> attr.config = PERF_COUNT_SW_CPU_CLOCK; >>>>>> attr.freq = 1; >>>>>> - attr.sample_freq = 4000; >>>>>> + attr.sample_freq = 1000; >>>>>> pfd = 
syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); >>>>>> if (!ASSERT_GE(pfd, 0, "perf_fd")) >>>>>> goto cleanup; >>>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>>> index b74b3c0c555a..acc41223a112 100644 >>>>>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c >>>>>> @@ -30,7 +30,7 @@ static int open_pe(void) >>>>>> attr.type = PERF_TYPE_HARDWARE; >>>>>> attr.config = PERF_COUNT_HW_CPU_CYCLES; >>>>>> attr.freq = 1; >>>>>> - attr.sample_freq = 4000; >>>>>> + attr.sample_freq = 1000; >>>>>> pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); >>>>>> >>>>>> return pfd >= 0 ? pfd : -errno; >>>>>> @@ -57,7 +57,7 @@ static void test_find_vma_pe(struct find_vma *skel) >>>>>> if (!ASSERT_OK_PTR(link, "attach_perf_event")) >>>>>> goto cleanup; >>>>>> >>>>>> - for (i = 0; i < 1000000; ++i) >>>>>> + for (i = 0; i < 1000000000; ++i) >>>>> >>>>> 1bln seems excessive... maybe 10mln would be enough? >>>> >>>> See explanation for send_signal test case below >>>> >>>>> >>>>>> ++j; >>>>>> >>>>>> test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); >>>>> >>>>> [...] 
>>>>> >>>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>>> index 776916b61c40..841217bd1df6 100644 >>>>>> --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>>> +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c >>>>>> @@ -4,11 +4,12 @@ >>>>>> #include <sys/resource.h> >>>>>> #include "test_send_signal_kern.skel.h" >>>>>> >>>>>> -int sigusr1_received = 0; >>>>>> +int sigusr1_received; >>>>>> +volatile int volatile_variable; >>>>> >>>>> please make them static >>>> >>>> sure >>>> >>>>> >>>>>> >>>>>> static void sigusr1_handler(int signum) >>>>>> { >>>>>> - sigusr1_received++; >>>>>> + sigusr1_received = 1; >>>>>> } >>>>>> >>>>>> static void test_send_signal_common(struct perf_event_attr *attr, >>>>>> @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>>> int old_prio; >>>>>> >>>>>> /* install signal handler and notify parent */ >>>>>> + errno = 0; >>>>>> signal(SIGUSR1, sigusr1_handler); >>>>>> + ASSERT_OK(errno, "signal"); >>>>> >>>>> just ASSERT_OK(signal(...), "signal"); >>>> >>>> I am fine to merge signal and ASSERT lines, but will substitute with condition "signal(SIGUSR1, sigusr1_handler) != SIG_ERR”, sounds good? >>>> >>> Ah, signal is a bit special with return values. Yeah, >>> ASSERT_NEQ(signal(...), SIG_ERR, "signal") sounds good. >>>>> >>>>>> >>>>>> close(pipe_c2p[0]); /* close read */ >>>>>> close(pipe_p2c[1]); /* close write */ >>>>>> @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>>> ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); >>>>>> >>>>>> /* wait a little for signal handler */ >>>>>> - sleep(1); >>>>>> + for (int i = 0; i < 1000000000; i++) >>>>> >>>>> same about 1bln >>>> >>>> With 10mln and 100 test runs I got 86 failures >>>> 100mln - 63 failures >>>> 1bln - 0 failures on 100 runs >>>> >>>> Now, there is performance concern for this test. 
Running >>>> >>>> time sudo ./test_progs -t send_signal/send_signal_nmi_thread >>>> >>>> With 1bln takes ~4s >>>> 100mln - 1s. >>>> Unchanged test with sleep(1); takes ~2s. >>>> >>>> On the other hand 300mln runs ~2s, and only fails 1 time per 100 runs. As 300mln does not regress performance comparing to the current “sleep(1)” implementation, I propose to go with it. What do you think? >>> I think if we need to burn multiple seconds of CPU to make the test >>> reliable, then we should either rework or disable/remove the test. In >>> CI those billions of iterations will be much slower. And even waiting >>> for 4 seconds for just one test is painful. >>> Yonghong, WDYT? Should we just drop thi test? It has caused us a bunch >>> of flakiness and maintenance burden without actually catching any >>> issues. Maybe it's better to just get rid of it? >> >> Could we try to set affinity for the child process here? >> See perf_branches.c: >> >> ... >> /* generate some branches on cpu 0 */ >> CPU_ZERO(&cpu_set); >> CPU_SET(0, &cpu_set); >> err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); >> if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err)) >> goto out_destroy; >> /* spin the loop for a while (random high number) */ >> for (i = 0; i < 1000000; ++i) >> ++j; >> ... >> >> Binding the process (single thread) to a particular cpu can >> prevent other non-binding processes from migrating to this >> cpu and boost the chance for NMI triggered on this cpu. >> This could be the reason perf_branches.c (and a few other tests) >> does. >> >> In send_signal case, the cpu affinity probably should >> set to cpu 1 as cpu 0 has been pinned by previous tests for >> the main process and I didn't see it 'unpinned' >> (by setaffinity to ALL cpus). >> This is inconvenient. >> >> So the following is my suggestion: >> 1. abstract the above 'pthread_setaffinity_np to >> a helper to set affinity to a particular cpu as >> this function has been used in several cases. >> 2. 
create a new helper to undo setaffinity (set cpu >> mask to all available cpus) so we can pair it >> with pthread_setaffinity_np helper in prog_tests/... >> files. >> 3. clean up prog_tests/... files which have pthread_setaffinity_np. >> 4. use helpers 1/2 with loop bound 1000000 for send_signal test. >> The implementation here will be consistent with >> other NMI tests. Hopefully the test can consistent >> pass similar to other NMI tests. >> >> WDYT? > > Hi Yonghong, > > I have tried this approach in the send_signal test without much success unfortunately (different CPUs and configurations options). It is required though for perf_branches test, yet to understand why. Thanks for experiments. I looked at the code again. Indeed pthread_setaffinity_np is not needed for send_signal test. This is because we use perf_event_open pid/cpu config like below: pid > 0 and cpu == -1 This measures the specified process/thread on any CPU. For perf_branches, pthread_setaffinity_np is needed since it uses the perf_evnet_open pid/cpu config like below: pid == -1 and cpu >= 0 This measures all processes/threads on the specified CPU. This requires CAP_SYS_ADMIN capabil‐ ity or a /proc/sys/kernel/perf_event_paranoid value of less than 1. > > In the V2 of this patch, I used modified approach when we will stop crunching volatile variable when needed condition became true. I hope this will be an acceptable middle ground in this case. My current setup is using qemu on a physical server and cannot reproduce the issue. So I created another setup which uses qemu on a VM itseld and can actually reproduce the issue. Replacing the sleep(1) with for (int i = 0; i < 1000000000; i++) /* 1billion */ j++; /* volatile int j */ seems fixing the issue. But your change for (int i = 0; i < 100000000 && !sigusr1_received; i++) volatile_variable /= i + 1; works too and I tested it and in most cases the time for the subtest is 0.8x or 0.9x seconds. 
Sometimes it can be < 0.5 seconds, and occasionally it may be 1.0x seconds. Overall, this is definitely an improvement for fixing flakiness and better runtime. > > Thanks! > >> >>>> >>>>> >>>>>> + volatile_variable++; >>>>>> >>>>>> buf[0] = sigusr1_received ? '2' : '0'; >>>>>> + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); >>>>>> + >>>>>> ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); >>>>>> >>>>>> /* wait for parent notification and exit */ >>>>>> @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, >>>>>> ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); >>>>>> >>>>>> /* trigger the bpf send_signal */ >>>>>> + skel->bss->signal_thread = signal_thread; >>>>>> skel->bss->pid = pid; >>>>>> skel->bss->sig = SIGUSR1; >>>>>> - skel->bss->signal_thread = signal_thread; >>>>>> >>>>>> /* notify child that bpf program can send_signal now */ >>>>>> ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); >>>>>> -- >>>>>> 2.30.2 >
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index cd10df6cd0fc..0612e79a9281 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; - attr.sample_freq = 4000; + attr.sample_freq = 1000; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c index b74b3c0c555a..acc41223a112 100644 --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c @@ -30,7 +30,7 @@ static int open_pe(void) attr.type = PERF_TYPE_HARDWARE; attr.config = PERF_COUNT_HW_CPU_CYCLES; attr.freq = 1; - attr.sample_freq = 4000; + attr.sample_freq = 1000; pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); return pfd >= 0 ? 
pfd : -errno; @@ -57,7 +57,7 @@ static void test_find_vma_pe(struct find_vma *skel) if (!ASSERT_OK_PTR(link, "attach_perf_event")) goto cleanup; - for (i = 0; i < 1000000; ++i) + for (i = 0; i < 1000000000; ++i) ++j; test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); @@ -108,7 +108,6 @@ void serial_test_find_vma(void) skel->bss->addr = (__u64)(uintptr_t)test_find_vma_pe; test_find_vma_pe(skel); - usleep(100000); /* allow the irq_work to finish */ test_find_vma_kprobe(skel); find_vma__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c index 12c4f45cee1a..bc24f83339d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c @@ -110,7 +110,7 @@ static void test_perf_branches_hw(void) attr.type = PERF_TYPE_HARDWARE; attr.config = PERF_COUNT_HW_CPU_CYCLES; attr.freq = 1; - attr.sample_freq = 4000; + attr.sample_freq = 1000; attr.sample_type = PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); @@ -151,7 +151,7 @@ static void test_perf_branches_no_hw(void) attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; - attr.sample_freq = 4000; + attr.sample_freq = 1000; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c index ede07344f264..224eba6fef2e 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_link.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c @@ -39,7 +39,7 @@ void serial_test_perf_link(void) attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; - attr.sample_freq = 4000; + 
attr.sample_freq = 1000; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 776916b61c40..841217bd1df6 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -4,11 +4,12 @@ #include <sys/resource.h> #include "test_send_signal_kern.skel.h" -int sigusr1_received = 0; +int sigusr1_received; +volatile int volatile_variable; static void sigusr1_handler(int signum) { - sigusr1_received++; + sigusr1_received = 1; } static void test_send_signal_common(struct perf_event_attr *attr, @@ -42,7 +43,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, int old_prio; /* install signal handler and notify parent */ + errno = 0; signal(SIGUSR1, sigusr1_handler); + ASSERT_OK(errno, "signal"); close(pipe_c2p[0]); /* close read */ close(pipe_p2c[1]); /* close write */ @@ -63,9 +66,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* wait a little for signal handler */ - sleep(1); + for (int i = 0; i < 1000000000; i++) + volatile_variable++; buf[0] = sigusr1_received ? '2' : '0'; + ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); + ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); /* wait for parent notification and exit */ @@ -110,9 +116,9 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read"); /* trigger the bpf send_signal */ + skel->bss->signal_thread = signal_thread; skel->bss->pid = pid; skel->bss->sig = SIGUSR1; - skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
In send_signal, replace sleep with dummy cpu intensive computation to increase probability of child process being scheduled. Add few more asserts. In find_vma, reduce sample_freq as higher values may be rejected in some qemu setups, remove usleep and increase length of cpu intensive computation. In bpf_cookie, perf_link and perf_branches, reduce sample_freq as higher values may be rejected in some qemu setups Signed-off-by: Mykola Lysenko <mykolal@fb.com> --- .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- tools/testing/selftests/bpf/prog_tests/find_vma.c | 5 ++--- .../selftests/bpf/prog_tests/perf_branches.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/perf_link.c | 2 +- .../testing/selftests/bpf/prog_tests/send_signal.c | 14 ++++++++++---- 5 files changed, 16 insertions(+), 11 deletions(-)