diff mbox series

[v3,bpf-next] Improve BPF test stability (related to perf events and scheduling)

Message ID 20220302212735.3412041-1-mykolal@fb.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [v3,bpf-next] Improve BPF test stability (related to perf events and scheduling) | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 9 maintainers not CCed: linux-kselftest@vger.kernel.org sunyucong@gmail.com kpsingh@kernel.org john.fastabend@gmail.com kafai@fb.com shuah@kernel.org songliubraving@fb.com netdev@vger.kernel.org toke@redhat.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: Use of volatile is usually wrong: see Documentation/process/volatile-considered-harmful.rst
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next success VM_Test

Commit Message

Mykola Lysenko March 2, 2022, 9:27 p.m. UTC
In send_signal, replace sleep with dummy cpu intensive computation
to increase probability of child process being scheduled. Add a few
more asserts.

In find_vma, reduce sample_freq as higher values may be rejected in
some qemu setups, remove usleep and increase length of cpu intensive
computation.

In bpf_cookie, perf_link and perf_branches, reduce sample_freq as
higher values may be rejected in some qemu setups

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_cookie.c       |  2 +-
 .../testing/selftests/bpf/prog_tests/find_vma.c | 13 ++++++++++---
 .../selftests/bpf/prog_tests/perf_branches.c    |  4 ++--
 .../selftests/bpf/prog_tests/perf_link.c        |  2 +-
 .../selftests/bpf/prog_tests/send_signal.c      | 17 ++++++++++-------
 .../selftests/bpf/progs/test_send_signal_kern.c |  2 +-
 6 files changed, 25 insertions(+), 15 deletions(-)

Comments

Daniel Borkmann March 3, 2022, 3:36 p.m. UTC | #1
On 3/2/22 10:27 PM, Mykola Lysenko wrote:
> In send_signal, replace sleep with dummy cpu intensive computation
> to increase probability of child process being scheduled. Add few
> more asserts.
> 
> In find_vma, reduce sample_freq as higher values may be rejected in
> some qemu setups, remove usleep and increase length of cpu intensive
> computation.
> 
> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as
> higher values may be rejected in some qemu setups
> 
> Signed-off-by: Mykola Lysenko <mykolal@fb.com>
> Acked-by: Yonghong Song <yhs@fb.com>
> ---
>   .../selftests/bpf/prog_tests/bpf_cookie.c       |  2 +-
>   .../testing/selftests/bpf/prog_tests/find_vma.c | 13 ++++++++++---
>   .../selftests/bpf/prog_tests/perf_branches.c    |  4 ++--
>   .../selftests/bpf/prog_tests/perf_link.c        |  2 +-
>   .../selftests/bpf/prog_tests/send_signal.c      | 17 ++++++++++-------
>   .../selftests/bpf/progs/test_send_signal_kern.c |  2 +-
>   6 files changed, 25 insertions(+), 15 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> index cd10df6cd0fc..0612e79a9281 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
>   	attr.type = PERF_TYPE_SOFTWARE;
>   	attr.config = PERF_COUNT_SW_CPU_CLOCK;
>   	attr.freq = 1;
> -	attr.sample_freq = 4000;
> +	attr.sample_freq = 1000;
>   	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
>   	if (!ASSERT_GE(pfd, 0, "perf_fd"))
>   		goto cleanup;
> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
> index b74b3c0c555a..7cf4feb6464c 100644
> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
> @@ -30,12 +30,20 @@ static int open_pe(void)
>   	attr.type = PERF_TYPE_HARDWARE;
>   	attr.config = PERF_COUNT_HW_CPU_CYCLES;
>   	attr.freq = 1;
> -	attr.sample_freq = 4000;
> +	attr.sample_freq = 1000;
>   	pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
>   
>   	return pfd >= 0 ? pfd : -errno;
>   }
>   
> +static bool find_vma_pe_condition(struct find_vma *skel)
> +{
> +	return skel->bss->found_vm_exec == 0 ||
> +		skel->data->find_addr_ret != 0 ||

Should this not test for `skel->data->find_addr_ret == -1` ?

> +		skel->data->find_zero_ret == -1 ||
> +		strcmp(skel->bss->d_iname, "test_progs") != 0;
> +}
> +
Thanks,
Daniel
Mykola Lysenko March 3, 2022, 5:29 p.m. UTC | #2
> On Mar 3, 2022, at 7:36 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> 
> On 3/2/22 10:27 PM, Mykola Lysenko wrote:
>> In send_signal, replace sleep with dummy cpu intensive computation
>> to increase probability of child process being scheduled. Add few
>> more asserts.
>> In find_vma, reduce sample_freq as higher values may be rejected in
>> some qemu setups, remove usleep and increase length of cpu intensive
>> computation.
>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as
>> higher values may be rejected in some qemu setups
>> Signed-off-by: Mykola Lysenko <mykolal@fb.com>
>> Acked-by: Yonghong Song <yhs@fb.com>
>> ---
>>  .../selftests/bpf/prog_tests/bpf_cookie.c       |  2 +-
>>  .../testing/selftests/bpf/prog_tests/find_vma.c | 13 ++++++++++---
>>  .../selftests/bpf/prog_tests/perf_branches.c    |  4 ++--
>>  .../selftests/bpf/prog_tests/perf_link.c        |  2 +-
>>  .../selftests/bpf/prog_tests/send_signal.c      | 17 ++++++++++-------
>>  .../selftests/bpf/progs/test_send_signal_kern.c |  2 +-
>>  6 files changed, 25 insertions(+), 15 deletions(-)
>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>> index cd10df6cd0fc..0612e79a9281 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
>>  	attr.type = PERF_TYPE_SOFTWARE;
>>  	attr.config = PERF_COUNT_SW_CPU_CLOCK;
>>  	attr.freq = 1;
>> -	attr.sample_freq = 4000;
>> +	attr.sample_freq = 1000;
>>  	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
>>  	if (!ASSERT_GE(pfd, 0, "perf_fd"))
>>  		goto cleanup;
>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>> index b74b3c0c555a..7cf4feb6464c 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>> @@ -30,12 +30,20 @@ static int open_pe(void)
>>  	attr.type = PERF_TYPE_HARDWARE;
>>  	attr.config = PERF_COUNT_HW_CPU_CYCLES;
>>  	attr.freq = 1;
>> -	attr.sample_freq = 4000;
>> +	attr.sample_freq = 1000;
>>  	pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
>>    	return pfd >= 0 ? pfd : -errno;
>>  }
>>  +static bool find_vma_pe_condition(struct find_vma *skel)
>> +{
>> +	return skel->bss->found_vm_exec == 0 ||
>> +		skel->data->find_addr_ret != 0 ||
> 
> Should this not test for `skel->data->find_addr_ret == -1` ?

It seems that find_addr_ret changes value few times until it gets to 0. I added print statements when value is changed:

find_addr_ret -1 => initial value
find_addr_ret -16 => -EBUSY
find_addr_ret 0 => final value

Hence, in this case I think it is better to wait for the final value. We do have a timeout in the loop anyway (when "i" reaches 1bn), so the test would not get stuck.

TL;DR change in the test that prints these values

-       for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i)
+       int find_addr_ret = -1;
+       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
+
+       for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i) {
+               if (find_addr_ret != skel->data->find_addr_ret) {
+                       find_addr_ret = skel->data->find_addr_ret;
+                       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
+               }
                ++j;
+       }
+
+       printf("find_addr_ret %d\n", skel->data->find_addr_ret);

> 
>> +		skel->data->find_zero_ret == -1 ||
>> +		strcmp(skel->bss->d_iname, "test_progs") != 0;
>> +}
>> +
> Thanks,
> Daniel
Yonghong Song March 3, 2022, 8:31 p.m. UTC | #3
On 3/3/22 9:29 AM, Mykola Lysenko wrote:
> 
> 
>> On Mar 3, 2022, at 7:36 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
>>
>> On 3/2/22 10:27 PM, Mykola Lysenko wrote:
>>> In send_signal, replace sleep with dummy cpu intensive computation
>>> to increase probability of child process being scheduled. Add few
>>> more asserts.
>>> In find_vma, reduce sample_freq as higher values may be rejected in
>>> some qemu setups, remove usleep and increase length of cpu intensive
>>> computation.
>>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as
>>> higher values may be rejected in some qemu setups
>>> Signed-off-by: Mykola Lysenko <mykolal@fb.com>
>>> Acked-by: Yonghong Song <yhs@fb.com>
>>> ---
>>>   .../selftests/bpf/prog_tests/bpf_cookie.c       |  2 +-
>>>   .../testing/selftests/bpf/prog_tests/find_vma.c | 13 ++++++++++---
>>>   .../selftests/bpf/prog_tests/perf_branches.c    |  4 ++--
>>>   .../selftests/bpf/prog_tests/perf_link.c        |  2 +-
>>>   .../selftests/bpf/prog_tests/send_signal.c      | 17 ++++++++++-------
>>>   .../selftests/bpf/progs/test_send_signal_kern.c |  2 +-
>>>   6 files changed, 25 insertions(+), 15 deletions(-)
>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>> index cd10df6cd0fc..0612e79a9281 100644
>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
>>>   	attr.type = PERF_TYPE_SOFTWARE;
>>>   	attr.config = PERF_COUNT_SW_CPU_CLOCK;
>>>   	attr.freq = 1;
>>> -	attr.sample_freq = 4000;
>>> +	attr.sample_freq = 1000;
>>>   	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
>>>   	if (!ASSERT_GE(pfd, 0, "perf_fd"))
>>>   		goto cleanup;
>>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>> index b74b3c0c555a..7cf4feb6464c 100644
>>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>> @@ -30,12 +30,20 @@ static int open_pe(void)
>>>   	attr.type = PERF_TYPE_HARDWARE;
>>>   	attr.config = PERF_COUNT_HW_CPU_CYCLES;
>>>   	attr.freq = 1;
>>> -	attr.sample_freq = 4000;
>>> +	attr.sample_freq = 1000;
>>>   	pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
>>>     	return pfd >= 0 ? pfd : -errno;
>>>   }
>>>   +static bool find_vma_pe_condition(struct find_vma *skel)
>>> +{
>>> +	return skel->bss->found_vm_exec == 0 ||
>>> +		skel->data->find_addr_ret != 0 ||
>>
>> Should this not test for `skel->data->find_addr_ret == -1` ?
> 
> It seems that find_addr_ret changes value few times until it gets to 0. I added print statements when value is changed:
> 
> find_addr_ret -1 => initial value
> find_addr_ret -16 => -EBUSY
> find_addr_ret 0 => final value
> 
> Hence, in this case I think it is better to wait for the final value. We do have time out in the loop anyways (when “i" reaches 1bn), so test would not get stuck.

Thanks for the above information. I read the code again. I think it is 
more complicated than above. Let us look at the bpf program:

SEC("perf_event")
int handle_pe(void)
{
         struct task_struct *task = bpf_get_current_task_btf();
         struct callback_ctx data = {};

         if (task->pid != target_pid)
                 return 0;

         find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);

         /* In NMI, this should return -EBUSY, as the previous call is using
          * the irq_work.
          */
         find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
         return 0;
}

Assuming task->pid == target_pid,
the first bpf program call should have
     find_addr_ret = 0     /* lock irq_work */
     find_zero_ret = -EBUSY

For the second bpf program call, there are two possibilities:
    . irq_work is unlocked, so the result will find_addr_ret = 0, 
find_zero_ret = -EBUSY
    . or irq_work is still locked, the result will be find_addr_ret = 
-EBUSY, find_zero_ret = -EBUSY

the third bpf program call will be similar to the second bpf program run.

So final validation check probably should check both 0 and -EBUSY
for find_addr_ret.

Leaving some time to potentially unlock the irq_work as in the original
code is still needed to prevent potential problem for the subsequent tests.

I think this patch can be broken into three separate commits:
   - find_vma fix
   - send_signal fix
   - other
to make changes a little bit focused.

> 
> TL:DR change in the test that prints these values
> 
> -       for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i)
> +       int find_addr_ret = -1;
> +       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
> +
> +       for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i) {
> +               if (find_addr_ret != skel->data->find_addr_ret) {
> +                       find_addr_ret = skel->data->find_addr_ret;
> +                       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
> +               }
>                  ++j;
> +       }
> +
> +       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
> 
>>
>>> +		skel->data->find_zero_ret == -1 ||
>>> +		strcmp(skel->bss->d_iname, "test_progs") != 0;
>>> +}
>>> +
>> Thanks,
>> Daniel
>
Mykola Lysenko March 8, 2022, 5:29 p.m. UTC | #4
Thanks Yonghong,

Sorry for the delay in here.

I have split commits into 3 as you asked. Will send it out shortly. Have few questions below re: find_vma test.

> On Mar 3, 2022, at 12:31 PM, Yonghong Song <yhs@fb.com> wrote:
> 
> 
> 
> On 3/3/22 9:29 AM, Mykola Lysenko wrote:
>>> On Mar 3, 2022, at 7:36 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
>>> 
>>> On 3/2/22 10:27 PM, Mykola Lysenko wrote:
>>>> In send_signal, replace sleep with dummy cpu intensive computation
>>>> to increase probability of child process being scheduled. Add few
>>>> more asserts.
>>>> In find_vma, reduce sample_freq as higher values may be rejected in
>>>> some qemu setups, remove usleep and increase length of cpu intensive
>>>> computation.
>>>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as
>>>> higher values may be rejected in some qemu setups
>>>> Signed-off-by: Mykola Lysenko <mykolal@fb.com>
>>>> Acked-by: Yonghong Song <yhs@fb.com>
>>>> ---
>>>>  .../selftests/bpf/prog_tests/bpf_cookie.c       |  2 +-
>>>>  .../testing/selftests/bpf/prog_tests/find_vma.c | 13 ++++++++++---
>>>>  .../selftests/bpf/prog_tests/perf_branches.c    |  4 ++--
>>>>  .../selftests/bpf/prog_tests/perf_link.c        |  2 +-
>>>>  .../selftests/bpf/prog_tests/send_signal.c      | 17 ++++++++++-------
>>>>  .../selftests/bpf/progs/test_send_signal_kern.c |  2 +-
>>>>  6 files changed, 25 insertions(+), 15 deletions(-)
>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>>> index cd10df6cd0fc..0612e79a9281 100644
>>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
>>>>  	attr.type = PERF_TYPE_SOFTWARE;
>>>>  	attr.config = PERF_COUNT_SW_CPU_CLOCK;
>>>>  	attr.freq = 1;
>>>> -	attr.sample_freq = 4000;
>>>> +	attr.sample_freq = 1000;
>>>>  	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
>>>>  	if (!ASSERT_GE(pfd, 0, "perf_fd"))
>>>>  		goto cleanup;
>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>>> index b74b3c0c555a..7cf4feb6464c 100644
>>>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>>> @@ -30,12 +30,20 @@ static int open_pe(void)
>>>>  	attr.type = PERF_TYPE_HARDWARE;
>>>>  	attr.config = PERF_COUNT_HW_CPU_CYCLES;
>>>>  	attr.freq = 1;
>>>> -	attr.sample_freq = 4000;
>>>> +	attr.sample_freq = 1000;
>>>>  	pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
>>>>    	return pfd >= 0 ? pfd : -errno;
>>>>  }
>>>>  +static bool find_vma_pe_condition(struct find_vma *skel)
>>>> +{
>>>> +	return skel->bss->found_vm_exec == 0 ||
>>>> +		skel->data->find_addr_ret != 0 ||
>>> 
>>> Should this not test for `skel->data->find_addr_ret == -1` ?
>> It seems that find_addr_ret changes value few times until it gets to 0. I added print statements when value is changed:
>> find_addr_ret -1 => initial value
>> find_addr_ret -16 => -EBUSY
>> find_addr_ret 0 => final value
>> Hence, in this case I think it is better to wait for the final value. We do have time out in the loop anyways (when “i" reaches 1bn), so test would not get stuck.
> 
> Thanks for the above information. I read the code again. I think it is more complicated than above. Let us look at the bpf program:
> 
> SEC("perf_event")
> int handle_pe(void)
> {
>        struct task_struct *task = bpf_get_current_task_btf();
>        struct callback_ctx data = {};
> 
>        if (task->pid != target_pid)
>                return 0;
> 
>        find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);
> 
>        /* In NMI, this should return -EBUSY, as the previous call is using
>         * the irq_work.
>         */
>        find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
>        return 0;
> }
> 
> Assuming task->pid == target_pid,
> the first bpf program call should have
>    find_addr_ret = 0     /* lock irq_work */
>    find_zero_ret = -EBUSY
> 
> For the second bpf program call, there are two possibilities:
>   . irq_work is unlocked, so the result will find_addr_ret = 0, find_zero_ret = -EBUSY
>   . or irq_work is still locked, the result will be find_addr_ret = -EBUSY, find_zero_ret = -EBUSY
> 
> the third bpf program call will be similar to the second bpf program run.
> 
> So final validation check probably should check both 0 and -EBUSY
> for find_addr_ret.
> 

Do you mean we need to add additional test in test_and_reset_skel function or in find_vma_pe_condition?

Do we really need to do a final check for skel->data->find_addr_ret in test_and_reset_skel if we already confirmed
it became 0 previously?


> Leaving some time to potentially unlock the irq_work as in the original
> code is still needed to prevent potential problem for the subsequent tests.

By leaving some time, do you mean to revert removal of the next line in serial_test_find_vma function?
usleep(100000); /* allow the irq_work to finish */

> 
> I think this patch can be broke into three separate commits:
>  - find_vma fix
>  - send_signal fix
>  - other
> to make changes a little bit focused.
> 
>> TL:DR change in the test that prints these values
>> -       for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i)
>> +       int find_addr_ret = -1;
>> +       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
>> +
>> +       for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i) {
>> +               if (find_addr_ret != skel->data->find_addr_ret) {
>> +                       find_addr_ret = skel->data->find_addr_ret;
>> +                       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
>> +               }
>>                 ++j;
>> +       }
>> +
>> +       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
>>> 
>>>> +		skel->data->find_zero_ret == -1 ||
>>>> +		strcmp(skel->bss->d_iname, "test_progs") != 0;
>>>> +}
>>>> +
>>> Thanks,
>>> Daniel
Yonghong Song March 8, 2022, 6:10 p.m. UTC | #5
On 3/8/22 9:29 AM, Mykola Lysenko wrote:
> Thanks Yonghong,
> 
> Sorry for the delay in here.
> 
> I have split commits into 3 as you asked. Will send it out shortly. Have few questions below re: find_vma test.
> 
>> On Mar 3, 2022, at 12:31 PM, Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 3/3/22 9:29 AM, Mykola Lysenko wrote:
>>>> On Mar 3, 2022, at 7:36 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
>>>>
>>>> On 3/2/22 10:27 PM, Mykola Lysenko wrote:
>>>>> In send_signal, replace sleep with dummy cpu intensive computation
>>>>> to increase probability of child process being scheduled. Add few
>>>>> more asserts.
>>>>> In find_vma, reduce sample_freq as higher values may be rejected in
>>>>> some qemu setups, remove usleep and increase length of cpu intensive
>>>>> computation.
>>>>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as
>>>>> higher values may be rejected in some qemu setups
>>>>> Signed-off-by: Mykola Lysenko <mykolal@fb.com>
>>>>> Acked-by: Yonghong Song <yhs@fb.com>
>>>>> ---
>>>>>   .../selftests/bpf/prog_tests/bpf_cookie.c       |  2 +-
>>>>>   .../testing/selftests/bpf/prog_tests/find_vma.c | 13 ++++++++++---
>>>>>   .../selftests/bpf/prog_tests/perf_branches.c    |  4 ++--
>>>>>   .../selftests/bpf/prog_tests/perf_link.c        |  2 +-
>>>>>   .../selftests/bpf/prog_tests/send_signal.c      | 17 ++++++++++-------
>>>>>   .../selftests/bpf/progs/test_send_signal_kern.c |  2 +-
>>>>>   6 files changed, 25 insertions(+), 15 deletions(-)
>>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>>>> index cd10df6cd0fc..0612e79a9281 100644
>>>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>>>>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
>>>>>   	attr.type = PERF_TYPE_SOFTWARE;
>>>>>   	attr.config = PERF_COUNT_SW_CPU_CLOCK;
>>>>>   	attr.freq = 1;
>>>>> -	attr.sample_freq = 4000;
>>>>> +	attr.sample_freq = 1000;
>>>>>   	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
>>>>>   	if (!ASSERT_GE(pfd, 0, "perf_fd"))
>>>>>   		goto cleanup;
>>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>>>> index b74b3c0c555a..7cf4feb6464c 100644
>>>>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>>>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>>>>> @@ -30,12 +30,20 @@ static int open_pe(void)
>>>>>   	attr.type = PERF_TYPE_HARDWARE;
>>>>>   	attr.config = PERF_COUNT_HW_CPU_CYCLES;
>>>>>   	attr.freq = 1;
>>>>> -	attr.sample_freq = 4000;
>>>>> +	attr.sample_freq = 1000;
>>>>>   	pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
>>>>>     	return pfd >= 0 ? pfd : -errno;
>>>>>   }
>>>>>   +static bool find_vma_pe_condition(struct find_vma *skel)
>>>>> +{
>>>>> +	return skel->bss->found_vm_exec == 0 ||
>>>>> +		skel->data->find_addr_ret != 0 ||
>>>>
>>>> Should this not test for `skel->data->find_addr_ret == -1` ?
>>> It seems that find_addr_ret changes value few times until it gets to 0. I added print statements when value is changed:
>>> find_addr_ret -1 => initial value
>>> find_addr_ret -16 => -EBUSY
>>> find_addr_ret 0 => final value
>>> Hence, in this case I think it is better to wait for the final value. We do have time out in the loop anyways (when “i" reaches 1bn), so test would not get stuck.
>>
>> Thanks for the above information. I read the code again. I think it is more complicated than above. Let us look at the bpf program:
>>
>> SEC("perf_event")
>> int handle_pe(void)
>> {
>>         struct task_struct *task = bpf_get_current_task_btf();
>>         struct callback_ctx data = {};
>>
>>         if (task->pid != target_pid)
>>                 return 0;
>>
>>         find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);
>>
>>         /* In NMI, this should return -EBUSY, as the previous call is using
>>          * the irq_work.
>>          */
>>         find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
>>         return 0;
>> }
>>
>> Assuming task->pid == target_pid,
>> the first bpf program call should have
>>     find_addr_ret = 0     /* lock irq_work */
>>     find_zero_ret = -EBUSY
>>
>> For the second bpf program call, there are two possibilities:
>>    . irq_work is unlocked, so the result will find_addr_ret = 0, find_zero_ret = -EBUSY
>>    . or irq_work is still locked, the result will be find_addr_ret = -EBUSY, find_zero_ret = -EBUSY
>>
>> the third bpf program call will be similar to the second bpf program run.
>>
>> So final validation check probably should check both 0 and -EBUSY
>> for find_addr_ret.
>>
> 
> Do you mean we need to add additional test in test_and_reset_skel function or in find_vma_pe_condition?

No. There is no need for an additional test.

> 
> Do we really need to do final check for skel->data->find_addr_ret in test_and_reset_skel if we already confirmed
> It became 0 previously?

Good point. Yes and no.
If we did hit find_vma_pe_condition(skel) is false, then we don't need 
to do subsequent checking.
But if we didn't, checking is still needed to ensure the error is 
printed out.
You could refactor the code such that only if 
find_vma_pe_condition(skel) is false and no subsequent checking, or
unconditionally subsequent checking.
Either way is fine with me.

> 
> 
>> Leaving some time to potentially unlock the irq_work as in the original
>> code is still needed to prevent potential problem for the subsequent tests.
> 
> By leaving some time, do you mean to revert removal of the next line in serial_test_find_vma function?
> usleep(100000); /* allow the irq_work to finish */

Yes.

> 
>>
>> I think this patch can be broke into three separate commits:
>>   - find_vma fix
>>   - send_signal fix
>>   - other
>> to make changes a little bit focused.
>>
>>> TL:DR change in the test that prints these values
>>> -       for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i)
>>> +       int find_addr_ret = -1;
>>> +       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
>>> +
>>> +       for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i) {
>>> +               if (find_addr_ret != skel->data->find_addr_ret) {
>>> +                       find_addr_ret = skel->data->find_addr_ret;
>>> +                       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
>>> +               }
>>>                  ++j;
>>> +       }
>>> +
>>> +       printf("find_addr_ret %d\n", skel->data->find_addr_ret);
>>>>
>>>>> +		skel->data->find_zero_ret == -1 ||
>>>>> +		strcmp(skel->bss->d_iname, "test_progs") != 0;
>>>>> +}
>>>>> +
>>>> Thanks,
>>>> Daniel
>
sunyucong@gmail.com March 8, 2022, 8:19 p.m. UTC | #6
On Wed, Mar 2, 2022 at 3:53 PM Mykola Lysenko <mykolal@fb.com> wrote:
>
> In send_signal, replace sleep with dummy cpu intensive computation
> to increase probability of child process being scheduled. Add few
> more asserts.
>
> In find_vma, reduce sample_freq as higher values may be rejected in
> some qemu setups, remove usleep and increase length of cpu intensive
> computation.
>
> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as
> higher values may be rejected in some qemu setups
>
> Signed-off-by: Mykola Lysenko <mykolal@fb.com>
> Acked-by: Yonghong Song <yhs@fb.com>
> ---
>  .../selftests/bpf/prog_tests/bpf_cookie.c       |  2 +-
>  .../testing/selftests/bpf/prog_tests/find_vma.c | 13 ++++++++++---
>  .../selftests/bpf/prog_tests/perf_branches.c    |  4 ++--
>  .../selftests/bpf/prog_tests/perf_link.c        |  2 +-
>  .../selftests/bpf/prog_tests/send_signal.c      | 17 ++++++++++-------
>  .../selftests/bpf/progs/test_send_signal_kern.c |  2 +-
>  6 files changed, 25 insertions(+), 15 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> index cd10df6cd0fc..0612e79a9281 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
>         attr.type = PERF_TYPE_SOFTWARE;
>         attr.config = PERF_COUNT_SW_CPU_CLOCK;
>         attr.freq = 1;
> -       attr.sample_freq = 4000;
> +       attr.sample_freq = 1000;
>         pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
>         if (!ASSERT_GE(pfd, 0, "perf_fd"))
>                 goto cleanup;
> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
> index b74b3c0c555a..7cf4feb6464c 100644
> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
> @@ -30,12 +30,20 @@ static int open_pe(void)
>         attr.type = PERF_TYPE_HARDWARE;
>         attr.config = PERF_COUNT_HW_CPU_CYCLES;
>         attr.freq = 1;
> -       attr.sample_freq = 4000;
> +       attr.sample_freq = 1000;

I think It's actually better to modify sysctl.
perf_event_max_sample_rate through test_progs, I had a patch do that
before, but Andrii didn't apply it. (I've forgotten why) , but this is
a recurring issue when running in VM in parallel mode.

>         pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
>
>         return pfd >= 0 ? pfd : -errno;
>  }
Mykola Lysenko March 8, 2022, 8:47 p.m. UTC | #7
> On Mar 8, 2022, at 12:19 PM, sunyucong@gmail.com wrote:
> 
> On Wed, Mar 2, 2022 at 3:53 PM Mykola Lysenko <mykolal@fb.com> wrote:
>> 
>> In send_signal, replace sleep with dummy cpu intensive computation
>> to increase probability of child process being scheduled. Add few
>> more asserts.
>> 
>> In find_vma, reduce sample_freq as higher values may be rejected in
>> some qemu setups, remove usleep and increase length of cpu intensive
>> computation.
>> 
>> In bpf_cookie, perf_link and perf_branches, reduce sample_freq as
>> higher values may be rejected in some qemu setups
>> 
>> Signed-off-by: Mykola Lysenko <mykolal@fb.com>
>> Acked-by: Yonghong Song <yhs@fb.com>
>> ---
>> .../selftests/bpf/prog_tests/bpf_cookie.c       |  2 +-
>> .../testing/selftests/bpf/prog_tests/find_vma.c | 13 ++++++++++---
>> .../selftests/bpf/prog_tests/perf_branches.c    |  4 ++--
>> .../selftests/bpf/prog_tests/perf_link.c        |  2 +-
>> .../selftests/bpf/prog_tests/send_signal.c      | 17 ++++++++++-------
>> .../selftests/bpf/progs/test_send_signal_kern.c |  2 +-
>> 6 files changed, 25 insertions(+), 15 deletions(-)
>> 
>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>> index cd10df6cd0fc..0612e79a9281 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
>> @@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
>>        attr.type = PERF_TYPE_SOFTWARE;
>>        attr.config = PERF_COUNT_SW_CPU_CLOCK;
>>        attr.freq = 1;
>> -       attr.sample_freq = 4000;
>> +       attr.sample_freq = 1000;
>>        pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
>>        if (!ASSERT_GE(pfd, 0, "perf_fd"))
>>                goto cleanup;
>> diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>> index b74b3c0c555a..7cf4feb6464c 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
>> @@ -30,12 +30,20 @@ static int open_pe(void)
>>        attr.type = PERF_TYPE_HARDWARE;
>>        attr.config = PERF_COUNT_HW_CPU_CYCLES;
>>        attr.freq = 1;
>> -       attr.sample_freq = 4000;
>> +       attr.sample_freq = 1000;
> 
> I think It's actually better to modify sysctl.
> perf_event_max_sample_rate through test_progs, I had a patch do that
> before, but Andrii didn't apply it. (I've forgotten why) , but this is
> a recurring issue when running in VM in parallel mode.

I thought about this. But why to do it if sample_freq = 1000 is enough for our tests to function?

> 
>>        pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
>> 
>>        return pfd >= 0 ? pfd : -errno;
>> }
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index cd10df6cd0fc..0612e79a9281 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -199,7 +199,7 @@  static void pe_subtest(struct test_bpf_cookie *skel)
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.config = PERF_COUNT_SW_CPU_CLOCK;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
 	if (!ASSERT_GE(pfd, 0, "perf_fd"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
index b74b3c0c555a..7cf4feb6464c 100644
--- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
+++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
@@ -30,12 +30,20 @@  static int open_pe(void)
 	attr.type = PERF_TYPE_HARDWARE;
 	attr.config = PERF_COUNT_HW_CPU_CYCLES;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
 
 	return pfd >= 0 ? pfd : -errno;
 }
 
+static bool find_vma_pe_condition(struct find_vma *skel)
+{
+	return skel->bss->found_vm_exec == 0 ||
+		skel->data->find_addr_ret != 0 ||
+		skel->data->find_zero_ret == -1 ||
+		strcmp(skel->bss->d_iname, "test_progs") != 0;
+}
+
 static void test_find_vma_pe(struct find_vma *skel)
 {
 	struct bpf_link *link = NULL;
@@ -57,7 +65,7 @@  static void test_find_vma_pe(struct find_vma *skel)
 	if (!ASSERT_OK_PTR(link, "attach_perf_event"))
 		goto cleanup;
 
-	for (i = 0; i < 1000000; ++i)
+	for (i = 0; i < 1000000000 && find_vma_pe_condition(skel); ++i)
 		++j;
 
 	test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */);
@@ -108,7 +116,6 @@  void serial_test_find_vma(void)
 	skel->bss->addr = (__u64)(uintptr_t)test_find_vma_pe;
 
 	test_find_vma_pe(skel);
-	usleep(100000); /* allow the irq_work to finish */
 	test_find_vma_kprobe(skel);
 
 	find_vma__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index 12c4f45cee1a..bc24f83339d6 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -110,7 +110,7 @@  static void test_perf_branches_hw(void)
 	attr.type = PERF_TYPE_HARDWARE;
 	attr.config = PERF_COUNT_HW_CPU_CYCLES;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
 	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
@@ -151,7 +151,7 @@  static void test_perf_branches_no_hw(void)
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.config = PERF_COUNT_SW_CPU_CLOCK;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
 	if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
index ede07344f264..224eba6fef2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -39,7 +39,7 @@  void serial_test_perf_link(void)
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.config = PERF_COUNT_SW_CPU_CLOCK;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
 	if (!ASSERT_GE(pfd, 0, "perf_fd"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 776916b61c40..def50f1c5c31 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -4,11 +4,11 @@ 
 #include <sys/resource.h>
 #include "test_send_signal_kern.skel.h"
 
-int sigusr1_received = 0;
+static int sigusr1_received;
 
 static void sigusr1_handler(int signum)
 {
-	sigusr1_received++;
+	sigusr1_received = 1;
 }
 
 static void test_send_signal_common(struct perf_event_attr *attr,
@@ -40,9 +40,10 @@  static void test_send_signal_common(struct perf_event_attr *attr,
 
 	if (pid == 0) {
 		int old_prio;
+		volatile int j = 0;
 
 		/* install signal handler and notify parent */
-		signal(SIGUSR1, sigusr1_handler);
+		ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal");
 
 		close(pipe_c2p[0]); /* close read */
 		close(pipe_p2c[1]); /* close write */
@@ -63,9 +64,11 @@  static void test_send_signal_common(struct perf_event_attr *attr,
 		ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
 
 		/* wait a little for signal handler */
-		sleep(1);
+		for (int i = 0; i < 100000000 && !sigusr1_received; i++)
+			j /= i + 1;
 
 		buf[0] = sigusr1_received ? '2' : '0';
+		ASSERT_EQ(sigusr1_received, 1, "sigusr1_received");
 		ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
 
 		/* wait for parent notification and exit */
@@ -93,7 +96,7 @@  static void test_send_signal_common(struct perf_event_attr *attr,
 			goto destroy_skel;
 		}
 	} else {
-		pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
+		pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */,
 				 -1 /* group id */, 0 /* flags */);
 		if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
 			err = -1;
@@ -110,9 +113,9 @@  static void test_send_signal_common(struct perf_event_attr *attr,
 	ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
 
 	/* trigger the bpf send_signal */
-	skel->bss->pid = pid;
-	skel->bss->sig = SIGUSR1;
 	skel->bss->signal_thread = signal_thread;
+	skel->bss->sig = SIGUSR1;
+	skel->bss->pid = pid;
 
 	/* notify child that bpf program can send_signal now */
 	ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index b4233d3efac2..92354cd72044 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -10,7 +10,7 @@  static __always_inline int bpf_send_signal_test(void *ctx)
 {
 	int ret;
 
-	if (status != 0 || sig == 0 || pid == 0)
+	if (status != 0 || pid == 0)
 		return 0;
 
 	if ((bpf_get_current_pid_tgid() >> 32) == pid) {