diff mbox series

[bpf-next] selftests/bpf: Fix string read in strncmp benchmark

Message ID 20250312083859.1019635-1-vmalik@redhat.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [bpf-next] selftests/bpf: Fix string read in strncmp benchmark | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 26 (+0) this patch: 26 (+0)
netdev/cc_maintainers warning 2 maintainers not CCed: linux-kselftest@vger.kernel.org llvm@lists.linux.dev
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: Use of volatile is usually wrong: see Documentation/process/volatile-considered-harmful.rst
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-48 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-10 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-7 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests

Commit Message

Viktor Malik March 12, 2025, 8:38 a.m. UTC
The strncmp benchmark uses the bpf_strncmp helper and a hand-written
loop to compare two strings. The values of the strings are filled from
userspace. One of the strings is non-const (in .bss) while the other is
const (in .rodata) since that is the requirement of bpf_strncmp.

The problem is that in the hand-written loop, Clang optimizes the reads
from the const string to always return 0 which breaks the benchmark.

Mark the const string as volatile to avoid that.

The effect can be seen on the strncmp-no-helper variant.

Before this change:

    # ./bench strncmp-no-helper
    Setting up benchmark 'strncmp-no-helper'...
    Benchmark 'strncmp-no-helper' started.
    Iter   0 (8440.107us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
    Iter   1 (73909.374us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
    Iter   2 (-8140.994us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
    Iter   3 (3094.474us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
    Iter   4 (-2828.468us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
    Iter   5 (2635.595us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
    Iter   6 (-306.478us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
    Summary: hits    0.000 ± 0.000M/s (  0.000M/prod), drops    0.000 ± 0.000M/s, total operations    0.000 ± 0.000M/s

After this change:

    # ./bench strncmp-no-helper
    Setting up benchmark 'strncmp-no-helper'...
    Benchmark 'strncmp-no-helper' started.
    Iter   0 (21180.011us): hits    5.320M/s (  5.320M/prod), drops    0.000M/s, total operations    5.320M/s
    Iter   1 (-692.499us): hits    5.246M/s (  5.246M/prod), drops    0.000M/s, total operations    5.246M/s
    Iter   2 (-704.751us): hits    5.332M/s (  5.332M/prod), drops    0.000M/s, total operations    5.332M/s
    Iter   3 (62057.929us): hits    5.299M/s (  5.299M/prod), drops    0.000M/s, total operations    5.299M/s
    Iter   4 (-7981.421us): hits    5.303M/s (  5.303M/prod), drops    0.000M/s, total operations    5.303M/s
    Iter   5 (3500.341us): hits    5.306M/s (  5.306M/prod), drops    0.000M/s, total operations    5.306M/s
    Iter   6 (-3851.046us): hits    5.264M/s (  5.264M/prod), drops    0.000M/s, total operations    5.264M/s
    Summary: hits    5.338 ± 0.147M/s (  5.338M/prod), drops    0.000 ± 0.000M/s, total operations    5.338 ± 0.147M/s

Signed-off-by: Viktor Malik <vmalik@redhat.com>
---
 tools/testing/selftests/bpf/progs/strncmp_bench.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Comments

Viktor Malik March 12, 2025, 1:47 p.m. UTC | #1
On 3/12/25 09:38, Viktor Malik wrote:
> The strncmp benchmark uses the bpf_strncmp helper and a hand-written
> loop to compare two strings. The values of the strings are filled from
> userspace. One of the strings is non-const (in .bss) while the other is
> const (in .rodata) since that is the requirement of bpf_strncmp.
> 
> The problem is that in the hand-written loop, Clang optimizes the reads
> from the const string to always return 0 which breaks the benchmark.
> 
> Mark the const string as volatile to avoid that.
> 
> The effect can be seen on the strncmp-no-helper variant.
> 
> Before this change:
> 
>     # ./bench strncmp-no-helper
>     Setting up benchmark 'strncmp-no-helper'...
>     Benchmark 'strncmp-no-helper' started.
>     Iter   0 (8440.107us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   1 (73909.374us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   2 (-8140.994us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   3 (3094.474us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   4 (-2828.468us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   5 (2635.595us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   6 (-306.478us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Summary: hits    0.000 ± 0.000M/s (  0.000M/prod), drops    0.000 ± 0.000M/s, total operations    0.000 ± 0.000M/s
> 
> After this change:
> 
>     # ./bench strncmp-no-helper
>     Setting up benchmark 'strncmp-no-helper'...
>     Benchmark 'strncmp-no-helper' started.
>     Iter   0 (21180.011us): hits    5.320M/s (  5.320M/prod), drops    0.000M/s, total operations    5.320M/s
>     Iter   1 (-692.499us): hits    5.246M/s (  5.246M/prod), drops    0.000M/s, total operations    5.246M/s
>     Iter   2 (-704.751us): hits    5.332M/s (  5.332M/prod), drops    0.000M/s, total operations    5.332M/s
>     Iter   3 (62057.929us): hits    5.299M/s (  5.299M/prod), drops    0.000M/s, total operations    5.299M/s
>     Iter   4 (-7981.421us): hits    5.303M/s (  5.303M/prod), drops    0.000M/s, total operations    5.303M/s
>     Iter   5 (3500.341us): hits    5.306M/s (  5.306M/prod), drops    0.000M/s, total operations    5.306M/s
>     Iter   6 (-3851.046us): hits    5.264M/s (  5.264M/prod), drops    0.000M/s, total operations    5.264M/s
>     Summary: hits    5.338 ± 0.147M/s (  5.338M/prod), drops    0.000 ± 0.000M/s, total operations    5.338 ± 0.147M/s
> 

Ah, forgot fixes:

Fixes: 9c42652f8be3 ("selftests/bpf: Add benchmark for bpf_strncmp() helper")

> Signed-off-by: Viktor Malik <vmalik@redhat.com>
> ---
>  tools/testing/selftests/bpf/progs/strncmp_bench.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
> index 18373a7df76e..92a828a1ebea 100644
> --- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
> +++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
> @@ -9,7 +9,7 @@
>  
>  /* Will be updated by benchmark before program loading */
>  const volatile unsigned int cmp_str_len = 1;
> -const char target[STRNCMP_STR_SZ];
> +const volatile char target[STRNCMP_STR_SZ];
>  
>  long hits = 0;
>  char str[STRNCMP_STR_SZ];
> @@ -17,7 +17,7 @@ char str[STRNCMP_STR_SZ];
>  char _license[] SEC("license") = "GPL";
>  
>  static __always_inline int local_strncmp(const char *s1, unsigned int sz,
> -					 const char *s2)
> +					 const volatile char *s2)
>  {
>  	int ret = 0;
>  	unsigned int i;
> @@ -43,7 +43,7 @@ int strncmp_no_helper(void *ctx)
>  SEC("tp/syscalls/sys_enter_getpgid")
>  int strncmp_helper(void *ctx)
>  {
> -	if (bpf_strncmp(str, cmp_str_len + 1, target) < 0)
> +	if (bpf_strncmp(str, cmp_str_len + 1, (const char *)target) < 0)
>  		__sync_add_and_fetch(&hits, 1);
>  	return 0;
>  }
Andrii Nakryiko March 12, 2025, 11:45 p.m. UTC | #2
On Wed, Mar 12, 2025 at 1:39 AM Viktor Malik <vmalik@redhat.com> wrote:
>
> The strncmp benchmark uses the bpf_strncmp helper and a hand-written
> loop to compare two strings. The values of the strings are filled from
> userspace. One of the strings is non-const (in .bss) while the other is
> const (in .rodata) since that is the requirement of bpf_strncmp.
>
> The problem is that in the hand-written loop, Clang optimizes the reads
> from the const string to always return 0 which breaks the benchmark.
>
> Mark the const string as volatile to avoid that.
>
> The effect can be seen on the strncmp-no-helper variant.
>
> Before this change:
>
>     # ./bench strncmp-no-helper
>     Setting up benchmark 'strncmp-no-helper'...
>     Benchmark 'strncmp-no-helper' started.
>     Iter   0 (8440.107us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   1 (73909.374us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   2 (-8140.994us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   3 (3094.474us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   4 (-2828.468us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   5 (2635.595us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Iter   6 (-306.478us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>     Summary: hits    0.000 ± 0.000M/s (  0.000M/prod), drops    0.000 ± 0.000M/s, total operations    0.000 ± 0.000M/s
>
> After this change:
>
>     # ./bench strncmp-no-helper
>     Setting up benchmark 'strncmp-no-helper'...
>     Benchmark 'strncmp-no-helper' started.
>     Iter   0 (21180.011us): hits    5.320M/s (  5.320M/prod), drops    0.000M/s, total operations    5.320M/s
>     Iter   1 (-692.499us): hits    5.246M/s (  5.246M/prod), drops    0.000M/s, total operations    5.246M/s
>     Iter   2 (-704.751us): hits    5.332M/s (  5.332M/prod), drops    0.000M/s, total operations    5.332M/s
>     Iter   3 (62057.929us): hits    5.299M/s (  5.299M/prod), drops    0.000M/s, total operations    5.299M/s
>     Iter   4 (-7981.421us): hits    5.303M/s (  5.303M/prod), drops    0.000M/s, total operations    5.303M/s
>     Iter   5 (3500.341us): hits    5.306M/s (  5.306M/prod), drops    0.000M/s, total operations    5.306M/s
>     Iter   6 (-3851.046us): hits    5.264M/s (  5.264M/prod), drops    0.000M/s, total operations    5.264M/s
>     Summary: hits    5.338 ± 0.147M/s (  5.338M/prod), drops    0.000 ± 0.000M/s, total operations    5.338 ± 0.147M/s
>
> Signed-off-by: Viktor Malik <vmalik@redhat.com>
> ---
>  tools/testing/selftests/bpf/progs/strncmp_bench.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
> index 18373a7df76e..92a828a1ebea 100644
> --- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
> +++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
> @@ -9,7 +9,7 @@
>
>  /* Will be updated by benchmark before program loading */
>  const volatile unsigned int cmp_str_len = 1;
> -const char target[STRNCMP_STR_SZ];
> +const volatile char target[STRNCMP_STR_SZ];
>
>  long hits = 0;
>  char str[STRNCMP_STR_SZ];
> @@ -17,7 +17,7 @@ char str[STRNCMP_STR_SZ];
>  char _license[] SEC("license") = "GPL";
>
>  static __always_inline int local_strncmp(const char *s1, unsigned int sz,
> -                                        const char *s2)
> +                                        const volatile char *s2)

this will be a bit unfair to local_strncmp(), as now you'll be forcing
the compiler to re-read s1[i] twice, right? What if we do:


diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c
b/tools/testing/selftests/bpf/progs/strncmp_bench.c
index 18373a7df76e..f47bf88f8d2a 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -35,7 +35,10 @@ static __always_inline int local_strncmp(const char
*s1, unsigned int sz,
 SEC("tp/syscalls/sys_enter_getpgid")
 int strncmp_no_helper(void *ctx)
 {
-       if (local_strncmp(str, cmp_str_len + 1, target) < 0)
+       const char *target_str = target;
+
+       barrier_var(target_str);
+       if (local_strncmp(str, cmp_str_len + 1, target_str) < 0)
                __sync_add_and_fetch(&hits, 1);
        return 0;
 }


that will prevent compiler optimization as well and won't force us to
do all those casts?

pw-bot: cr


>  {
>         int ret = 0;
>         unsigned int i;
> @@ -43,7 +43,7 @@ int strncmp_no_helper(void *ctx)
>  SEC("tp/syscalls/sys_enter_getpgid")
>  int strncmp_helper(void *ctx)
>  {
> -       if (bpf_strncmp(str, cmp_str_len + 1, target) < 0)
> +       if (bpf_strncmp(str, cmp_str_len + 1, (const char *)target) < 0)
>                 __sync_add_and_fetch(&hits, 1);
>         return 0;
>  }
> --
> 2.48.1
>
Viktor Malik March 13, 2025, 12:24 p.m. UTC | #3
On 3/13/25 00:45, Andrii Nakryiko wrote:
> On Wed, Mar 12, 2025 at 1:39 AM Viktor Malik <vmalik@redhat.com> wrote:
>>
>> The strncmp benchmark uses the bpf_strncmp helper and a hand-written
>> loop to compare two strings. The values of the strings are filled from
>> userspace. One of the strings is non-const (in .bss) while the other is
>> const (in .rodata) since that is the requirement of bpf_strncmp.
>>
>> The problem is that in the hand-written loop, Clang optimizes the reads
>> from the const string to always return 0 which breaks the benchmark.
>>
>> Mark the const string as volatile to avoid that.
>>
>> The effect can be seen on the strncmp-no-helper variant.
>>
>> Before this change:
>>
>>     # ./bench strncmp-no-helper
>>     Setting up benchmark 'strncmp-no-helper'...
>>     Benchmark 'strncmp-no-helper' started.
>>     Iter   0 (8440.107us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>>     Iter   1 (73909.374us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>>     Iter   2 (-8140.994us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>>     Iter   3 (3094.474us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>>     Iter   4 (-2828.468us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>>     Iter   5 (2635.595us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>>     Iter   6 (-306.478us): hits    0.000M/s (  0.000M/prod), drops    0.000M/s, total operations    0.000M/s
>>     Summary: hits    0.000 ± 0.000M/s (  0.000M/prod), drops    0.000 ± 0.000M/s, total operations    0.000 ± 0.000M/s
>>
>> After this change:
>>
>>     # ./bench strncmp-no-helper
>>     Setting up benchmark 'strncmp-no-helper'...
>>     Benchmark 'strncmp-no-helper' started.
>>     Iter   0 (21180.011us): hits    5.320M/s (  5.320M/prod), drops    0.000M/s, total operations    5.320M/s
>>     Iter   1 (-692.499us): hits    5.246M/s (  5.246M/prod), drops    0.000M/s, total operations    5.246M/s
>>     Iter   2 (-704.751us): hits    5.332M/s (  5.332M/prod), drops    0.000M/s, total operations    5.332M/s
>>     Iter   3 (62057.929us): hits    5.299M/s (  5.299M/prod), drops    0.000M/s, total operations    5.299M/s
>>     Iter   4 (-7981.421us): hits    5.303M/s (  5.303M/prod), drops    0.000M/s, total operations    5.303M/s
>>     Iter   5 (3500.341us): hits    5.306M/s (  5.306M/prod), drops    0.000M/s, total operations    5.306M/s
>>     Iter   6 (-3851.046us): hits    5.264M/s (  5.264M/prod), drops    0.000M/s, total operations    5.264M/s
>>     Summary: hits    5.338 ± 0.147M/s (  5.338M/prod), drops    0.000 ± 0.000M/s, total operations    5.338 ± 0.147M/s
>>
>> Signed-off-by: Viktor Malik <vmalik@redhat.com>
>> ---
>>  tools/testing/selftests/bpf/progs/strncmp_bench.c | 6 +++---
>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
>> index 18373a7df76e..92a828a1ebea 100644
>> --- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
>> +++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
>> @@ -9,7 +9,7 @@
>>
>>  /* Will be updated by benchmark before program loading */
>>  const volatile unsigned int cmp_str_len = 1;
>> -const char target[STRNCMP_STR_SZ];
>> +const volatile char target[STRNCMP_STR_SZ];
>>
>>  long hits = 0;
>>  char str[STRNCMP_STR_SZ];
>> @@ -17,7 +17,7 @@ char str[STRNCMP_STR_SZ];
>>  char _license[] SEC("license") = "GPL";
>>
>>  static __always_inline int local_strncmp(const char *s1, unsigned int sz,
>> -                                        const char *s2)
>> +                                        const volatile char *s2)
> 
> this will be a bit unfair to local_strncmp(), as now you'll be forcing
> the compiler to re-read s1[i] twice, right? What if we do:

Ah, right, I didn't realize s1[i] was used twice there.

> 
> 
> diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c
> b/tools/testing/selftests/bpf/progs/strncmp_bench.c
> index 18373a7df76e..f47bf88f8d2a 100644
> --- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
> +++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
> @@ -35,7 +35,10 @@ static __always_inline int local_strncmp(const char
> *s1, unsigned int sz,
>  SEC("tp/syscalls/sys_enter_getpgid")
>  int strncmp_no_helper(void *ctx)
>  {
> -       if (local_strncmp(str, cmp_str_len + 1, target) < 0)
> +       const char *target_str = target;
> +
> +       barrier_var(target_str);
> +       if (local_strncmp(str, cmp_str_len + 1, target_str) < 0)
>                 __sync_add_and_fetch(&hits, 1);
>         return 0;
>  }
> 
> 
> that will prevent compiler optimization as well and won't force us to
> do all those casts?

Yeah, that works, I'll send v2.

Thanks!

> 
> pw-bot: cr
> 
> 
>>  {
>>         int ret = 0;
>>         unsigned int i;
>> @@ -43,7 +43,7 @@ int strncmp_no_helper(void *ctx)
>>  SEC("tp/syscalls/sys_enter_getpgid")
>>  int strncmp_helper(void *ctx)
>>  {
>> -       if (bpf_strncmp(str, cmp_str_len + 1, target) < 0)
>> +       if (bpf_strncmp(str, cmp_str_len + 1, (const char *)target) < 0)
>>                 __sync_add_and_fetch(&hits, 1);
>>         return 0;
>>  }
>> --
>> 2.48.1
>>
>
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
index 18373a7df76e..92a828a1ebea 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -9,7 +9,7 @@ 
 
 /* Will be updated by benchmark before program loading */
 const volatile unsigned int cmp_str_len = 1;
-const char target[STRNCMP_STR_SZ];
+const volatile char target[STRNCMP_STR_SZ];
 
 long hits = 0;
 char str[STRNCMP_STR_SZ];
@@ -17,7 +17,7 @@  char str[STRNCMP_STR_SZ];
 char _license[] SEC("license") = "GPL";
 
 static __always_inline int local_strncmp(const char *s1, unsigned int sz,
-					 const char *s2)
+					 const volatile char *s2)
 {
 	int ret = 0;
 	unsigned int i;
@@ -43,7 +43,7 @@  int strncmp_no_helper(void *ctx)
 SEC("tp/syscalls/sys_enter_getpgid")
 int strncmp_helper(void *ctx)
 {
-	if (bpf_strncmp(str, cmp_str_len + 1, target) < 0)
+	if (bpf_strncmp(str, cmp_str_len + 1, (const char *)target) < 0)
 		__sync_add_and_fetch(&hits, 1);
 	return 0;
 }