diff mbox series

[v3,bpf-next,2/6] bpf: Introduce "volatile compare" macros

Message ID 20231226191148.48536-3-alexei.starovoitov@gmail.com (mailing list archive)
State Accepted
Commit a8b242d77bd72556b7a9d8be779f7d27b95ba73c
Delegated to: BPF
Headers show
Series bpf: volatile compare | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 15 maintainers not CCed: ndesaulniers@google.com sdf@google.com llvm@lists.linux.dev kpsingh@kernel.org shuah@kernel.org justinstitt@google.com yonghong.song@linux.dev linux-kselftest@vger.kernel.org nathan@kernel.org morbo@google.com davemarchevsky@fb.com haoluo@google.com martin.lau@linux.dev mykolal@fb.com song@kernel.org
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch fail CHECK: No space is necessary after a cast ERROR: Macros with complex values should be enclosed in parentheses WARNING: Macros with flow control statements should be avoided WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: please, no spaces at the start of a line WARNING: sizeof(& should be avoided WARNING: space prohibited between function name and open parenthesis '('
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-14 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization

Commit Message

Alexei Starovoitov Dec. 26, 2023, 7:11 p.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Compilers optimize conditional operators at will, but often bpf programmers
want to force compilers to keep the same operator in asm as it's written in C.
Introduce bpf_cmp_likely/unlikely(var1, conditional_op, var2) macros that can be used as:

-               if (seen >= 1000)
+               if (bpf_cmp_unlikely(seen, >=, 1000))

The macros take advantage of BPF assembly that is C like.

The macros check the sign of the variable 'seen' and emit either
a signed or an unsigned compare.

For example:
int a;
bpf_cmp_unlikely(a, >, 0) will be translated to 'if rX s> 0 goto' in BPF assembly.

unsigned int a;
bpf_cmp_unlikely(a, >, 0) will be translated to 'if rX > 0 goto' in BPF assembly.

C type conversions coupled with comparison operator are tricky.
  int i = -1;
  unsigned int j = 1;
  if (i < j) // this is false.

  long i = -1;
  unsigned int j = 1;
  if (i < j) // this is true.

Make sure the BPF program is compiled with -Wsign-compare; then the macros will
catch the mistake.

The macros check LHS (left hand side) only to figure out the sign of compare.

'if 0 < rX goto' is not allowed in the assembly, so the users
have to use a variable on LHS anyway.

The patch updates a few tests to demonstrate the use of the macros.

The macro allows using BPF_JSET in C code, since LLVM doesn't generate it at
present. For example:

if (i & j) compiles into r0 &= r1; if r0 == 0 goto

while

if (bpf_cmp_unlikely(i, &, j)) compiles into if r0 & r1 goto

Note that the macros have to be careful with the RHS assembly predicate.
Since:
u64 __rhs = 1ull << 42;
asm goto("if r0 < %[rhs] goto +1" :: [rhs] "ri" (__rhs));
LLVM will silently truncate 64-bit constant into s32 imm.

Note that in [lhs] "r"((short)LHS) the type cast is a workaround for an LLVM issue.
When LHS is exactly 32-bit LLVM emits redundant <<=32, >>=32 to zero upper 32-bits.
When LHS is 64 or 16 or 8-bit variable there are no shifts.
When LHS is 32-bit the (u64) cast doesn't help. Hence use (short) cast.
It does _not_ truncate the variable before it's assigned to a register.

Traditional likely()/unlikely() macros that use __builtin_expect(!!(x), 1 or 0)
have no effect on these macros, hence the macros implement the logic manually.
bpf_cmp_unlikely() macro preserves compare operator as-is while
bpf_cmp_likely() macro flips the compare.

Consider two cases:
A.
  for() {
    if (foo >= 10) {
      bar += foo;
    }
    other code;
  }

B.
  for() {
    if (foo >= 10)
       break;
    other code;
  }

It's ok to use either bpf_cmp_likely or bpf_cmp_unlikely macros in both cases,
but consider that 'break' is effectively 'goto out_of_the_loop'.
Hence it's better to use bpf_cmp_unlikely in the B case.
While 'bar += foo' is better to keep as 'fallthrough' == likely code path in the A case.

When it's written as:
A.
  for() {
    if (bpf_cmp_likely(foo, >=, 10)) {
      bar += foo;
    }
    other code;
  }

B.
  for() {
    if (bpf_cmp_unlikely(foo, >=, 10))
       break;
    other code;
  }

The assembly will look like:
A.
  for() {
    if r1 < 10 goto L1;
      bar += foo;
  L1:
    other code;
  }

B.
  for() {
    if r1 >= 10 goto L2;
    other code;
  }
  L2:

The bpf_cmp_likely vs bpf_cmp_unlikely changes basic block layout, hence it will
greatly influence the verification process. The number of processed instructions
will be different, since the verifier walks the fallthrough first.

Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/bpf_experimental.h  | 69 +++++++++++++++++++
 .../testing/selftests/bpf/progs/exceptions.c  | 20 +++---
 .../selftests/bpf/progs/iters_task_vma.c      |  3 +-
 3 files changed, 80 insertions(+), 12 deletions(-)

Comments

Andrii Nakryiko Jan. 3, 2024, 7:20 p.m. UTC | #1
On Tue, Dec 26, 2023 at 11:12 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Compilers optimize conditional operators at will, but often bpf programmers
> want to force compilers to keep the same operator in asm as it's written in C.
> Introduce bpf_cmp_likely/unlikely(var1, conditional_op, var2) macros that can be used as:
>
> -               if (seen >= 1000)
> +               if (bpf_cmp_unlikely(seen, >=, 1000))
>
> The macros take advantage of BPF assembly that is C like.
>
> The macros check the sign of variable 'seen' and emits either
> signed or unsigned compare.
>
> For example:
> int a;
> bpf_cmp_unlikely(a, >, 0) will be translated to 'if rX s> 0 goto' in BPF assembly.
>
> unsigned int a;
> bpf_cmp_unlikely(a, >, 0) will be translated to 'if rX > 0 goto' in BPF assembly.
>
> C type conversions coupled with comparison operator are tricky.
>   int i = -1;
>   unsigned int j = 1;
>   if (i < j) // this is false.
>
>   long i = -1;
>   unsigned int j = 1;
>   if (i < j) // this is true.
>
> Make sure BPF program is compiled with -Wsign-compare then the macros will catch
> the mistake.
>
> The macros check LHS (left hand side) only to figure out the sign of compare.
>
> 'if 0 < rX goto' is not allowed in the assembly, so the users
> have to use a variable on LHS anyway.
>
> The patch updates few tests to demonstrate the use of the macros.
>
> The macro allows to use BPF_JSET in C code, since LLVM doesn't generate it at
> present. For example:
>
> if (i & j) compiles into r0 &= r1; if r0 == 0 goto
>
> while
>
> if (bpf_cmp_unlikely(i, &, j)) compiles into if r0 & r1 goto
>
> Note that the macros has to be careful with RHS assembly predicate.
> Since:
> u64 __rhs = 1ull << 42;
> asm goto("if r0 < %[rhs] goto +1" :: [rhs] "ri" (__rhs));
> LLVM will silently truncate 64-bit constant into s32 imm.
>
> Note that [lhs] "r"((short)LHS) the type cast is a workaround for LLVM issue.
> When LHS is exactly 32-bit LLVM emits redundant <<=32, >>=32 to zero upper 32-bits.
> When LHS is 64 or 16 or 8-bit variable there are no shifts.
> When LHS is 32-bit the (u64) cast doesn't help. Hence use (short) cast.
> It does _not_ truncate the variable before it's assigned to a register.
>
> Traditional likely()/unlikely() macros that use __builtin_expect(!!(x), 1 or 0)
> have no effect on these macros, hence macros implement the logic manually.
> bpf_cmp_unlikely() macro preserves compare operator as-is while
> bpf_cmp_likely() macro flips the compare.
>
> Consider two cases:
> A.
>   for() {
>     if (foo >= 10) {
>       bar += foo;
>     }
>     other code;
>   }
>
> B.
>   for() {
>     if (foo >= 10)
>        break;
>     other code;
>   }
>
> It's ok to use either bpf_cmp_likely or bpf_cmp_unlikely macros in both cases,
> but consider that 'break' is effectively 'goto out_of_the_loop'.
> Hence it's better to use bpf_cmp_unlikely in the B case.
> While 'bar += foo' is better to keep as 'fallthrough' == likely code path in the A case.
>
> When it's written as:
> A.
>   for() {
>     if (bpf_cmp_likely(foo, >=, 10)) {
>       bar += foo;
>     }
>     other code;
>   }
>
> B.
>   for() {
>     if (bpf_cmp_unlikely(foo, >=, 10))
>        break;
>     other code;
>   }
>
> The assembly will look like:
> A.
>   for() {
>     if r1 < 10 goto L1;
>       bar += foo;
>   L1:
>     other code;
>   }
>
> B.
>   for() {
>     if r1 >= 10 goto L2;
>     other code;
>   }
>   L2:
>
> The bpf_cmp_likely vs bpf_cmp_unlikely changes basic block layout, hence it will
> greatly influence the verification process. The number of processed instructions
> will be different, since the verifier walks the fallthrough first.
>
> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> Acked-by: Jiri Olsa <jolsa@kernel.org>
> Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  .../testing/selftests/bpf/bpf_experimental.h  | 69 +++++++++++++++++++
>  .../testing/selftests/bpf/progs/exceptions.c  | 20 +++---
>  .../selftests/bpf/progs/iters_task_vma.c      |  3 +-
>  3 files changed, 80 insertions(+), 12 deletions(-)
>

Likely/unlikely distinctions make 100% sense. Last year when I was
looking into improving verifier heuristics I noticed how sensitive and
important it is for verifier to first verify code path that is most
generic. This significantly aids state pruning. So code generation
makes a big difference, and if an "unlikely" short-cut (usually error
handling or early exit) condition is verified first, it will cause
verifier more work. Which totally explains the regressions you were
seeing in the last patch.

Let's hope users will actually make the right likely/unlikely decision
in their code when using bpf_cmp() :)


I left a few more comments below, but they can be a follow up. This
looks great, we should move this to bpf_helpers.h in libbpf after a
bit more testing in real-world production use cases.

I also aligned all those \ terminators in macros, as they are very
distracting when reading the code and I'd want to do that before
moving into bpf_helpers.h anyways :) hope you don't mind (and there
were some inconsistencies with single space and no space before \
anyways; it's hard to keep track of that)


> diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
> index 1386baf9ae4a..789abf316ad4 100644
> --- a/tools/testing/selftests/bpf/bpf_experimental.h
> +++ b/tools/testing/selftests/bpf/bpf_experimental.h
> @@ -254,6 +254,75 @@ extern void bpf_throw(u64 cookie) __ksym;
>                 }                                                                       \
>          })
>
> +#define __cmp_cannot_be_signed(x) \
> +       __builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \
> +       __builtin_strcmp(#x, "&") == 0
> +
> +#define __is_signed_type(type) (((type)(-1)) < (type)1)
> +
> +#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \
> +       ({ \
> +               __label__ l_true; \
> +               bool ret = DEFAULT; \
> +               asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \
> +                                 :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \
> +               ret = !DEFAULT; \
> +l_true:\
> +               ret;\
> +       })
> +
> +/* C type conversions coupled with comparison operator are tricky.
> + * Make sure BPF program is compiled with -Wsign-compre then

fixed typo while applying: compare

> + * __lhs OP __rhs below will catch the mistake.
> + * Be aware that we check only __lhs to figure out the sign of compare.
> + */
> +#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \

nit: NOFLIP name is absolutely confusing, tbh, it would be nice to
rename it to something more easily understood (DEFAULT is IMO better)

> +       ({ \
> +               typeof(LHS) __lhs = (LHS); \
> +               typeof(RHS) __rhs = (RHS); \
> +               bool ret; \
> +               _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
> +               (void)(__lhs OP __rhs); \

what is this part for? Is this what "__lhs OP __rhs below will catch
the mistake" in the comment refers to?

BTW, I think we hit this one when invalid OP is specified, before we
get to (void)"bug" (see below). So maybe it would be better to push it
deeper into __bpf_cmp itself?

> +               if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) {\
> +                       if (sizeof(__rhs) == 8) \
> +                               ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \
> +                       else \
> +                               ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \

tbh, I'm also not 100% sure why this sizeof() == 8 and corresponding r
vs i change? Can you please elaborate (and add a comment in a follow
up, perhaps?)

> +               } else { \
> +                       if (sizeof(__rhs) == 8) \
> +                               ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \
> +                       else \
> +                               ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \

one simplification that would reduce number of arguments to __bpf_cmp:

in __bpf_cmp (drop # before OP):

asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]"


And here you just stringify operator, appending "s" if necessary:

ret = __bpf_cmp(__lhs, #OP, "i", __rhs, NOFLIP);

or

ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, NOFLIP);

Consider for a follow up, if you agree it is a simplification.

> +               } \
> +               ret; \
> +       })
> +
> +#ifndef bpf_cmp_unlikely
> +#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
> +#endif
> +
> +#ifndef bpf_cmp_likely
> +#define bpf_cmp_likely(LHS, OP, RHS) \
> +       ({ \
> +               bool ret; \
> +               if (__builtin_strcmp(#OP, "==") == 0) \
> +                       ret = _bpf_cmp(LHS, !=, RHS, false); \
> +               else if (__builtin_strcmp(#OP, "!=") == 0) \
> +                       ret = _bpf_cmp(LHS, ==, RHS, false); \
> +               else if (__builtin_strcmp(#OP, "<=") == 0) \
> +                       ret = _bpf_cmp(LHS, >, RHS, false); \
> +               else if (__builtin_strcmp(#OP, "<") == 0) \
> +                       ret = _bpf_cmp(LHS, >=, RHS, false); \
> +               else if (__builtin_strcmp(#OP, ">") == 0) \
> +                       ret = _bpf_cmp(LHS, <=, RHS, false); \
> +               else if (__builtin_strcmp(#OP, ">=") == 0) \
> +                       ret = _bpf_cmp(LHS, <, RHS, false); \
> +               else \
> +                       (void) "bug"; \

doesn't seem like this is doing anything, I tried using wrong OP and I'm getting

progs/iters_task_vma.c:31:32: error: expected expression
   31 |                 if (bpf_cmp_unlikely(seen, >==, 1000))
      |                                              ^

regardless of this (void)"bug" business. It doesn't hurt, but also
doesn't seem to be doing anything.


If I uses some text instead of operator, I get different one:

progs/iters_task_vma.c:31:30: error: expected ')'
   31 |                 if (bpf_cmp_unlikely(seen, op, 1000))
      |                                            ^
progs/iters_task_vma.c:31:7: note: to match this '('
   31 |                 if (bpf_cmp_unlikely(seen, op, 1000))
      |                     ^
/data/users/andriin/linux/tools/testing/selftests/bpf/bpf_experimental.h:301:40:
note: expanded from macro 'bpf_cmp_unlikely'
  301 | #define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
      |                                        ^
/data/users/andriin/linux/tools/testing/selftests/bpf/bpf_experimental.h:285:9:
note: expanded from macro '_bpf_cmp'
  285 |                 (void)(__lhs OP __rhs); \
      |                       ^
1 error generated.


But still, (void)"bug" doesn't change anything (updated realization I
got while finishing this email: see below about difference between
likely/unlikely).


And just to complete exploration, if we use valid C operator that's
not supported in assembly (like <<), we can see different error still:

progs/iters_task_vma.c:31:7: error: invalid operand for instruction
   31 |                 if (bpf_cmp_unlikely(seen, <<, 1000))
      |                     ^
/data/users/andriin/linux/tools/testing/selftests/bpf/bpf_experimental.h:301:40:
note: expanded from macro 'bpf_cmp_unlikely'
  301 | #define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
      |                                        ^
/data/users/andriin/linux/tools/testing/selftests/bpf/bpf_experimental.h:290:11:
note: expanded from macro '_bpf_cmp'
  290 |                                 ret = __bpf_cmp(__lhs, OP, "",
"i", __rhs, NOFLIP); \
      |                                       ^
/data/users/andriin/linux/tools/testing/selftests/bpf/bpf_experimental.h:267:21:
note: expanded from macro '__bpf_cmp'
  267 |                 asm volatile goto("if %[lhs] " SIGN #OP "
%[rhs] goto %l[l_true]" \
      |                                   ^
<inline asm>:1:8: note: instantiated into assembly here
    1 |         if r6 << 1000 goto LBB0_6
      |               ^

This one is kind of surprising, we got to asm even though the operator
was wrong. And it took me a bit to realize a huge asymmetry between
bpf_cmp_likely and bpf_cmp_unlikely. likely variant explicitly lists
supported operators and should bail out early on unrecognized one
((void)"bug") part. But unlikely variant just passes through
everything user provided directly into asm-generating macro.

I actually like a more verbose but also more explicit way of likely
implementation, listing explicitly supported operators.  It will also
be easier for users to check what they are supposed to pass. So that's
another thing to maybe do in the follow up?



> +               ret; \
> +       })
> +#endif
> +
>  /* Description
>   *     Assert that a conditional expression is true.
>   * Returns

[...]
Alexei Starovoitov Jan. 4, 2024, 6:03 a.m. UTC | #2
On Wed, Jan 3, 2024 at 11:20 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
> > +
> > +/* C type conversions coupled with comparison operator are tricky.
> > + * Make sure BPF program is compiled with -Wsign-compre then
>
> fixed typo while applying: compare

ohh. I cannot even copy-paste properly.

> > + * __lhs OP __rhs below will catch the mistake.
> > + * Be aware that we check only __lhs to figure out the sign of compare.
> > + */
> > +#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \
>
> nit: NOFLIP name is absolutely confusing, tbh, it would be nice to
> rename it to something more easily understood (DEFAULT is IMO better)

I actually had it as DEFAULT, but it was less clear.
NOFLIP means whether the condition should be flipped or not.
DEFAULT is too ambiguous.

> > +       ({ \
> > +               typeof(LHS) __lhs = (LHS); \
> > +               typeof(RHS) __rhs = (RHS); \
> > +               bool ret; \
> > +               _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
> > +               (void)(__lhs OP __rhs); \
>
> what is this part for? Is this what "__lhs OP __rhs below will catch
> the mistake" in the comment refers to?

Yes. This one will give proper error either on GCC -Wall
or with clang -Wall -Wsign-compare.

>
> BTW, I think we hit this one when invalid OP is specified, before we
> get to (void)"bug" (see below). So maybe it would be better to push it
> deeper into __bpf_cmp itself?
>
> > +               if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) {\
> > +                       if (sizeof(__rhs) == 8) \
> > +                               ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \
> > +                       else \
> > +                               ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \
>
> tbh, I'm also not 100% sure why this sizeof() == 8 and corresponding r
> vs i change? Can you please elaborate (and add a comment in a follow
> up, perhaps?)

That was in the commit log:
"
Note that the macros has to be careful with RHS assembly predicate.
Since:
u64 __rhs = 1ull << 42;
asm goto("if r0 < %[rhs] goto +1" :: [rhs] "ri" (__rhs));
LLVM will silently truncate 64-bit constant into s32 imm.
"

I will add a comment to the code as well.

>
> > +               } else { \
> > +                       if (sizeof(__rhs) == 8) \
> > +                               ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \
> > +                       else \
> > +                               ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \
>
> one simplification that would reduce number of arguments to __bpf_cmp:
>
> in __bpf_cmp (drop # before OP):
>
> asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]"
>
>
> And here you just stringify operator, appending "s" if necessary:
>
> ret = __bpf_cmp(__lhs, #OP, "i", __rhs, NOFLIP);
>
> or
>
> ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, NOFLIP);
>
> Consider for a follow up, if you agree it is a simplification.

Just to reduce the number of arguments? I will give it a try.

> I actually like a more verbose but also more explicit way of likely
> implementation, listing explicitly supported operators.  It will also
> be easier for users to check what they are supposed to pass. So that's
> another thing to maybe do in the follow up?

I thought about making unlikely explicit, but it looked weird and noisy:
                if (__builtin_strcmp(#OP, "==") == 0 ||
                    __builtin_strcmp(#OP, "!=") == 0 ||
                    __builtin_strcmp(#OP, "<") == 0  ||
...
and all the noise just to make unlikely match likely.

I felt it's not worth it and the error in the current form as you noticed:

> progs/iters_task_vma.c:31:7: error: invalid operand for instruction
>    31 |                 if (bpf_cmp_unlikely(seen, <<, 1000))
> <inline asm>:1:8: note: instantiated into assembly here
>    1 |         if r6 << 1000 goto LBB0_6

is much cleaner instead of (void) return.

I've tried many different ways to have a helper macro
#define flip_cmp_opcode(OP)
and use it inside the bpf_cmp, but couldn't make it work.
This is the shortest version of macros I came up with.
Andrii Nakryiko Jan. 4, 2024, 7:04 p.m. UTC | #3
On Wed, Jan 3, 2024 at 10:04 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Wed, Jan 3, 2024 at 11:20 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> > > +
> > > +/* C type conversions coupled with comparison operator are tricky.
> > > + * Make sure BPF program is compiled with -Wsign-compre then
> >
> > fixed typo while applying: compare
>
> ohh. I cannot even copy-paste properly.
>
> > > + * __lhs OP __rhs below will catch the mistake.
> > > + * Be aware that we check only __lhs to figure out the sign of compare.
> > > + */
> > > +#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \
> >
> > nit: NOFLIP name is absolutely confusing, tbh, it would be nice to
> > rename it to something more easily understood (DEFAULT is IMO better)
>
> I actually had it as DEFAULT, but it was less clear.
> NOFLIP means whether the condition should be flipped or not.
> DEFAULT is too ambiguous.

LIKELY (or UNLIKELY, whichever makes sense) might be another
suggestion. But it's minor, so feel free to ignore.

>
> > > +       ({ \
> > > +               typeof(LHS) __lhs = (LHS); \
> > > +               typeof(RHS) __rhs = (RHS); \
> > > +               bool ret; \
> > > +               _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
> > > +               (void)(__lhs OP __rhs); \
> >
> > what is this part for? Is this what "__lhs OP __rhs below will catch
> > the mistake" in the comment refers to?
>
> Yes. This one will give proper error either on GCC -Wall
> or with clang -Wall -Wsign-compare.
>
> >
> > BTW, I think we hit this one when invalid OP is specified, before we
> > get to (void)"bug" (see below). So maybe it would be better to push it
> > deeper into __bpf_cmp itself?
> >
> > > +               if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) {\
> > > +                       if (sizeof(__rhs) == 8) \
> > > +                               ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \
> > > +                       else \
> > > +                               ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \
> >
> > tbh, I'm also not 100% sure why this sizeof() == 8 and corresponding r
> > vs i change? Can you please elaborate (and add a comment in a follow
> > up, perhaps?)
>
> That was in the commit log:
> "
> Note that the macros has to be careful with RHS assembly predicate.
> Since:
> u64 __rhs = 1ull << 42;
> asm goto("if r0 < %[rhs] goto +1" :: [rhs] "ri" (__rhs));
> LLVM will silently truncate 64-bit constant into s32 imm.
> "
>
> I will add a comment to the code as well.

ah, ok, it didn't click for me, thanks for adding a comment

while on the topic, is there a problem specifying "ri" for sizeof() <
8 case? At least for alu32 cases, shouldn't we allow w1 < w2 kind of
situations?

>
> >
> > > +               } else { \
> > > +                       if (sizeof(__rhs) == 8) \
> > > +                               ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \
> > > +                       else \
> > > +                               ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \
> >
> > one simplification that would reduce number of arguments to __bpf_cmp:
> >
> > in __bpf_cmp (drop # before OP):
> >
> > asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]"
> >
> >
> > And here you just stringify operator, appending "s" if necessary:
> >
> > ret = __bpf_cmp(__lhs, #OP, "i", __rhs, NOFLIP);
> >
> > or
> >
> > ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, NOFLIP);
> >
> > Consider for a follow up, if you agree it is a simplification.
>
> Just to reduce the number of arguments? I will give it a try.

yeah, pretty much just for that

>
> > I actually like a more verbose but also more explicit way of likely
> > implementation, listing explicitly supported operators.  It will also
> > be easier for users to check what they are supposed to pass. So that's
> > another thing to maybe do in the follow up?
>
> I thought about making unlikely explicit, but it looked weird and noisy:
>                 if (__builtin_strcmp(#OP, "==") == 0 ||
>                     __builtin_strcmp(#OP, "!=") == 0 ||
>                     __builtin_strcmp(#OP, "<") == 0  ||
> ...
> and all the noise just to make unlikely match likely.
>
> I felt it's not worth it and the error in the current form as you noticed:
>
> > progs/iters_task_vma.c:31:7: error: invalid operand for instruction
> >    31 |                 if (bpf_cmp_unlikely(seen, <<, 1000))
> > <inline asm>:1:8: note: instantiated into assembly here
> >    1 |         if r6 << 1000 goto LBB0_6
>
> is much cleaner instead of (void) return.

Right, I only played with unlikely last time. Trying the same with
likely right now, it's not that great:

In file included from progs/profiler2.c:6:
progs/profiler.inc.h:225:7: error: variable 'ret' is uninitialized
when used here [-Werror,-Wuninitialized]
  225 |                 if (bpf_cmp_likely(filepart_length, <==, MAX_PATH)) {
      |                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/data/users/andriin/linux/tools/testing/selftests/bpf/bpf_experimental.h:322:3:
note: expanded from macro 'bpf_cmp_likely'
  322 |                 ret;
                                 \
      |                 ^~~
progs/profiler.inc.h:225:7: note: variable 'ret' is declared here
/data/users/andriin/linux/tools/testing/selftests/bpf/bpf_experimental.h:307:3:
note: expanded from macro 'bpf_cmp_likely'
  307 |                 bool ret;
                                 \
      |                 ^

I tried adding _Static_assert, and it is triggered for all valid cases
as well, so it seems like the compiler doesn't detect this last branch
as dead code, unfortunately.

>
> I've tried many different ways to have a helper macro
> #define flip_cmp_opcode(OP)
> and use it inside the bpf_cmp, but couldn't make it work.
> This is the shortest version of macros I came up with.
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 1386baf9ae4a..789abf316ad4 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -254,6 +254,75 @@  extern void bpf_throw(u64 cookie) __ksym;
 		}									\
 	 })
 
+#define __cmp_cannot_be_signed(x) \
+	__builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \
+	__builtin_strcmp(#x, "&") == 0
+
+#define __is_signed_type(type) (((type)(-1)) < (type)1)
+
+#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \
+	({ \
+		__label__ l_true; \
+		bool ret = DEFAULT; \
+		asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \
+				  :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \
+		ret = !DEFAULT; \
+l_true:\
+		ret;\
+       })
+
+/* C type conversions coupled with comparison operator are tricky.
+ * Make sure BPF program is compiled with -Wsign-compare, then
+ * __lhs OP __rhs below will catch the mistake.
+ * Be aware that we check only __lhs to figure out the sign of compare.
+ */
+#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \
+	({ \
+		typeof(LHS) __lhs = (LHS); \
+		typeof(RHS) __rhs = (RHS); \
+		bool ret; \
+		_Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
+		(void)(__lhs OP __rhs); \
+		if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) {\
+			if (sizeof(__rhs) == 8) \
+				ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \
+			else \
+				ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \
+		} else { \
+			if (sizeof(__rhs) == 8) \
+				ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \
+			else \
+				ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \
+		} \
+		ret; \
+       })
+
+#ifndef bpf_cmp_unlikely
+#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
+#endif
+
+#ifndef bpf_cmp_likely
+#define bpf_cmp_likely(LHS, OP, RHS) \
+	({ \
+		bool ret; \
+		if (__builtin_strcmp(#OP, "==") == 0) \
+			ret = _bpf_cmp(LHS, !=, RHS, false); \
+		else if (__builtin_strcmp(#OP, "!=") == 0) \
+			ret = _bpf_cmp(LHS, ==, RHS, false); \
+		else if (__builtin_strcmp(#OP, "<=") == 0) \
+			ret = _bpf_cmp(LHS, >, RHS, false); \
+		else if (__builtin_strcmp(#OP, "<") == 0) \
+			ret = _bpf_cmp(LHS, >=, RHS, false); \
+		else if (__builtin_strcmp(#OP, ">") == 0) \
+			ret = _bpf_cmp(LHS, <=, RHS, false); \
+		else if (__builtin_strcmp(#OP, ">=") == 0) \
+			ret = _bpf_cmp(LHS, <, RHS, false); \
+		else \
+			(void) "bug"; \
+		ret; \
+       })
+#endif
+
 /* Description
  *	Assert that a conditional expression is true.
  * Returns
diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c
index 2811ee842b01..f09cd14d8e04 100644
--- a/tools/testing/selftests/bpf/progs/exceptions.c
+++ b/tools/testing/selftests/bpf/progs/exceptions.c
@@ -210,7 +210,7 @@  __noinline int assert_zero_gfunc(u64 c)
 {
 	volatile u64 cookie = c;
 
-	bpf_assert_eq(cookie, 0);
+	bpf_assert(bpf_cmp_unlikely(cookie, ==, 0));
 	return 0;
 }
 
@@ -218,7 +218,7 @@  __noinline int assert_neg_gfunc(s64 c)
 {
 	volatile s64 cookie = c;
 
-	bpf_assert_lt(cookie, 0);
+	bpf_assert(bpf_cmp_unlikely(cookie, <, 0));
 	return 0;
 }
 
@@ -226,7 +226,7 @@  __noinline int assert_pos_gfunc(s64 c)
 {
 	volatile s64 cookie = c;
 
-	bpf_assert_gt(cookie, 0);
+	bpf_assert(bpf_cmp_unlikely(cookie, >, 0));
 	return 0;
 }
 
@@ -234,7 +234,7 @@  __noinline int assert_negeq_gfunc(s64 c)
 {
 	volatile s64 cookie = c;
 
-	bpf_assert_le(cookie, -1);
+	bpf_assert(bpf_cmp_unlikely(cookie, <=, -1));
 	return 0;
 }
 
@@ -242,7 +242,7 @@  __noinline int assert_poseq_gfunc(s64 c)
 {
 	volatile s64 cookie = c;
 
-	bpf_assert_ge(cookie, 1);
+	bpf_assert(bpf_cmp_unlikely(cookie, >=, 1));
 	return 0;
 }
 
@@ -258,7 +258,7 @@  __noinline int assert_zero_gfunc_with(u64 c)
 {
 	volatile u64 cookie = c;
 
-	bpf_assert_eq_with(cookie, 0, cookie + 100);
+	bpf_assert_with(bpf_cmp_unlikely(cookie, ==, 0), cookie + 100);
 	return 0;
 }
 
@@ -266,7 +266,7 @@  __noinline int assert_neg_gfunc_with(s64 c)
 {
 	volatile s64 cookie = c;
 
-	bpf_assert_lt_with(cookie, 0, cookie + 100);
+	bpf_assert_with(bpf_cmp_unlikely(cookie, <, 0), cookie + 100);
 	return 0;
 }
 
@@ -274,7 +274,7 @@  __noinline int assert_pos_gfunc_with(s64 c)
 {
 	volatile s64 cookie = c;
 
-	bpf_assert_gt_with(cookie, 0, cookie + 100);
+	bpf_assert_with(bpf_cmp_unlikely(cookie, >, 0), cookie + 100);
 	return 0;
 }
 
@@ -282,7 +282,7 @@  __noinline int assert_negeq_gfunc_with(s64 c)
 {
 	volatile s64 cookie = c;
 
-	bpf_assert_le_with(cookie, -1, cookie + 100);
+	bpf_assert_with(bpf_cmp_unlikely(cookie, <=, -1), cookie + 100);
 	return 0;
 }
 
@@ -290,7 +290,7 @@  __noinline int assert_poseq_gfunc_with(s64 c)
 {
 	volatile s64 cookie = c;
 
-	bpf_assert_ge_with(cookie, 1, cookie + 100);
+	bpf_assert_with(bpf_cmp_unlikely(cookie, >=, 1), cookie + 100);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c
index e085a51d153e..dc0c3691dcc2 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c
@@ -28,9 +28,8 @@  int iter_task_vma_for_each(const void *ctx)
 		return 0;
 
 	bpf_for_each(task_vma, vma, task, 0) {
-		if (seen >= 1000)
+		if (bpf_cmp_unlikely(seen, >=, 1000))
 			break;
-		barrier_var(seen);
 
 		vm_ranges[seen].vm_start = vma->vm_start;
 		vm_ranges[seen].vm_end = vma->vm_end;