diff mbox series

[v6,bpf-next,3/4] bpf: Add cond_break macro

Message ID 20240306031929.42666-4-alexei.starovoitov@gmail.com (mailing list archive)
State Accepted
Commit 7825948e135bdb4dcd9198121bdfaf6bd9e4f0a9
Delegated to: BPF
Headers show
Series bpf: Introduce may_goto and cond_break | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / veristat
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 15 maintainers not CCed: morbo@google.com ndesaulniers@google.com llvm@lists.linux.dev jolsa@kernel.org haoluo@google.com song@kernel.org yonghong.song@linux.dev linux-kselftest@vger.kernel.org sdf@google.com martin.lau@linux.dev justinstitt@google.com kpsingh@kernel.org shuah@kernel.org mykolal@fb.com nathan@kernel.org
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: Avoid line continuations in quoted strings WARNING: Avoid unnecessary line continuations WARNING: Macros with flow control statements should be avoided WARNING: labels should not be indented
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-14 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-PR fail merge-conflict
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18

Commit Message

Alexei Starovoitov March 6, 2024, 3:19 a.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Use may_goto instruction to implement cond_break macro.
Ideally the macro should be written as:
  asm volatile goto(".byte 0xe5;
                     .byte 0;
                     .short %l[l_break] ...
                     .long 0;
but LLVM doesn't support a 2-byte PC-relative fixup yet.
Hence use
  asm volatile goto(".byte 0xe5;
                     .byte 0;
                     .long %l[l_break] ...
                     .short 0;
that produces correct asm on little endian.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_experimental.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

Comments

Eduard Zingerman March 6, 2024, 1:26 p.m. UTC | #1
On Tue, 2024-03-05 at 19:19 -0800, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> Use may_goto instruction to implement cond_break macro.
> Ideally the macro should be written as:
>   asm volatile goto(".byte 0xe5;
>                      .byte 0;
>                      .short %l[l_break] ...
>                      .long 0;
> but LLVM doesn't recognize fixup of 2 byte PC relative yet.
> Hence use
>   asm volatile goto(".byte 0xe5;
>                      .byte 0;
>                      .long %l[l_break] ...
>                      .short 0;
> that produces correct asm on little endian.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Acked-by: Eduard Zingerman <eddyz87@gmail.com>

I tried rewriting with offset +1 and an additional goto:

	({ __label__ l_break, l_continue;		\
	 asm volatile goto("%[jcond]; goto %l[l_break];"\
			   :: __imm_insn(jcond, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 1, 0)) \
			   :: l_break);			\
	goto l_continue;				\
	l_break: break;					\
	l_continue:;					\
	})

But BPF_RAW_INSN needs filter.h, which can't be included because of vmlinux.h,
unfortunately :(
Alexei Starovoitov March 6, 2024, 4:51 p.m. UTC | #2
On Wed, Mar 6, 2024 at 5:26 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2024-03-05 at 19:19 -0800, Alexei Starovoitov wrote:
> > From: Alexei Starovoitov <ast@kernel.org>
> >
> > Use may_goto instruction to implement cond_break macro.
> > Ideally the macro should be written as:
> >   asm volatile goto(".byte 0xe5;
> >                      .byte 0;
> >                      .short %l[l_break] ...
> >                      .long 0;
> > but LLVM doesn't recognize fixup of 2 byte PC relative yet.
> > Hence use
> >   asm volatile goto(".byte 0xe5;
> >                      .byte 0;
> >                      .long %l[l_break] ...
> >                      .short 0;
> > that produces correct asm on little endian.
> >
> > Signed-off-by: Alexei Starovoitov <ast@kernel.org>
>
> Acked-by: Eduard Zingerman <eddyz87@gmail.com>
>
> I tried rewriting with offset +1 and an additional goto:
>
>         ({ __label__ l_break, l_continue;               \
>          asm volatile goto("%[jcond]; goto %l[l_break];"\
>                            :: __imm_insn(jcond, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 1, 0)) \
>                            :: l_break);                 \
>         goto l_continue;                                \
>         l_break: break;                                 \
>         l_continue:;                                    \
>         })

It probably works, but the generated code is not pretty.
That's the reason why this macro has two labels l_break and l_continue.
It could have been written with a single label, like:

/* bad asm */
#define cond_break \
({ __label__ l_continue; \
asm volatile goto("may_goto %l[l_continue]" :::: l_continue); \
break; \
l_continue: ; \
})

but the generated code isn't great.
It's similar to bpf_cmp_likely vs bpf_cmp_unlikely.
The C statement that llvm sees right after 'asm volatile goto'
is important for codegen.

The macro in this patch is effectively this:
/* good asm */
#define cond_break \
({ __label__ l_break, l_continue; \
asm volatile goto("may_goto %l[l_break]" :::: l_break); \
goto l_continue; \
l_break: break; \
l_continue: ; \
})

To visualize see:
https://godbolt.org/z/98bcbrxaE

The next step is to do several fixes in llvm:
- allow pcrel fixup of 2 bytes
- introduce builtin_may_goto()
- recognize may_goto in inline asm and llvm-objdump disasm

With that the macro will be cleaned up and the big-endian issue
will be addressed.
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 0d749006d107..bc9a0832ae72 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -326,6 +326,18 @@  l_true:												\
        })
 #endif
 
+#define cond_break					\
+	({ __label__ l_break, l_continue;		\
+	 asm volatile goto("1:.byte 0xe5;			\
+		      .byte 0;				\
+		      .long ((%l[l_break] - 1b - 8) / 8) & 0xffff;	\
+		      .short 0"				\
+		      :::: l_break);			\
+	goto l_continue;				\
+	l_break: break;					\
+	l_continue:;					\
+	})
+
 #ifndef bpf_nop_mov
 #define bpf_nop_mov(var) \
 	asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))