diff mbox series

[bpf-next,09/13] selftests/bpf: Use 5-byte nop for x86 usdt probes

Message ID 20241211133403.208920-10-jolsa@kernel.org (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series uprobes: Add support to optimize usdt probes on x86_64 | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 0 (+0) this patch: 0 (+0)
netdev/cc_maintainers warning 11 maintainers not CCed: mykolal@fb.com yonghong.song@linux.dev ast@kernel.org kpsingh@kernel.org song@kernel.org sdf@fomichev.me martin.lau@linux.dev eddyz87@gmail.com linux-kselftest@vger.kernel.org shuah@kernel.org daniel@iogearbox.net
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch fail ERROR: spaces required around that ':' (ctx:VxW) WARNING: please, no spaces at the start of a line
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-19 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Jiri Olsa Dec. 11, 2024, 1:33 p.m. UTC
Using 5-byte nop for x86 usdt probes so we can switch
to optimized uprobe them.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 tools/testing/selftests/bpf/sdt.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Andrii Nakryiko Dec. 13, 2024, 9:58 p.m. UTC | #1
On Wed, Dec 11, 2024 at 5:35 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Using 5-byte nop for x86 usdt probes so we can switch
> to optimized uprobe them.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  tools/testing/selftests/bpf/sdt.h | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
>

This change will make it impossible to run latest selftests on older
kernels. Let's do what we did with -DENABLE_ATOMICS_TESTS and allow to
disable this through Makefile, ok?


> diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
> index ca0162b4dc57..7ac9291f45f1 100644
> --- a/tools/testing/selftests/bpf/sdt.h
> +++ b/tools/testing/selftests/bpf/sdt.h
> @@ -234,6 +234,13 @@ __extension__ extern unsigned long long __sdt_unsp;
>  #define _SDT_NOP       nop
>  #endif
>
> +/* Use 5 byte nop for x86_64 to allow optimizing uprobes. */
> +#if defined(__x86_64__)
> +# define _SDT_DEF_NOP _SDT_ASM_5(990:  .byte 0x0f, 0x1f, 0x44, 0x00, 0x00)
> +#else
> +# define _SDT_DEF_NOP _SDT_ASM_1(990:  _SDT_NOP)
> +#endif
> +
>  #define _SDT_NOTE_NAME "stapsdt"
>  #define _SDT_NOTE_TYPE 3
>
> @@ -286,7 +293,7 @@ __extension__ extern unsigned long long __sdt_unsp;
>
>  #define _SDT_ASM_BODY(provider, name, pack_args, args, ...)                  \
>    _SDT_DEF_MACROS                                                            \
> -  _SDT_ASM_1(990:      _SDT_NOP)                                             \
> +  _SDT_DEF_NOP                                                               \
>    _SDT_ASM_3(          .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
>    _SDT_ASM_1(          .balign 4)                                            \
>    _SDT_ASM_3(          .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE)          \
> --
> 2.47.0
>
Jiri Olsa Dec. 16, 2024, 8:32 a.m. UTC | #2
On Fri, Dec 13, 2024 at 01:58:33PM -0800, Andrii Nakryiko wrote:
> On Wed, Dec 11, 2024 at 5:35 AM Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > Using 5-byte nop for x86 usdt probes so we can switch
> > to optimized uprobe them.
> >
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  tools/testing/selftests/bpf/sdt.h | 9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> >
> 
> This change will make it impossible to run latest selftests on older
> kernels. Let's do what we did with -DENABLE_ATOMICS_TESTS and allow to
> disable this through Makefile, ok?

ok, I wanted to start addressing this after this version

so the problem is using this macro with nop5 in application running
on older kernels that do not have nop5 emulation and uprobe syscall
optimization, because uprobe/usdt on top of nop5 (single-stepped)
will be slower than on top of current nop1 (emulated)

AFAICS selftests should still work, just bit slower due to nop5 emulation

one part of the solution would be to backport [1] to stable kernels
which is an easy fix (even though it needs changes now)

if that's not enough we'd need to come up with that nop1/nop5 macro
solution, where tooling (libbpf with extra data in usdt note) would
install uprobe on top of nop1 on older kernels and on top of nop5 on
new ones.. but that'd need more work of course

jirka


[1] patch#7 - uprobes/x86: Add support to emulate nop5 instruction


> 
> 
> > diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
> > index ca0162b4dc57..7ac9291f45f1 100644
> > --- a/tools/testing/selftests/bpf/sdt.h
> > +++ b/tools/testing/selftests/bpf/sdt.h
> > @@ -234,6 +234,13 @@ __extension__ extern unsigned long long __sdt_unsp;
> >  #define _SDT_NOP       nop
> >  #endif
> >
> > +/* Use 5 byte nop for x86_64 to allow optimizing uprobes. */
> > +#if defined(__x86_64__)
> > +# define _SDT_DEF_NOP _SDT_ASM_5(990:  .byte 0x0f, 0x1f, 0x44, 0x00, 0x00)
> > +#else
> > +# define _SDT_DEF_NOP _SDT_ASM_1(990:  _SDT_NOP)
> > +#endif
> > +
> >  #define _SDT_NOTE_NAME "stapsdt"
> >  #define _SDT_NOTE_TYPE 3
> >
> > @@ -286,7 +293,7 @@ __extension__ extern unsigned long long __sdt_unsp;
> >
> >  #define _SDT_ASM_BODY(provider, name, pack_args, args, ...)                  \
> >    _SDT_DEF_MACROS                                                            \
> > -  _SDT_ASM_1(990:      _SDT_NOP)                                             \
> > +  _SDT_DEF_NOP                                                               \
> >    _SDT_ASM_3(          .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
> >    _SDT_ASM_1(          .balign 4)                                            \
> >    _SDT_ASM_3(          .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE)          \
> > --
> > 2.47.0
> >
Andrii Nakryiko Dec. 16, 2024, 11:06 p.m. UTC | #3
On Mon, Dec 16, 2024 at 12:32 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Fri, Dec 13, 2024 at 01:58:33PM -0800, Andrii Nakryiko wrote:
> > On Wed, Dec 11, 2024 at 5:35 AM Jiri Olsa <jolsa@kernel.org> wrote:
> > >
> > > Using 5-byte nop for x86 usdt probes so we can switch
> > > to optimized uprobe them.
> > >
> > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > > ---
> > >  tools/testing/selftests/bpf/sdt.h | 9 ++++++++-
> > >  1 file changed, 8 insertions(+), 1 deletion(-)
> > >
> >
> > This change will make it impossible to run latest selftests on older
> > kernels. Let's do what we did with -DENABLE_ATOMICS_TESTS and allow to
> > disable this through Makefile, ok?
>
> ok, I wanted to start addressing this after this version
>
> so the problem is using this macro with nop5 in application running
> on older kernels that do not have nop5 emulation and uprobe syscall
> optimization, because uprobe/usdt on top of nop5 (single-stepped)
> will be slower than on top of current nop1 (emulated)
>
> AFAICS selftests should still work, just bit slower due to nop5 emulation

ah, fair enough, it won't break anything, true

>
> one part of the solution would be to backport [1] to stable kernels
> which is an easy fix (even though it needs changes now)
>
> if that's not enough we'd need to come up with that nop1/nop5 macro
> solution, where tooling (libbpf with extra data in usdt note) would
> install uprobe on top of nop1 on older kernels and on top of nop5 on
> new ones.. but that'd need more work of course

yes, I think that's the way we should go, but as you said, that's
outside of kernel and we should work on that as a follow up to this
series

>
> jirka
>
>
> [1] patch#7 - uprobes/x86: Add support to emulate nop5 instruction
>
>
> >
> >
> > > diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
> > > index ca0162b4dc57..7ac9291f45f1 100644
> > > --- a/tools/testing/selftests/bpf/sdt.h
> > > +++ b/tools/testing/selftests/bpf/sdt.h
> > > @@ -234,6 +234,13 @@ __extension__ extern unsigned long long __sdt_unsp;
> > >  #define _SDT_NOP       nop
> > >  #endif
> > >
> > > +/* Use 5 byte nop for x86_64 to allow optimizing uprobes. */
> > > +#if defined(__x86_64__)
> > > +# define _SDT_DEF_NOP _SDT_ASM_5(990:  .byte 0x0f, 0x1f, 0x44, 0x00, 0x00)
> > > +#else
> > > +# define _SDT_DEF_NOP _SDT_ASM_1(990:  _SDT_NOP)
> > > +#endif
> > > +
> > >  #define _SDT_NOTE_NAME "stapsdt"
> > >  #define _SDT_NOTE_TYPE 3
> > >
> > > @@ -286,7 +293,7 @@ __extension__ extern unsigned long long __sdt_unsp;
> > >
> > >  #define _SDT_ASM_BODY(provider, name, pack_args, args, ...)                  \
> > >    _SDT_DEF_MACROS                                                            \
> > > -  _SDT_ASM_1(990:      _SDT_NOP)                                             \
> > > +  _SDT_DEF_NOP                                                               \
> > >    _SDT_ASM_3(          .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
> > >    _SDT_ASM_1(          .balign 4)                                            \
> > >    _SDT_ASM_3(          .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE)          \
> > > --
> > > 2.47.0
> > >
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
index ca0162b4dc57..7ac9291f45f1 100644
--- a/tools/testing/selftests/bpf/sdt.h
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -234,6 +234,13 @@  __extension__ extern unsigned long long __sdt_unsp;
 #define _SDT_NOP	nop
 #endif
 
+/* Use 5 byte nop for x86_64 to allow optimizing uprobes. */
+#if defined(__x86_64__)
+# define _SDT_DEF_NOP _SDT_ASM_5(990:	.byte 0x0f, 0x1f, 0x44, 0x00, 0x00)
+#else
+# define _SDT_DEF_NOP _SDT_ASM_1(990:	_SDT_NOP)
+#endif
+
 #define _SDT_NOTE_NAME	"stapsdt"
 #define _SDT_NOTE_TYPE	3
 
@@ -286,7 +293,7 @@  __extension__ extern unsigned long long __sdt_unsp;
 
 #define _SDT_ASM_BODY(provider, name, pack_args, args, ...)		      \
   _SDT_DEF_MACROS							      \
-  _SDT_ASM_1(990:	_SDT_NOP)					      \
+  _SDT_DEF_NOP								      \
   _SDT_ASM_3(		.pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
   _SDT_ASM_1(		.balign 4)					      \
   _SDT_ASM_3(		.4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE)	      \