Message ID | 20241211133403.208920-10-jolsa@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | uprobes: Add support to optimize usdt probes on x86_64 | expand |
On Wed, Dec 11, 2024 at 5:35 AM Jiri Olsa <jolsa@kernel.org> wrote: > > Using 5-byte nop for x86 usdt probes so we can switch > to optimized uprobe them. > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > --- > tools/testing/selftests/bpf/sdt.h | 9 ++++++++- > 1 file changed, 8 insertions(+), 1 deletion(-) > This change will make it impossible to run latest selftests on older kernels. Let's do what we did with -DENABLE_ATOMICS_TESTS and allow to disable this through Makefile, ok? > diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h > index ca0162b4dc57..7ac9291f45f1 100644 > --- a/tools/testing/selftests/bpf/sdt.h > +++ b/tools/testing/selftests/bpf/sdt.h > @@ -234,6 +234,13 @@ __extension__ extern unsigned long long __sdt_unsp; > #define _SDT_NOP nop > #endif > > +/* Use 5 byte nop for x86_64 to allow optimizing uprobes. */ > +#if defined(__x86_64__) > +# define _SDT_DEF_NOP _SDT_ASM_5(990: .byte 0x0f, 0x1f, 0x44, 0x00, 0x00) > +#else > +# define _SDT_DEF_NOP _SDT_ASM_1(990: _SDT_NOP) > +#endif > + > #define _SDT_NOTE_NAME "stapsdt" > #define _SDT_NOTE_TYPE 3 > > @@ -286,7 +293,7 @@ __extension__ extern unsigned long long __sdt_unsp; > > #define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \ > _SDT_DEF_MACROS \ > - _SDT_ASM_1(990: _SDT_NOP) \ > + _SDT_DEF_NOP \ > _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \ > _SDT_ASM_1( .balign 4) \ > _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \ > -- > 2.47.0 >
On Fri, Dec 13, 2024 at 01:58:33PM -0800, Andrii Nakryiko wrote: > On Wed, Dec 11, 2024 at 5:35 AM Jiri Olsa <jolsa@kernel.org> wrote: > > > > Using 5-byte nop for x86 usdt probes so we can switch > > to optimized uprobe them. > > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > > --- > > tools/testing/selftests/bpf/sdt.h | 9 ++++++++- > > 1 file changed, 8 insertions(+), 1 deletion(-) > > > > This change will make it impossible to run latest selftests on older > kernels. Let's do what we did with -DENABLE_ATOMICS_TESTS and allow to > disable this through Makefile, ok? ok, I wanted to start addressing this after this version so the problem is using this macro with nop5 in application running on older kernels that do not have nop5 emulation and uprobe syscall optimization, because uprobe/usdt on top of nop5 (single-stepped) will be slower than on top of current nop1 (emulated) AFAICS selftests should still work, just bit slower due to nop5 emulation one part of the solution would be to backport [1] to stable kernels which is an easy fix (even though it needs changes now) if that's not enough we'd need to come up with that nop1/nop5 macro solution, where tooling (libbpf with extra data in usdt note) would install uprobe on top of nop1 on older kernels and on top of nop5 on new ones.. but that'd need more work of course jirka [1] patch#7 - uprobes/x86: Add support to emulate nop5 instruction > > > > diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h > > index ca0162b4dc57..7ac9291f45f1 100644 > > --- a/tools/testing/selftests/bpf/sdt.h > > +++ b/tools/testing/selftests/bpf/sdt.h > > @@ -234,6 +234,13 @@ __extension__ extern unsigned long long __sdt_unsp; > > #define _SDT_NOP nop > > #endif > > > > +/* Use 5 byte nop for x86_64 to allow optimizing uprobes. */ > > +#if defined(__x86_64__) > > +# define _SDT_DEF_NOP _SDT_ASM_5(990: .byte 0x0f, 0x1f, 0x44, 0x00, 0x00) > > +#else > > +# define _SDT_DEF_NOP _SDT_ASM_1(990: _SDT_NOP) > > +#endif > > + > > #define _SDT_NOTE_NAME "stapsdt" > > #define _SDT_NOTE_TYPE 3 > > > > @@ -286,7 +293,7 @@ __extension__ extern unsigned long long __sdt_unsp; > > > > #define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \ > > _SDT_DEF_MACROS \ > > - _SDT_ASM_1(990: _SDT_NOP) \ > > + _SDT_DEF_NOP \ > > _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \ > > _SDT_ASM_1( .balign 4) \ > > _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \ > > -- > > 2.47.0 > >
On Mon, Dec 16, 2024 at 12:32 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > On Fri, Dec 13, 2024 at 01:58:33PM -0800, Andrii Nakryiko wrote: > > On Wed, Dec 11, 2024 at 5:35 AM Jiri Olsa <jolsa@kernel.org> wrote: > > > > > > Using 5-byte nop for x86 usdt probes so we can switch > > > to optimized uprobe them. > > > > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > > > --- > > > tools/testing/selftests/bpf/sdt.h | 9 ++++++++- > > > 1 file changed, 8 insertions(+), 1 deletion(-) > > > > > > > This change will make it impossible to run latest selftests on older > > kernels. Let's do what we did with -DENABLE_ATOMICS_TESTS and allow to > > disable this through Makefile, ok? > > ok, I wanted to start addressing this after this version > > so the problem is using this macro with nop5 in application running > on older kernels that do not have nop5 emulation and uprobe syscall > optimization, because uprobe/usdt on top of nop5 (single-stepped) > will be slower than on top of current nop1 (emulated) > > AFAICS selftests should still work, just bit slower due to nop5 emulation ah, fair enough, it won't break anything, true > > one part of the solution would be to backport [1] to stable kernels > which is an easy fix (even though it needs changes now) > > if that's not enough we'd need to come up with that nop1/nop5 macro > solution, where tooling (libbpf with extra data in usdt note) would > install uprobe on top of nop1 on older kernels and on top of nop5 on > new ones.. but that'd need more work of course yes, I think that's the way we should go, but as you said, that's outside of kernel and we should work on that as a follow up to this series > > jirka > > > [1] patch#7 - uprobes/x86: Add support to emulate nop5 instruction > > > > > > > > > diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h > > > index ca0162b4dc57..7ac9291f45f1 100644 > > > --- a/tools/testing/selftests/bpf/sdt.h > > > +++ b/tools/testing/selftests/bpf/sdt.h > > > @@ -234,6 +234,13 @@ __extension__ extern unsigned long long __sdt_unsp; > > > #define _SDT_NOP nop > > > #endif > > > > > > +/* Use 5 byte nop for x86_64 to allow optimizing uprobes. */ > > > +#if defined(__x86_64__) > > > +# define _SDT_DEF_NOP _SDT_ASM_5(990: .byte 0x0f, 0x1f, 0x44, 0x00, 0x00) > > > +#else > > > +# define _SDT_DEF_NOP _SDT_ASM_1(990: _SDT_NOP) > > > +#endif > > > + > > > #define _SDT_NOTE_NAME "stapsdt" > > > #define _SDT_NOTE_TYPE 3 > > > > > > @@ -286,7 +293,7 @@ __extension__ extern unsigned long long __sdt_unsp; > > > > > > #define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \ > > > _SDT_DEF_MACROS \ > > > - _SDT_ASM_1(990: _SDT_NOP) \ > > > + _SDT_DEF_NOP \ > > > _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \ > > > _SDT_ASM_1( .balign 4) \ > > > _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \ > > > -- > > > 2.47.0 > > >
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h index ca0162b4dc57..7ac9291f45f1 100644 --- a/tools/testing/selftests/bpf/sdt.h +++ b/tools/testing/selftests/bpf/sdt.h @@ -234,6 +234,13 @@ __extension__ extern unsigned long long __sdt_unsp; #define _SDT_NOP nop #endif +/* Use 5 byte nop for x86_64 to allow optimizing uprobes. */ +#if defined(__x86_64__) +# define _SDT_DEF_NOP _SDT_ASM_5(990: .byte 0x0f, 0x1f, 0x44, 0x00, 0x00) +#else +# define _SDT_DEF_NOP _SDT_ASM_1(990: _SDT_NOP) +#endif + #define _SDT_NOTE_NAME "stapsdt" #define _SDT_NOTE_TYPE 3 @@ -286,7 +293,7 @@ __extension__ extern unsigned long long __sdt_unsp; #define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \ _SDT_DEF_MACROS \ - _SDT_ASM_1(990: _SDT_NOP) \ + _SDT_DEF_NOP \ _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \ _SDT_ASM_1( .balign 4) \ _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \
Using 5-byte nop for x86 usdt probes so we can switch to optimized uprobe them. Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/testing/selftests/bpf/sdt.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)