diff mbox series

[v4,5/9] ftrace: Add ftrace_partial_regs() for converting ftrace_regs to pt_regs

Message ID 169280378611.282662.4078983611827223131.stgit@devnote2 (mailing list archive)
State Superseded
Headers show
Series bpf: fprobe: rethook: Use ftrace_regs instead of pt_regs | expand

Commit Message

Masami Hiramatsu (Google) Aug. 23, 2023, 3:16 p.m. UTC
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
If the architecture defines its own ftrace_regs, this copies partial
registers to pt_regs and returns it. If not, ftrace_regs is the same as
pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Florent Revest <revest@chromium.org>
---
 Changes in v3:
  - Fix to use pt_regs::regs instead of x.
  - Return ftrace_regs::regs forcibly if HAVE_PT_REGS_COMPAT_FTRACE_REGS=y.
  - Fix typo.
  - Fix to copy correct registers to the pt_regs on arm64.
 Changes in v4:
  - Change the patch order in the series so that fprobe event can use this.
---
 arch/arm64/include/asm/ftrace.h |   11 +++++++++++
 include/linux/ftrace.h          |   17 +++++++++++++++++
 2 files changed, 28 insertions(+)

Comments

Andrii Nakryiko Aug. 25, 2023, 9:49 p.m. UTC | #1
On Wed, Aug 23, 2023 at 8:16 AM Masami Hiramatsu (Google)
<mhiramat@kernel.org> wrote:
>
> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
> Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
> If the architecture defines its own ftrace_regs, this copies partial
> registers to pt_regs and returns it. If not, ftrace_regs is the same as
> pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.
>
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> Acked-by: Florent Revest <revest@chromium.org>
> ---
>  Changes in v3:
>   - Fix to use pt_regs::regs instead of x.
>   - Return ftrace_regs::regs forcibly if HAVE_PT_REGS_COMPAT_FTRACE_REGS=y.
>   - Fix typo.
>   - Fix to copy correct registers to the pt_regs on arm64.
>  Changes in v4:
>   - Change the patch order in the series so that fprobe event can use this.
> ---
>  arch/arm64/include/asm/ftrace.h |   11 +++++++++++
>  include/linux/ftrace.h          |   17 +++++++++++++++++
>  2 files changed, 28 insertions(+)
>
> diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> index ab158196480c..5ad24f315d52 100644
> --- a/arch/arm64/include/asm/ftrace.h
> +++ b/arch/arm64/include/asm/ftrace.h
> @@ -137,6 +137,17 @@ ftrace_override_function_with_return(struct ftrace_regs *fregs)
>         fregs->pc = fregs->lr;
>  }
>
> +static __always_inline struct pt_regs *
> +ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
> +{
> +       memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
> +       regs->sp = fregs->sp;
> +       regs->pc = fregs->pc;
> +       regs->regs[29] = fregs->fp;
> +       regs->regs[30] = fregs->lr;

I see that orig_x0 from pt_regs is used on arm64 to get syscall's
first parameter. And it seems like this ftrace_regs to pt_regs
conversion doesn't touch orig_x0 at all. Is it maintained/preserved
somewhere else, or will we lose the ability to get the first syscall's
argument?

Looking at libbpf's bpf_tracing.h, other than orig_x0, I think all the
other registers are still preserved, so this seems to be the only
potential problem.


> +       return regs;
> +}
> +
>  int ftrace_regs_query_register_offset(const char *name);
>
>  int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index c0a42d0860b8..a6ed2aa71efc 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -165,6 +165,23 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
>         return arch_ftrace_get_regs(fregs);
>  }
>
> +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
> +       defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
> +
> +static __always_inline struct pt_regs *
> +ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
> +{
> +       /*
> +        * If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
> +        * layout is the same as pt_regs. So always returns that address.
> +        * Since arch_ftrace_get_regs() will check some members and may return
> +        * NULL, we can not use it.
> +        */
> +       return &fregs->regs;
> +}
> +
> +#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
> +
>  /*
>   * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
>   * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.
>
>
Masami Hiramatsu (Google) Aug. 26, 2023, 1:56 a.m. UTC | #2
On Fri, 25 Aug 2023 14:49:48 -0700
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:

> On Wed, Aug 23, 2023 at 8:16 AM Masami Hiramatsu (Google)
> <mhiramat@kernel.org> wrote:
> >
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> >
> > Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
> > If the architecture defines its own ftrace_regs, this copies partial
> > registers to pt_regs and returns it. If not, ftrace_regs is the same as
> > pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.
> >
> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > Acked-by: Florent Revest <revest@chromium.org>
> > ---
> >  Changes in v3:
> >   - Fix to use pt_regs::regs instead of x.
> >   - Return ftrace_regs::regs forcibly if HAVE_PT_REGS_COMPAT_FTRACE_REGS=y.
> >   - Fix typo.
> >   - Fix to copy correct registers to the pt_regs on arm64.
> >  Changes in v4:
> >   - Change the patch order in the series so that fprobe event can use this.
> > ---
> >  arch/arm64/include/asm/ftrace.h |   11 +++++++++++
> >  include/linux/ftrace.h          |   17 +++++++++++++++++
> >  2 files changed, 28 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> > index ab158196480c..5ad24f315d52 100644
> > --- a/arch/arm64/include/asm/ftrace.h
> > +++ b/arch/arm64/include/asm/ftrace.h
> > @@ -137,6 +137,17 @@ ftrace_override_function_with_return(struct ftrace_regs *fregs)
> >         fregs->pc = fregs->lr;
> >  }
> >
> > +static __always_inline struct pt_regs *
> > +ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
> > +{
> > +       memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
> > +       regs->sp = fregs->sp;
> > +       regs->pc = fregs->pc;
> > +       regs->regs[29] = fregs->fp;
> > +       regs->regs[30] = fregs->lr;
> 
> I see that orig_x0 from pt_regs is used on arm64 to get syscall's
> first parameter. And it seems like this ftrace_regs to pt_regs
> conversion doesn't touch orig_x0 at all. Is it maintained/preserved
> somewhere else, or will we lose the ability to get the first syscall's
> argument?

Thanks for checking it!

Does BPF uses kprobe probe to trace syscalls? Since we have raw_syscall
trace events, no need to use kprobe to do that. (and I don't recommend to
use kprobe to do such fixed event)

> 
> Looking at libbpf's bpf_tracing.h, other than orig_x0, I think all the
> other registers are still preserved, so this seems to be the only
> potential problem.

Great!

Thank you,

> 
> 
> > +       return regs;
> > +}
> > +
> >  int ftrace_regs_query_register_offset(const char *name);
> >
> >  int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > index c0a42d0860b8..a6ed2aa71efc 100644
> > --- a/include/linux/ftrace.h
> > +++ b/include/linux/ftrace.h
> > @@ -165,6 +165,23 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
> >         return arch_ftrace_get_regs(fregs);
> >  }
> >
> > +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
> > +       defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
> > +
> > +static __always_inline struct pt_regs *
> > +ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
> > +{
> > +       /*
> > +        * If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
> > +        * layout is the same as pt_regs. So always returns that address.
> > +        * Since arch_ftrace_get_regs() will check some members and may return
> > +        * NULL, we can not use it.
> > +        */
> > +       return &fregs->regs;
> > +}
> > +
> > +#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
> > +
> >  /*
> >   * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
> >   * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.
> >
> >
Andrii Nakryiko Sept. 5, 2023, 7:50 p.m. UTC | #3
On Fri, Aug 25, 2023 at 6:56 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Fri, 25 Aug 2023 14:49:48 -0700
> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> > On Wed, Aug 23, 2023 at 8:16 AM Masami Hiramatsu (Google)
> > <mhiramat@kernel.org> wrote:
> > >
> > > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > >
> > > Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
> > > If the architecture defines its own ftrace_regs, this copies partial
> > > registers to pt_regs and returns it. If not, ftrace_regs is the same as
> > > pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.
> > >
> > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > Acked-by: Florent Revest <revest@chromium.org>
> > > ---
> > >  Changes in v3:
> > >   - Fix to use pt_regs::regs instead of x.
> > >   - Return ftrace_regs::regs forcibly if HAVE_PT_REGS_COMPAT_FTRACE_REGS=y.
> > >   - Fix typo.
> > >   - Fix to copy correct registers to the pt_regs on arm64.
> > >  Changes in v4:
> > >   - Change the patch order in the series so that fprobe event can use this.
> > > ---
> > >  arch/arm64/include/asm/ftrace.h |   11 +++++++++++
> > >  include/linux/ftrace.h          |   17 +++++++++++++++++
> > >  2 files changed, 28 insertions(+)
> > >
> > > diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> > > index ab158196480c..5ad24f315d52 100644
> > > --- a/arch/arm64/include/asm/ftrace.h
> > > +++ b/arch/arm64/include/asm/ftrace.h
> > > @@ -137,6 +137,17 @@ ftrace_override_function_with_return(struct ftrace_regs *fregs)
> > >         fregs->pc = fregs->lr;
> > >  }
> > >
> > > +static __always_inline struct pt_regs *
> > > +ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
> > > +{
> > > +       memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
> > > +       regs->sp = fregs->sp;
> > > +       regs->pc = fregs->pc;
> > > +       regs->regs[29] = fregs->fp;
> > > +       regs->regs[30] = fregs->lr;
> >
> > I see that orig_x0 from pt_regs is used on arm64 to get syscall's
> > first parameter. And it seems like this ftrace_regs to pt_regs
> > conversion doesn't touch orig_x0 at all. Is it maintained/preserved
> > somewhere else, or will we lose the ability to get the first syscall's
> > argument?
>
> Thanks for checking it!
>
> Does BPF uses kprobe probe to trace syscalls? Since we have raw_syscall
> trace events, no need to use kprobe to do that. (and I don't recommend to
> use kprobe to do such fixed event)

Yeah, lots of tools and projects actually trace syscalls with kprobes.
I don't think there is anything we can do to quickly change that, so
we should avoid breaking all of them.

So getting back to my original question, is it possible to preserve orig_x0?

>
> >
> > Looking at libbpf's bpf_tracing.h, other than orig_x0, I think all the
> > other registers are still preserved, so this seems to be the only
> > potential problem.
>
> Great!
>
> Thank you,
>
> >
> >
> > > +       return regs;
> > > +}
> > > +
> > >  int ftrace_regs_query_register_offset(const char *name);
> > >
> > >  int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> > > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > > index c0a42d0860b8..a6ed2aa71efc 100644
> > > --- a/include/linux/ftrace.h
> > > +++ b/include/linux/ftrace.h
> > > @@ -165,6 +165,23 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
> > >         return arch_ftrace_get_regs(fregs);
> > >  }
> > >
> > > +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
> > > +       defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
> > > +
> > > +static __always_inline struct pt_regs *
> > > +ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
> > > +{
> > > +       /*
> > > +        * If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
> > > +        * layout is the same as pt_regs. So always returns that address.
> > > +        * Since arch_ftrace_get_regs() will check some members and may return
> > > +        * NULL, we can not use it.
> > > +        */
> > > +       return &fregs->regs;
> > > +}
> > > +
> > > +#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
> > > +
> > >  /*
> > >   * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
> > >   * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.
> > >
> > >
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
Masami Hiramatsu (Google) Sept. 6, 2023, 12:28 a.m. UTC | #4
On Tue, 5 Sep 2023 12:50:28 -0700
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:

> On Fri, Aug 25, 2023 at 6:56 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> >
> > On Fri, 25 Aug 2023 14:49:48 -0700
> > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >
> > > On Wed, Aug 23, 2023 at 8:16 AM Masami Hiramatsu (Google)
> > > <mhiramat@kernel.org> wrote:
> > > >
> > > > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > >
> > > > Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
> > > > If the architecture defines its own ftrace_regs, this copies partial
> > > > registers to pt_regs and returns it. If not, ftrace_regs is the same as
> > > > pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.
> > > >
> > > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > > Acked-by: Florent Revest <revest@chromium.org>
> > > > ---
> > > >  Changes in v3:
> > > >   - Fix to use pt_regs::regs instead of x.
> > > >   - Return ftrace_regs::regs forcibly if HAVE_PT_REGS_COMPAT_FTRACE_REGS=y.
> > > >   - Fix typo.
> > > >   - Fix to copy correct registers to the pt_regs on arm64.
> > > >  Changes in v4:
> > > >   - Change the patch order in the series so that fprobe event can use this.
> > > > ---
> > > >  arch/arm64/include/asm/ftrace.h |   11 +++++++++++
> > > >  include/linux/ftrace.h          |   17 +++++++++++++++++
> > > >  2 files changed, 28 insertions(+)
> > > >
> > > > diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> > > > index ab158196480c..5ad24f315d52 100644
> > > > --- a/arch/arm64/include/asm/ftrace.h
> > > > +++ b/arch/arm64/include/asm/ftrace.h
> > > > @@ -137,6 +137,17 @@ ftrace_override_function_with_return(struct ftrace_regs *fregs)
> > > >         fregs->pc = fregs->lr;
> > > >  }
> > > >
> > > > +static __always_inline struct pt_regs *
> > > > +ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
> > > > +{
> > > > +       memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
> > > > +       regs->sp = fregs->sp;
> > > > +       regs->pc = fregs->pc;
> > > > +       regs->regs[29] = fregs->fp;
> > > > +       regs->regs[30] = fregs->lr;
> > >
> > > I see that orig_x0 from pt_regs is used on arm64 to get syscall's
> > > first parameter. And it seems like this ftrace_regs to pt_regs
> > > conversion doesn't touch orig_x0 at all. Is it maintained/preserved
> > > somewhere else, or will we lose the ability to get the first syscall's
> > > argument?
> >
> > Thanks for checking it!
> >
> > Does BPF uses kprobe probe to trace syscalls? Since we have raw_syscall
> > trace events, no need to use kprobe to do that. (and I don't recommend to
> > use kprobe to do such fixed event)
> 
> Yeah, lots of tools and projects actually trace syscalls with kprobes.
> I don't think there is anything we can do to quickly change that, so
> we should avoid breaking all of them.

Yes, ah, but anyway, this is the fprobe case, not kprobe. Do you use
multi_kprobes for tracing syscalls? 

Jiri, do you know when the multi-kprobe feature is used? Is that used
implicitly or explicitly?

> 
> So getting back to my original question, is it possible to preserve orig_x0?

I'm curious that the orig_x0 is stored to pt_regs of the kprobes itself
because it is not a real register. There is no way to access it. You can
use regs->x0 instead of that.

I think the orig_x0 is stored when the syscall happened because it has
another user pt_regs on the stack, right?

If so, we don't need to save orig_x0 on the pt_regs for kprobes, but user can
dig the stack to find the orig_x0. Here the arm64, syscall entry handler,

static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
                           const syscall_fn_t syscall_table[])
{
        unsigned long flags = read_thread_flags();

        regs->orig_x0 = regs->regs[0];
        regs->syscallno = scno;

(BTW, it seems syscall number is saved on regs->syscallno.)

It seems that if you probe the el0_svc_common() for tracing syscall,
all you need is tracing $arg1 == pt_regs and $arg2 == syscall number.

Thank you,

> 
> >
> > >
> > > Looking at libbpf's bpf_tracing.h, other than orig_x0, I think all the
> > > other registers are still preserved, so this seems to be the only
> > > potential problem.
> >
> > Great!
> >
> > Thank you,
> >
> > >
> > >
> > > > +       return regs;
> > > > +}
> > > > +
> > > >  int ftrace_regs_query_register_offset(const char *name);
> > > >
> > > >  int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> > > > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > > > index c0a42d0860b8..a6ed2aa71efc 100644
> > > > --- a/include/linux/ftrace.h
> > > > +++ b/include/linux/ftrace.h
> > > > @@ -165,6 +165,23 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
> > > >         return arch_ftrace_get_regs(fregs);
> > > >  }
> > > >
> > > > +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
> > > > +       defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
> > > > +
> > > > +static __always_inline struct pt_regs *
> > > > +ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
> > > > +{
> > > > +       /*
> > > > +        * If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
> > > > +        * layout is the same as pt_regs. So always returns that address.
> > > > +        * Since arch_ftrace_get_regs() will check some members and may return
> > > > +        * NULL, we can not use it.
> > > > +        */
> > > > +       return &fregs->regs;
> > > > +}
> > > > +
> > > > +#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
> > > > +
> > > >  /*
> > > >   * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
> > > >   * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.
> > > >
> > > >
> >
> >
> > --
> > Masami Hiramatsu (Google) <mhiramat@kernel.org>
Andrii Nakryiko Sept. 8, 2023, 10:56 p.m. UTC | #5
On Tue, Sep 5, 2023 at 5:28 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Tue, 5 Sep 2023 12:50:28 -0700
> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> > On Fri, Aug 25, 2023 at 6:56 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > >
> > > On Fri, 25 Aug 2023 14:49:48 -0700
> > > Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > >
> > > > On Wed, Aug 23, 2023 at 8:16 AM Masami Hiramatsu (Google)
> > > > <mhiramat@kernel.org> wrote:
> > > > >
> > > > > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > > >
> > > > > Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
> > > > > If the architecture defines its own ftrace_regs, this copies partial
> > > > > registers to pt_regs and returns it. If not, ftrace_regs is the same as
> > > > > pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.
> > > > >
> > > > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > > > Acked-by: Florent Revest <revest@chromium.org>
> > > > > ---
> > > > >  Changes in v3:
> > > > >   - Fix to use pt_regs::regs instead of x.
> > > > >   - Return ftrace_regs::regs forcibly if HAVE_PT_REGS_COMPAT_FTRACE_REGS=y.
> > > > >   - Fix typo.
> > > > >   - Fix to copy correct registers to the pt_regs on arm64.
> > > > >  Changes in v4:
> > > > >   - Change the patch order in the series so that fprobe event can use this.
> > > > > ---
> > > > >  arch/arm64/include/asm/ftrace.h |   11 +++++++++++
> > > > >  include/linux/ftrace.h          |   17 +++++++++++++++++
> > > > >  2 files changed, 28 insertions(+)
> > > > >
> > > > > diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> > > > > index ab158196480c..5ad24f315d52 100644
> > > > > --- a/arch/arm64/include/asm/ftrace.h
> > > > > +++ b/arch/arm64/include/asm/ftrace.h
> > > > > @@ -137,6 +137,17 @@ ftrace_override_function_with_return(struct ftrace_regs *fregs)
> > > > >         fregs->pc = fregs->lr;
> > > > >  }
> > > > >
> > > > > +static __always_inline struct pt_regs *
> > > > > +ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
> > > > > +{
> > > > > +       memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
> > > > > +       regs->sp = fregs->sp;
> > > > > +       regs->pc = fregs->pc;
> > > > > +       regs->regs[29] = fregs->fp;
> > > > > +       regs->regs[30] = fregs->lr;
> > > >
> > > > I see that orig_x0 from pt_regs is used on arm64 to get syscall's
> > > > first parameter. And it seems like this ftrace_regs to pt_regs
> > > > conversion doesn't touch orig_x0 at all. Is it maintained/preserved
> > > > somewhere else, or will we lose the ability to get the first syscall's
> > > > argument?
> > >
> > > Thanks for checking it!
> > >
> > > Does BPF uses kprobe probe to trace syscalls? Since we have raw_syscall
> > > trace events, no need to use kprobe to do that. (and I don't recommend to
> > > use kprobe to do such fixed event)
> >
> > Yeah, lots of tools and projects actually trace syscalls with kprobes.
> > I don't think there is anything we can do to quickly change that, so
> > we should avoid breaking all of them.
>
> Yes, ah, but anyway, this is the fprobe case, not kprobe. Do you use
> multi_kprobes for tracing syscalls?
>
> Jiri, do you know when the multi-kprobe feature is used? Is that used
> implicitly or explicitly?

ah, ok, so all this fprobe machinery is not used for (single-)kprobes?
This makes it a bit less painful for end users, I believe most syscall
tracing kprobes right now are not multi-kprobes.

>
> >
> > So getting back to my original question, is it possible to preserve orig_x0?
>
> I'm curious that the orig_x0 is stored to pt_regs of the kprobes itself
> because it is not a real register. There is no way to access it. You can
> use regs->x0 instead of that.
>
> I think the orig_x0 is stored when the syscall happened because it has
> another user pt_regs on the stack, right?
>
> If so, we don't need to save orig_x0 on the pt_regs for kprobes, but user can
> dig the stack to find the orig_x0. Here the arm64, syscall entry handler,
>
> static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>                            const syscall_fn_t syscall_table[])
> {
>         unsigned long flags = read_thread_flags();
>
>         regs->orig_x0 = regs->regs[0];
>         regs->syscallno = scno;
>
> (BTW, it seems syscall number is saved on regs->syscallno.)
>
> It seems that if you probe the el0_svc_common() for tracing syscall,
> all you need is tracing $arg1 == pt_regs and $arg2 == syscall number.
>
> Thank you,
>
> >
> > >
> > > >
> > > > Looking at libbpf's bpf_tracing.h, other than orig_x0, I think all the
> > > > other registers are still preserved, so this seems to be the only
> > > > potential problem.
> > >
> > > Great!
> > >
> > > Thank you,
> > >
> > > >
> > > >
> > > > > +       return regs;
> > > > > +}
> > > > > +
> > > > >  int ftrace_regs_query_register_offset(const char *name);
> > > > >
> > > > >  int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> > > > > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > > > > index c0a42d0860b8..a6ed2aa71efc 100644
> > > > > --- a/include/linux/ftrace.h
> > > > > +++ b/include/linux/ftrace.h
> > > > > @@ -165,6 +165,23 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
> > > > >         return arch_ftrace_get_regs(fregs);
> > > > >  }
> > > > >
> > > > > +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
> > > > > +       defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
> > > > > +
> > > > > +static __always_inline struct pt_regs *
> > > > > +ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
> > > > > +{
> > > > > +       /*
> > > > > +        * If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
> > > > > +        * layout is the same as pt_regs. So always returns that address.
> > > > > +        * Since arch_ftrace_get_regs() will check some members and may return
> > > > > +        * NULL, we can not use it.
> > > > > +        */
> > > > > +       return &fregs->regs;
> > > > > +}
> > > > > +
> > > > > +#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
> > > > > +
> > > > >  /*
> > > > >   * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
> > > > >   * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.
> > > > >
> > > > >
> > >
> > >
> > > --
> > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index ab158196480c..5ad24f315d52 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -137,6 +137,17 @@  ftrace_override_function_with_return(struct ftrace_regs *fregs)
 	fregs->pc = fregs->lr;
 }
 
+static __always_inline struct pt_regs *
+ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+	memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
+	regs->sp = fregs->sp;
+	regs->pc = fregs->pc;
+	regs->regs[29] = fregs->fp;
+	regs->regs[30] = fregs->lr;
+	return regs;
+}
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c0a42d0860b8..a6ed2aa71efc 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -165,6 +165,23 @@  static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
 	return arch_ftrace_get_regs(fregs);
 }
 
+#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
+	defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+	/*
+	 * If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
+	 * layout is the same as pt_regs. So always returns that address.
+	 * Since arch_ftrace_get_regs() will check some members and may return
+	 * NULL, we can not use it.
+	 */
+	return &fregs->regs;
+}
+
+#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
+
 /*
  * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
  * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.