
[v7,3/3] arm64: implement KPROBES_ON_FTRACE

Message ID 20191225173001.6c0e3fb2@xhacker.debian (mailing list archive)
State New, archived
Series arm64: implement KPROBES_ON_FTRACE

Commit Message

Jisheng Zhang Dec. 25, 2019, 9:44 a.m. UTC
KPROBES_ON_FTRACE avoids much of the overhead with regular kprobes as it
eliminates the need for a trap, as well as the need to emulate or
single-step instructions.

Tested on berlin arm64 platform.

~ # mount -t debugfs debugfs /sys/kernel/debug/
~ # cd /sys/kernel/debug/
/sys/kernel/debug # echo 'p _do_fork' > tracing/kprobe_events

before the patch:

/sys/kernel/debug # cat kprobes/list
ffffff801009fe28  k  _do_fork+0x0    [DISABLED]

after the patch:

/sys/kernel/debug # cat kprobes/list
ffffff801009ff54  k  _do_fork+0x0    [DISABLED][FTRACE]
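
The event can then be enabled and its output inspected in the usual way
(not part of the original test log; the paths assume the same debugfs
mount as above):

~ # echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable
~ # cat /sys/kernel/debug/tracing/trace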

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
---
 .../debug/kprobes-on-ftrace/arch-support.txt  |  2 +-
 arch/arm64/Kconfig                            |  1 +
 arch/arm64/include/asm/ftrace.h               |  1 +
 arch/arm64/kernel/probes/Makefile             |  1 +
 arch/arm64/kernel/probes/ftrace.c             | 78 +++++++++++++++++++
 5 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/probes/ftrace.c

Comments

Masami Hiramatsu (Google) Dec. 26, 2019, 2:57 a.m. UTC | #1
Hi Jisheng,

On Wed, 25 Dec 2019 09:44:21 +0000
Jisheng Zhang <Jisheng.Zhang@synaptics.com> wrote:

> KPROBES_ON_FTRACE avoids much of the overhead with regular kprobes as it
> eliminates the need for a trap, as well as the need to emulate or
> single-step instructions.
> 
> Tested on berlin arm64 platform.
> 
> ~ # mount -t debugfs debugfs /sys/kernel/debug/
> ~ # cd /sys/kernel/debug/
> /sys/kernel/debug # echo 'p _do_fork' > tracing/kprobe_events
> 
> before the patch:
> 
> /sys/kernel/debug # cat kprobes/list
> ffffff801009fe28  k  _do_fork+0x0    [DISABLED]
> 
> after the patch:
> 
> /sys/kernel/debug # cat kprobes/list
> ffffff801009ff54  k  _do_fork+0x0    [DISABLED][FTRACE]

What happens if a user puts a probe on _do_fork+4?
Does that correctly return -EILSEQ?

> 
> Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
> ---
>  .../debug/kprobes-on-ftrace/arch-support.txt  |  2 +-
>  arch/arm64/Kconfig                            |  1 +
>  arch/arm64/include/asm/ftrace.h               |  1 +
>  arch/arm64/kernel/probes/Makefile             |  1 +
>  arch/arm64/kernel/probes/ftrace.c             | 78 +++++++++++++++++++
>  5 files changed, 82 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kernel/probes/ftrace.c
> 
> diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> index 4fae0464ddff..f9dd9dd91e0c 100644
> --- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> +++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> @@ -9,7 +9,7 @@
>      |       alpha: | TODO |
>      |         arc: | TODO |
>      |         arm: | TODO |
> -    |       arm64: | TODO |
> +    |       arm64: |  ok  |
>      |         c6x: | TODO |
>      |        csky: | TODO |
>      |       h8300: | TODO |
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index b1b4476ddb83..92b9882889ac 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -166,6 +166,7 @@ config ARM64
>  	select HAVE_STACKPROTECTOR
>  	select HAVE_SYSCALL_TRACEPOINTS
>  	select HAVE_KPROBES
> +	select HAVE_KPROBES_ON_FTRACE
>  	select HAVE_KRETPROBES
>  	select HAVE_GENERIC_VDSO
>  	select IOMMU_DMA if IOMMU_SUPPORT
> diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> index 91fa4baa1a93..875aeb839654 100644
> --- a/arch/arm64/include/asm/ftrace.h
> +++ b/arch/arm64/include/asm/ftrace.h
> @@ -20,6 +20,7 @@
>  
>  /* The BL at the callsite's adjusted rec->ip */
>  #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
> +#define FTRACE_IP_EXTENSION	MCOUNT_INSN_SIZE
>  
>  #define FTRACE_PLT_IDX		0
>  #define FTRACE_REGS_PLT_IDX	1
> diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile
> index 8e4be92e25b1..4020cfc66564 100644
> --- a/arch/arm64/kernel/probes/Makefile
> +++ b/arch/arm64/kernel/probes/Makefile
> @@ -4,3 +4,4 @@ obj-$(CONFIG_KPROBES)		+= kprobes.o decode-insn.o	\
>  				   simulate-insn.o
>  obj-$(CONFIG_UPROBES)		+= uprobes.o decode-insn.o	\
>  				   simulate-insn.o
> +obj-$(CONFIG_KPROBES_ON_FTRACE)	+= ftrace.o
> diff --git a/arch/arm64/kernel/probes/ftrace.c b/arch/arm64/kernel/probes/ftrace.c
> new file mode 100644
> index 000000000000..0643aa2dacdb
> --- /dev/null
> +++ b/arch/arm64/kernel/probes/ftrace.c
> @@ -0,0 +1,78 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Dynamic Ftrace based Kprobes Optimization
> + *
> + * Copyright (C) Hitachi Ltd., 2012
> + * Copyright (C) 2019 Jisheng Zhang <jszhang@kernel.org>
> + *		      Synaptics Incorporated
> + */
> +
> +#include <linux/kprobes.h>
> +
> +/*
> + * In arm64 FTRACE_WITH_REGS implementation, we patch two nop instructions:
> + * the lr saver and bl ftrace-entry. Both these instructions are claimed
> + * by ftrace and we should allow probing on either instruction.

No, the 2nd bl ftrace-entry must not be probed.
The pair of lr-saver and bl ftrace-entry is tightly coupled. You can not
decouple it.

> + */
> +int arch_check_ftrace_location(struct kprobe *p)
> +{
> +	if (ftrace_location((unsigned long)p->addr))
> +		p->flags |= KPROBE_FLAG_FTRACE;
> +	return 0;
> +}

Thus, this must return -EILSEQ if a user puts a probe on the bl.
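
For illustration, a stricter check along these lines might look like the
following. This is only a sketch: it assumes ftrace_location() returns
rec->ip (on arm64, the address of the bl) and that the earlier patches in
this series make an address one instruction before rec->ip match as well:

	int arch_check_ftrace_location(struct kprobe *p)
	{
		unsigned long addr = (unsigned long)p->addr;
		unsigned long ftrace_addr = ftrace_location(addr);

		if (ftrace_addr) {
			/* rec->ip is the bl itself: reject probes there */
			if (addr == ftrace_addr)
				return -EILSEQ;
			/* otherwise addr is the lr saver at func+0 */
			p->flags |= KPROBE_FLAG_FTRACE;
		}
		return 0;
	}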

> +
> +/* Ftrace callback handler for kprobes -- called with preemption disabled */
> +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
> +			   struct ftrace_ops *ops, struct pt_regs *regs)
> +{
> +	bool lr_saver = false;
> +	struct kprobe *p;
> +	struct kprobe_ctlblk *kcb;
> +
> +	/* Preempt is disabled by ftrace */
> +	p = get_kprobe((kprobe_opcode_t *)ip);
> +	if (!p) {
> +		p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
> +		if (unlikely(!p) || kprobe_disabled(p))
> +			return;
> +		lr_saver = true;

Then, this can be removed.

> +	}
> +
> +	kcb = get_kprobe_ctlblk();
> +	if (kprobe_running()) {
> +		kprobes_inc_nmissed_count(p);
> +	} else {
> +		unsigned long orig_ip = instruction_pointer(regs);
> +
> +		if (lr_saver)
> +			ip -= MCOUNT_INSN_SIZE;

Ditto.

Thank you,

> +		instruction_pointer_set(regs, ip);
> +		__this_cpu_write(current_kprobe, p);
> +		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
> +		if (!p->pre_handler || !p->pre_handler(p, regs)) {
> +			/*
> +			 * Emulate singlestep (and also recover regs->pc)
> +			 * as if there is a nop
> +			 */
> +			instruction_pointer_set(regs,
> +				(unsigned long)p->addr + MCOUNT_INSN_SIZE);
> +			if (unlikely(p->post_handler)) {
> +				kcb->kprobe_status = KPROBE_HIT_SSDONE;
> +				p->post_handler(p, regs, 0);
> +			}
> +			instruction_pointer_set(regs, orig_ip);
> +		}
> +		/*
> +		 * If pre_handler returns !0, it changes regs->pc. We have to
> +		 * skip emulating post_handler.
> +		 */
> +		__this_cpu_write(current_kprobe, NULL);
> +	}
> +}
> +NOKPROBE_SYMBOL(kprobe_ftrace_handler);
> +
> +int arch_prepare_kprobe_ftrace(struct kprobe *p)
> +{
> +	p->ainsn.api.insn = NULL;
> +	return 0;
> +}
> -- 
> 2.24.1
>
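If only probes on the lr saver (func+0) are accepted, as suggested above,
the lr_saver special-casing disappears and the handler could collapse to
something like the following: a sketch under that assumption, not code
from any posted version of the series:

	void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
				   struct ftrace_ops *ops, struct pt_regs *regs)
	{
		struct kprobe *p;
		struct kprobe_ctlblk *kcb;

		/* Preemption is disabled by ftrace; ip is rec->ip, the bl */
		p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
		if (unlikely(!p) || kprobe_disabled(p))
			return;

		kcb = get_kprobe_ctlblk();
		if (kprobe_running()) {
			kprobes_inc_nmissed_count(p);
		} else {
			unsigned long orig_ip = instruction_pointer(regs);

			/* pre_handler sees pc == func+0, where the probe was placed */
			instruction_pointer_set(regs, (unsigned long)p->addr);
			__this_cpu_write(current_kprobe, p);
			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
			if (!p->pre_handler || !p->pre_handler(p, regs)) {
				/* emulate single-stepping over a nop */
				instruction_pointer_set(regs,
					(unsigned long)p->addr + MCOUNT_INSN_SIZE);
				if (unlikely(p->post_handler)) {
					kcb->kprobe_status = KPROBE_HIT_SSDONE;
					p->post_handler(p, regs, 0);
				}
				instruction_pointer_set(regs, orig_ip);
			}
			__this_cpu_write(current_kprobe, NULL);
		}
	}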
Jisheng Zhang Dec. 26, 2019, 3:18 a.m. UTC | #2
Hi

On Thu, 26 Dec 2019 11:57:07 +0900 Masami Hiramatsu wrote:

> 
> Hi Jisheng,
> 
> On Wed, 25 Dec 2019 09:44:21 +0000
> Jisheng Zhang <Jisheng.Zhang@synaptics.com> wrote:
> 
> > KPROBES_ON_FTRACE avoids much of the overhead with regular kprobes as it
> > eliminates the need for a trap, as well as the need to emulate or
> > single-step instructions.
> >
> > Tested on berlin arm64 platform.
> >
> > ~ # mount -t debugfs debugfs /sys/kernel/debug/
> > ~ # cd /sys/kernel/debug/
> > /sys/kernel/debug # echo 'p _do_fork' > tracing/kprobe_events
> >
> > before the patch:
> >
> > /sys/kernel/debug # cat kprobes/list
> > ffffff801009fe28  k  _do_fork+0x0    [DISABLED]
> >
> > after the patch:
> >
> > /sys/kernel/debug # cat kprobes/list
> > ffffff801009ff54  k  _do_fork+0x0    [DISABLED][FTRACE]  
> 
> What happens if a user puts a probe on _do_fork+4?
> Does that correctly return -EILSEQ?

_do_fork+4 can be probed successfully.

> > [...]
> > +/*
> > + * In arm64 FTRACE_WITH_REGS implementation, we patch two nop instructions:
> > + * the lr saver and bl ftrace-entry. Both these instructions are claimed
> > + * by ftrace and we should allow probing on either instruction.  
> 
> No, the 2nd bl ftrace-entry must not be probed.
> The pair of lr-saver and bl ftrace-entry is tightly coupled. You can not
> decouple it.

This is the key point: different views of this lead to different implementations.
I'm just wondering why the two instructions are considered coupled. I think
we have a situation similar to powerpc's: https://lkml.org/lkml/2019/6/18/646
The "mflr r0" corresponds to the lr saver here, and the branch to _mcount to
the bl ftrace-entry. Could you please comment further?

Thanks in advance



> [...]
Jisheng Zhang Dec. 26, 2019, 4:25 a.m. UTC | #3
On Thu, 26 Dec 2019 03:18:07 +0000 Jisheng Zhang wrote:


> 
> 
> Hi
> 
> On Thu, 26 Dec 2019 11:57:07 +0900 Masami Hiramatsu wrote:
> 
> >
> > Hi Jisheng,
> >
> > On Wed, 25 Dec 2019 09:44:21 +0000
> > Jisheng Zhang <Jisheng.Zhang@synaptics.com> wrote:
> >  
> > > [...]
> >
> > What happens if a user puts a probe on _do_fork+4?
> > Does that correctly return -EILSEQ?
> 
> _do_fork+4 can be probed successfully.
> 
> >  
> > >
> > > Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
> > > ---
> > >  .../debug/kprobes-on-ftrace/arch-support.txt  |  2 +-
> > >  arch/arm64/Kconfig                            |  1 +
> > >  arch/arm64/include/asm/ftrace.h               |  1 +
> > >  arch/arm64/kernel/probes/Makefile             |  1 +
> > >  arch/arm64/kernel/probes/ftrace.c             | 78 +++++++++++++++++++
> > >  5 files changed, 82 insertions(+), 1 deletion(-)
> > >  create mode 100644 arch/arm64/kernel/probes/ftrace.c
> > >
> > > diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> > > index 4fae0464ddff..f9dd9dd91e0c 100644
> > > --- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> > > +++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> > > @@ -9,7 +9,7 @@
> > >      |       alpha: | TODO |
> > >      |         arc: | TODO |
> > >      |         arm: | TODO |
> > > -    |       arm64: | TODO |
> > > +    |       arm64: |  ok  |
> > >      |         c6x: | TODO |
> > >      |        csky: | TODO |
> > >      |       h8300: | TODO |
> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > index b1b4476ddb83..92b9882889ac 100644
> > > --- a/arch/arm64/Kconfig
> > > +++ b/arch/arm64/Kconfig
> > > @@ -166,6 +166,7 @@ config ARM64
> > >       select HAVE_STACKPROTECTOR
> > >       select HAVE_SYSCALL_TRACEPOINTS
> > >       select HAVE_KPROBES
> > > +     select HAVE_KPROBES_ON_FTRACE
> > >       select HAVE_KRETPROBES
> > >       select HAVE_GENERIC_VDSO
> > >       select IOMMU_DMA if IOMMU_SUPPORT
> > > diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> > > index 91fa4baa1a93..875aeb839654 100644
> > > --- a/arch/arm64/include/asm/ftrace.h
> > > +++ b/arch/arm64/include/asm/ftrace.h
> > > @@ -20,6 +20,7 @@
> > >
> > >  /* The BL at the callsite's adjusted rec->ip */
> > >  #define MCOUNT_INSN_SIZE     AARCH64_INSN_SIZE
> > > +#define FTRACE_IP_EXTENSION  MCOUNT_INSN_SIZE
> > >
> > >  #define FTRACE_PLT_IDX               0
> > >  #define FTRACE_REGS_PLT_IDX  1
> > > diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile
> > > index 8e4be92e25b1..4020cfc66564 100644
> > > --- a/arch/arm64/kernel/probes/Makefile
> > > +++ b/arch/arm64/kernel/probes/Makefile
> > > @@ -4,3 +4,4 @@ obj-$(CONFIG_KPROBES)         += kprobes.o decode-insn.o      \
> > >                                  simulate-insn.o
> > >  obj-$(CONFIG_UPROBES)                += uprobes.o decode-insn.o      \
> > >                                  simulate-insn.o
> > > +obj-$(CONFIG_KPROBES_ON_FTRACE)      += ftrace.o
> > > diff --git a/arch/arm64/kernel/probes/ftrace.c b/arch/arm64/kernel/probes/ftrace.c
> > > new file mode 100644
> > > index 000000000000..0643aa2dacdb
> > > --- /dev/null
> > > +++ b/arch/arm64/kernel/probes/ftrace.c
> > > @@ -0,0 +1,78 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +/*
> > > + * Dynamic Ftrace based Kprobes Optimization
> > > + *
> > > + * Copyright (C) Hitachi Ltd., 2012
> > > + * Copyright (C) 2019 Jisheng Zhang <jszhang@kernel.org>
> > > + *                 Synaptics Incorporated
> > > + */
> > > +
> > > +#include <linux/kprobes.h>
> > > +
> > > +/*
> > > + * In arm64 FTRACE_WITH_REGS implementation, we patch two nop instructions:
> > > + * the lr saver and bl ftrace-entry. Both these instructions are claimed
> > > + * by ftrace and we should allow probing on either instruction.  
> >
> > No, the 2nd bl ftrace-entry must not be probed.
> > The pair of lr-saver and bl ftrace-entry is tightly coupled. You can not
> > decouple it.  
> 
> This is the key point: different views of this lead to different implementations.
> I'm just wondering why the two instructions are considered coupled. I think
> we have a situation similar to powerpc's: https://lkml.org/lkml/2019/6/18/646
> The "mflr r0" corresponds to the lr saver here, and the branch to _mcount to
> the bl ftrace-entry. Could you please comment further?
> 
> Thanks in advance
> 

Hmm, I think I now see part of your point. In the v7 implementation:

If the probe is on func+4, that is the bl ftrace-entry, similar to the
mcount call on other architectures, so we allow this probe as normal.

If the probe is on func+0, the first parameter ip of kprobe_ftrace_handler()
points to func+4 (this is adjusted by ftrace) and regs->ip points to func+8,
so in kprobe_ftrace_handler() we set regs->ip to func+0 to call the kprobe
pre handler, then set regs->ip to func+8 to call the kprobe post handler.
As can be seen, the first two instructions are treated as a virtual
mcount call. From this point of view, the lr saver and the bl <ftrace-entry>
are coupled.

If we split patch 3 into two:
one to support kprobes on func+4,
the second to support kprobes on func+0,
it would be much clearer.

Then the key question is whether we can allow kprobes on both func+0 and func+4

Thanks
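
For reference, the instruction layout being debated, with
-fpatchable-function-entry=2 and the arm64 FTRACE_WITH_REGS patching that
this series builds on, is roughly the following (a sketch; the exact
patched instructions are as implemented in arch/arm64/kernel/ftrace.c):

	/*
	 * func+0:  nop  ->  mov x9, x30         (the "lr saver")
	 * func+4:  nop  ->  bl <ftrace entry>   (rec->ip points here)
	 * func+8:  first instruction of the original function body
	 */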
Masami Hiramatsu (Google) Dec. 26, 2019, 9:26 a.m. UTC | #4
On Thu, 26 Dec 2019 04:25:24 +0000
Jisheng Zhang <Jisheng.Zhang@synaptics.com> wrote:

> > > > +/*
> > > > + * In arm64 FTRACE_WITH_REGS implementation, we patch two nop instructions:
> > > > + * the lr saver and bl ftrace-entry. Both these instructions are claimed
> > > > + * by ftrace and we should allow probing on either instruction.  
> > >
> > > No, the 2nd bl ftrace-entry must not be probed.
> > > The pair of lr-saver and bl ftrace-entry is tightly coupled. You can not
> > > decouple it.  
> > 
> > This is the key point: different views of this lead to different implementations.
> > I'm just wondering why the two instructions are considered coupled. I think
> > we have a situation similar to powerpc's: https://lkml.org/lkml/2019/6/18/646
> > The "mflr r0" corresponds to the lr saver here, and the branch to _mcount to
> > the bl ftrace-entry. Could you please comment further?
> > 
> > Thanks in advance
> > 
> 
> Hmm, I think I now see part of your point. In the v7 implementation:
> 
> If the probe is on func+4, that is the bl ftrace-entry, similar to the
> mcount call on other architectures, so we allow this probe as normal.
> 
> If the probe is on func+0, the first parameter ip of kprobe_ftrace_handler()
> points to func+4 (this is adjusted by ftrace) and regs->ip points to func+8,
> so in kprobe_ftrace_handler() we set regs->ip to func+0 to call the kprobe
> pre handler, then set regs->ip to func+8 to call the kprobe post handler.
> As can be seen, the first two instructions are treated as a virtual
> mcount call. From this point of view, the lr saver and the bl <ftrace-entry>
> are coupled.

Yes, this is good. But probing on func+4 is meaningless. Both func+0 and
func+4 call a handler with the same pt_regs. And it should have a stack
pointer that is NOT modified by the lr saver, and regs->lr must point to
the original call address. (The ftrace regs caller must do this fixup to
support live patching correctly.)

And in this case, func+4 has a fake pt_regs because it skips the lr saver's
effects.

And even if you fixed up the pt_regs, there is another problem: what the
user expects at the target instructions.

As you know, dynamic ftrace fills the instructions with NOPs (2 NOPs on
arm64); in that case the pt_regs are probably the same at func+0 and
func+4 except for pc. But if ftrace is already enabled on the function,
the user will see the lr saver and the bl there instead. In that case we
would have to change pt_regs between func+0 and func+4, so it depends on
the current mode.

However, IMHO, it is not worth paying such a simulation cost. No one wants
to probe such a simulated intermediate address, and the result is easy
enough to predict from the code. Moreover, func+4 will not appear in
debuginfo, because those 2 special instructions are inserted by the
compiler, not generated from the source code.

So I don't think we need to support func+4. We only need func+0 or func+8
(which must be the same as func+0 except for regs->pc anyway).

Thank you,

> 
> > If we split patch 3 into two:
> > one to support kprobes on func+4,
> > the second to support kprobes on func+0,
> > it would be much clearer.
> > 
> > Then the key question is whether we can allow kprobes on both func+0 and func+4
> 
> Thanks
Mark Rutland Feb. 28, 2020, 3:31 p.m. UTC | #5
Hi,

This has been on my list to review for a while. Given Masami's comments,
I was waiting for a new version -- is there any plan to respin this?

Otherwise, I have some comments below.

On Wed, Dec 25, 2019 at 09:44:21AM +0000, Jisheng Zhang wrote:
> KPROBES_ON_FTRACE avoids much of the overhead with regular kprobes as it
> eliminates the need for a trap, as well as the need to emulate or
> single-step instructions.

Where does this overhead matter?

> Tested on berlin arm64 platform.
> 
> ~ # mount -t debugfs debugfs /sys/kernel/debug/
> ~ # cd /sys/kernel/debug/
> /sys/kernel/debug # echo 'p _do_fork' > tracing/kprobe_events
> 
> before the patch:
> 
> /sys/kernel/debug # cat kprobes/list
> ffffff801009fe28  k  _do_fork+0x0    [DISABLED]
> 
> after the patch:
> 
> /sys/kernel/debug # cat kprobes/list
> ffffff801009ff54  k  _do_fork+0x0    [DISABLED][FTRACE]

Just to check, how is the kprobe address expected to relate to the
function address? For any of {mcount, mfentry, patchable-function-entry}
there are some number of instructions prior to the call instruction.
Does the user have to provide that address? 

How does this work on other architectures?

> 
> Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
> ---
>  .../debug/kprobes-on-ftrace/arch-support.txt  |  2 +-
>  arch/arm64/Kconfig                            |  1 +
>  arch/arm64/include/asm/ftrace.h               |  1 +
>  arch/arm64/kernel/probes/Makefile             |  1 +
>  arch/arm64/kernel/probes/ftrace.c             | 78 +++++++++++++++++++
>  5 files changed, 82 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kernel/probes/ftrace.c
> 
> diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> index 4fae0464ddff..f9dd9dd91e0c 100644
> --- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> +++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
> @@ -9,7 +9,7 @@
>      |       alpha: | TODO |
>      |         arc: | TODO |
>      |         arm: | TODO |
> -    |       arm64: | TODO |
> +    |       arm64: |  ok  |
>      |         c6x: | TODO |
>      |        csky: | TODO |
>      |       h8300: | TODO |
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index b1b4476ddb83..92b9882889ac 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -166,6 +166,7 @@ config ARM64
>  	select HAVE_STACKPROTECTOR
>  	select HAVE_SYSCALL_TRACEPOINTS
>  	select HAVE_KPROBES
> +	select HAVE_KPROBES_ON_FTRACE

The rest of the code seems to presume FTRACE_WITH_REGS, but you haven't
made that dependency explicit here.
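
A minimal way to spell that dependency out, as a sketch of what seems to be
asked for here, would be something like:

	select HAVE_KPROBES_ON_FTRACE if DYNAMIC_FTRACE_WITH_REGS

in the ARM64 Kconfig entry.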

>  	select HAVE_KRETPROBES
>  	select HAVE_GENERIC_VDSO
>  	select IOMMU_DMA if IOMMU_SUPPORT
> diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> index 91fa4baa1a93..875aeb839654 100644
> --- a/arch/arm64/include/asm/ftrace.h
> +++ b/arch/arm64/include/asm/ftrace.h
> @@ -20,6 +20,7 @@
>  
>  /* The BL at the callsite's adjusted rec->ip */
>  #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
> +#define FTRACE_IP_EXTENSION	MCOUNT_INSN_SIZE

I'm confused by what exactly this is meant to represent. At runtime our
rec->ip is always the BL, so what exactly is this attempting to account
for?

How does this work when using mcount rather than
patchable-function-entry?
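
For context: FTRACE_IP_EXTENSION is consumed by the generic ftrace code
changed earlier in this series, which presumably widens the lookup roughly
as follows, so that an address one instruction before the BL still resolves
to the same ftrace entry. A sketch of that assumption, not code quoted in
this mail:

	unsigned long ftrace_location(unsigned long ip)
	{
		/* match [ip, ip + FTRACE_IP_EXTENSION] rather than just ip */
		return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION);
	}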

>  
>  #define FTRACE_PLT_IDX		0
>  #define FTRACE_REGS_PLT_IDX	1
> diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile
> index 8e4be92e25b1..4020cfc66564 100644
> --- a/arch/arm64/kernel/probes/Makefile
> +++ b/arch/arm64/kernel/probes/Makefile
> @@ -4,3 +4,4 @@ obj-$(CONFIG_KPROBES)		+= kprobes.o decode-insn.o	\
>  				   simulate-insn.o
>  obj-$(CONFIG_UPROBES)		+= uprobes.o decode-insn.o	\
>  				   simulate-insn.o
> +obj-$(CONFIG_KPROBES_ON_FTRACE)	+= ftrace.o
> diff --git a/arch/arm64/kernel/probes/ftrace.c b/arch/arm64/kernel/probes/ftrace.c
> new file mode 100644
> index 000000000000..0643aa2dacdb
> --- /dev/null
> +++ b/arch/arm64/kernel/probes/ftrace.c
> @@ -0,0 +1,78 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Dynamic Ftrace based Kprobes Optimization
> + *
> + * Copyright (C) Hitachi Ltd., 2012
> + * Copyright (C) 2019 Jisheng Zhang <jszhang@kernel.org>
> + *		      Synaptics Incorporated
> + */
> +
> +#include <linux/kprobes.h>
> +
> +/*
> + * In arm64 FTRACE_WITH_REGS implementation, we patch two nop instructions:
> + * the lr saver and bl ftrace-entry. Both these instructions are claimed
> + * by ftrace and we should allow probing on either instruction.
> + */
> +int arch_check_ftrace_location(struct kprobe *p)
> +{
> +	if (ftrace_location((unsigned long)p->addr))
> +		p->flags |= KPROBE_FLAG_FTRACE;
> +	return 0;
> +}

What about when not using patchable-function-entry?

Why do we need to allow probing both?

> +
> +/* Ftrace callback handler for kprobes -- called with preemption disabled */
> +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
> +			   struct ftrace_ops *ops, struct pt_regs *regs)
> +{
> +	bool lr_saver = false;
> +	struct kprobe *p;
> +	struct kprobe_ctlblk *kcb;
> +
> +	/* Preempt is disabled by ftrace */
> +	p = get_kprobe((kprobe_opcode_t *)ip);
> +	if (!p) {
> +		p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
> +		if (unlikely(!p) || kprobe_disabled(p))
> +			return;
> +		lr_saver = true;
> +	}

This complexity worries me. Is it really necessary to allow kprobing on
either instruction?

> +
> +	kcb = get_kprobe_ctlblk();
> +	if (kprobe_running()) {
> +		kprobes_inc_nmissed_count(p);
> +	} else {
> +		unsigned long orig_ip = instruction_pointer(regs);
> +
> +		if (lr_saver)
> +			ip -= MCOUNT_INSN_SIZE;
> +		instruction_pointer_set(regs, ip);
> +		__this_cpu_write(current_kprobe, p);
> +		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
> +		if (!p->pre_handler || !p->pre_handler(p, regs)) {
> +			/*
> +			 * Emulate singlestep (and also recover regs->pc)
> +			 * as if there is a nop
> +			 */
> +			instruction_pointer_set(regs,
> +				(unsigned long)p->addr + MCOUNT_INSN_SIZE);
> +			if (unlikely(p->post_handler)) {
> +				kcb->kprobe_status = KPROBE_HIT_SSDONE;
> +				p->post_handler(p, regs, 0);
> +			}
> +			instruction_pointer_set(regs, orig_ip);

If you're going to mess with the PC then you also need to adjust the
hardware single-step state machine.

Thanks,
Mark.

> +		}
> +		/*
> +		 * If pre_handler returns !0, it changes regs->pc. We have to
> +		 * skip emulating post_handler.
> +		 */
> +		__this_cpu_write(current_kprobe, NULL);
> +	}
> +}
> +NOKPROBE_SYMBOL(kprobe_ftrace_handler);
> +
> +int arch_prepare_kprobe_ftrace(struct kprobe *p)
> +{
> +	p->ainsn.api.insn = NULL;
> +	return 0;
> +}
> -- 
> 2.24.1
>
Masami Hiramatsu (Google) July 21, 2020, 1:24 p.m. UTC | #6
Hi Jisheng,

Are you still working on this series?

If you still want to put a probe on func+4, it is OK if you can
completely emulate the 1st instruction (saving lr on the stack and
changing regs->sp).
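
For what it's worth, in the arm64 FTRACE_WITH_REGS scheme the 1st patched
instruction is "mov x9, x30" rather than a store to the stack, so a
complete emulation could be as small as the following (a sketch under that
assumption; simulate_lr_saver is a hypothetical helper, not an existing
function):

	static void simulate_lr_saver(struct pt_regs *regs)
	{
		/* "mov x9, x30": stash the link register in x9 */
		regs->regs[9] = regs->regs[30];
	}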

Thank you,

On Thu, 26 Dec 2019 18:26:07 +0900
Masami Hiramatsu <mhiramat@kernel.org> wrote:

> [...]
Jisheng Zhang July 24, 2020, 7:06 a.m. UTC | #7
On Tue, 21 Jul 2020 22:24:55 +0900 Masami Hiramatsu wrote:

> 
> 
> Hi Jisheng,

Hi,

> 
> Are you still working on this series?

I will rebase the implementation on the latest code, then try to address
your comments and Mark's comments. I will send out the patches this weekend.

> 
> If you still want to put a probe on func+4, it is OK if you can
> completely emulate the 1st instruction (saving lr on the stack and
> changing regs->sp).

Will check which is the better solution.

Thank you very much
Masami Hiramatsu (Google) July 24, 2020, 4:54 p.m. UTC | #8
On Fri, 24 Jul 2020 15:06:11 +0800
Jisheng Zhang <Jisheng.Zhang@synaptics.com> wrote:

> 
> On Tue, 21 Jul 2020 22:24:55 +0900 Masami Hiramatsu wrote:
> 
> > 
> > 
> > Hi Jisheng,
> 
> Hi,
> 
> > 
> > Are you still working on this series?
> 
> I will rebase the implementation on the latest code, then try to address
> your comments and Mark's comments. I will send out the patches this weekend.
> 
> > 
> > If you still want to put a probe on func+4, it is OK if you can
> > completely emulate the 1st instruction (saving lr on the stack and
> > changing regs->sp).
> 
> Will check which is the better solution.

Thanks Jisheng!

What I'm concerned about is the consistency of pre_handler()@addr and
post_handler()@addr+4, and also whether the values of regs (and the stack)
are the same as the user expects.

Thank you,

Patch

diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
index 4fae0464ddff..f9dd9dd91e0c 100644
--- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
+++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
@@ -9,7 +9,7 @@ 
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1b4476ddb83..92b9882889ac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -166,6 +166,7 @@  config ARM64
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_KPROBES
+	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_KRETPROBES
 	select HAVE_GENERIC_VDSO
 	select IOMMU_DMA if IOMMU_SUPPORT
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 91fa4baa1a93..875aeb839654 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -20,6 +20,7 @@ 
 
 /* The BL at the callsite's adjusted rec->ip */
 #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
+#define FTRACE_IP_EXTENSION	MCOUNT_INSN_SIZE
 
 #define FTRACE_PLT_IDX		0
 #define FTRACE_REGS_PLT_IDX	1
diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile
index 8e4be92e25b1..4020cfc66564 100644
--- a/arch/arm64/kernel/probes/Makefile
+++ b/arch/arm64/kernel/probes/Makefile
@@ -4,3 +4,4 @@  obj-$(CONFIG_KPROBES)		+= kprobes.o decode-insn.o	\
 				   simulate-insn.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o decode-insn.o	\
 				   simulate-insn.o
+obj-$(CONFIG_KPROBES_ON_FTRACE)	+= ftrace.o
diff --git a/arch/arm64/kernel/probes/ftrace.c b/arch/arm64/kernel/probes/ftrace.c
new file mode 100644
index 000000000000..0643aa2dacdb
--- /dev/null
+++ b/arch/arm64/kernel/probes/ftrace.c
@@ -0,0 +1,78 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ * Copyright (C) 2019 Jisheng Zhang <jszhang@kernel.org>
+ *		      Synaptics Incorporated
+ */
+
+#include <linux/kprobes.h>
+
+/*
+ * In arm64 FTRACE_WITH_REGS implementation, we patch two nop instructions:
+ * the lr saver and bl ftrace-entry. Both these instructions are claimed
+ * by ftrace and we should allow probing on either instruction.
+ */
+int arch_check_ftrace_location(struct kprobe *p)
+{
+	if (ftrace_location((unsigned long)p->addr))
+		p->flags |= KPROBE_FLAG_FTRACE;
+	return 0;
+}
+
+/* Ftrace callback handler for kprobes -- called with preemption disabled */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	bool lr_saver = false;
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+
+	/* Preempt is disabled by ftrace */
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (!p) {
+		p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
+		if (unlikely(!p) || kprobe_disabled(p))
+			return;
+		lr_saver = true;
+	}
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		unsigned long orig_ip = instruction_pointer(regs);
+
+		if (lr_saver)
+			ip -= MCOUNT_INSN_SIZE;
+		instruction_pointer_set(regs, ip);
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, regs)) {
+			/*
+			 * Emulate singlestep (and also recover regs->pc)
+			 * as if there is a nop
+			 */
+			instruction_pointer_set(regs,
+				(unsigned long)p->addr + MCOUNT_INSN_SIZE);
+			if (unlikely(p->post_handler)) {
+				kcb->kprobe_status = KPROBE_HIT_SSDONE;
+				p->post_handler(p, regs, 0);
+			}
+			instruction_pointer_set(regs, orig_ip);
+		}
+		/*
+		 * If pre_handler returns !0, it changes regs->pc. We have to
+		 * skip emulating post_handler.
+		 */
+		__this_cpu_write(current_kprobe, NULL);
+	}
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.api.insn = NULL;
+	return 0;
+}