[v3,21/27] x86/ftrace: Adapt function tracing for PIE support
diff mbox

Message ID 20180523195421.180248-22-thgarnie@google.com
State New
Headers show

Commit Message

Thomas Garnier May 23, 2018, 7:54 p.m. UTC
When using -fPIE/PIC with function tracing, the compiler generates a
call through the GOT (call *__fentry__@GOTPCREL). This instruction
takes 6 bytes instead of 5 on the usual relative call.

If PIE is enabled, replace the 6th byte of the GOT call by a 1-byte nop
so ftrace can handle the previous 5-bytes as before.

Position Independent Executable (PIE) support will allow to extended the
KASLR randomization range below the -2G memory limit.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
 arch/x86/include/asm/ftrace.h   |  6 +++--
 arch/x86/include/asm/sections.h |  4 ++++
 arch/x86/kernel/ftrace.c        | 42 +++++++++++++++++++++++++++++++--
 3 files changed, 48 insertions(+), 4 deletions(-)

Comments

Petr Mladek May 24, 2018, 11:40 a.m. UTC | #1
On Wed 2018-05-23 12:54:15, Thomas Garnier wrote:
> When using -fPIE/PIC with function tracing, the compiler generates a
> call through the GOT (call *__fentry__@GOTPCREL). This instruction
> takes 6 bytes instead of 5 on the usual relative call.
> 
> If PIE is enabled, replace the 6th byte of the GOT call by a 1-byte nop
> so ftrace can handle the previous 5-bytes as before.
> 
> Position Independent Executable (PIE) support will allow to extended the
> KASLR randomization range below the -2G memory limit.
> 
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
>  arch/x86/include/asm/ftrace.h   |  6 +++--
>  arch/x86/include/asm/sections.h |  4 ++++
>  arch/x86/kernel/ftrace.c        | 42 +++++++++++++++++++++++++++++++--
>  3 files changed, 48 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
> index c18ed65287d5..8f2decce38d8 100644
> --- a/arch/x86/include/asm/ftrace.h
> +++ b/arch/x86/include/asm/ftrace.h
> @@ -25,9 +25,11 @@ extern void __fentry__(void);
>  static inline unsigned long ftrace_call_adjust(unsigned long addr)
>  {
>  	/*
> -	 * addr is the address of the mcount call instruction.
> -	 * recordmcount does the necessary offset calculation.
> +	 * addr is the address of the mcount call instruction. PIE has always a
> +	 * byte added to the start of the function.
>  	 */
> +	if (IS_ENABLED(CONFIG_X86_PIE))
> +		addr -= 1;

This seems to modify the address even for modules that are _not_ compiled with
PIE, see below.

>  	return addr;
>  }
>  
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 01ebcb6f263e..73b3c30cb7a3 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -135,6 +135,44 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
>  	return 0;
>  }
>  
> +/* Bytes before call GOT offset */
> +const unsigned char got_call_preinsn[] = { 0xff, 0x15 };
> +
> +static int
> +ftrace_modify_initial_code(unsigned long ip, unsigned const char *old_code,
> +			   unsigned const char *new_code)
> +{
> +	unsigned char replaced[MCOUNT_INSN_SIZE + 1];
> +
> +	ftrace_expected = old_code;
> +
> +	/*
> +	 * If PIE is not enabled or no GOT call was found, default to the
> +	 * original approach to code modification.
> +	 */
> +	if (!IS_ENABLED(CONFIG_X86_PIE) ||
> +	    probe_kernel_read(replaced, (void *)ip, sizeof(replaced)) ||
> +	    memcmp(replaced, got_call_preinsn, sizeof(got_call_preinsn)))
> +		return ftrace_modify_code_direct(ip, old_code, new_code);

And this looks like an attempt to handle modules compiled without
PIE. Does it works with the right ip in that case?

I wonder if a better solution would be to update
scripts/recordmcount.c to store the incremented location into the module.

IMPORTANT: I have only vague picture about how this all works. It is
possible that I am completely wrong. The code might be correct,
especially if you tested this situation.

Best Regards,
Petr
Steven Rostedt May 24, 2018, 8:16 p.m. UTC | #2
On Thu, 24 May 2018 13:40:24 +0200
Petr Mladek <pmladek@suse.com> wrote:

> On Wed 2018-05-23 12:54:15, Thomas Garnier wrote:
> > When using -fPIE/PIC with function tracing, the compiler generates a
> > call through the GOT (call *__fentry__@GOTPCREL). This instruction
> > takes 6 bytes instead of 5 on the usual relative call.
> > 
> > If PIE is enabled, replace the 6th byte of the GOT call by a 1-byte nop
> > so ftrace can handle the previous 5-bytes as before.
> > 
> > Position Independent Executable (PIE) support will allow to extended the
> > KASLR randomization range below the -2G memory limit.
> > 
> > Signed-off-by: Thomas Garnier <thgarnie@google.com>
> > ---
> >  arch/x86/include/asm/ftrace.h   |  6 +++--
> >  arch/x86/include/asm/sections.h |  4 ++++
> >  arch/x86/kernel/ftrace.c        | 42 +++++++++++++++++++++++++++++++--
> >  3 files changed, 48 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
> > index c18ed65287d5..8f2decce38d8 100644
> > --- a/arch/x86/include/asm/ftrace.h
> > +++ b/arch/x86/include/asm/ftrace.h
> > @@ -25,9 +25,11 @@ extern void __fentry__(void);
> >  static inline unsigned long ftrace_call_adjust(unsigned long addr)
> >  {
> >  	/*
> > -	 * addr is the address of the mcount call instruction.
> > -	 * recordmcount does the necessary offset calculation.
> > +	 * addr is the address of the mcount call instruction. PIE has always a
> > +	 * byte added to the start of the function.
> >  	 */
> > +	if (IS_ENABLED(CONFIG_X86_PIE))
> > +		addr -= 1;  
> 
> This seems to modify the address even for modules that are _not_ compiled with
> PIE, see below.

Can one load a module not compiled for PIE in a kernel with PIE?

> 
> >  	return addr;
> >  }
> >  
> > diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> > index 01ebcb6f263e..73b3c30cb7a3 100644
> > --- a/arch/x86/kernel/ftrace.c
> > +++ b/arch/x86/kernel/ftrace.c
> > @@ -135,6 +135,44 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
> >  	return 0;
> >  }
> >  
> > +/* Bytes before call GOT offset */
> > +const unsigned char got_call_preinsn[] = { 0xff, 0x15 };
> > +
> > +static int
> > +ftrace_modify_initial_code(unsigned long ip, unsigned const char *old_code,
> > +			   unsigned const char *new_code)
> > +{
> > +	unsigned char replaced[MCOUNT_INSN_SIZE + 1];
> > +
> > +	ftrace_expected = old_code;
> > +
> > +	/*
> > +	 * If PIE is not enabled or no GOT call was found, default to the
> > +	 * original approach to code modification.
> > +	 */
> > +	if (!IS_ENABLED(CONFIG_X86_PIE) ||
> > +	    probe_kernel_read(replaced, (void *)ip, sizeof(replaced)) ||
> > +	    memcmp(replaced, got_call_preinsn, sizeof(got_call_preinsn)))
> > +		return ftrace_modify_code_direct(ip, old_code, new_code);  
> 
> And this looks like an attempt to handle modules compiled without
> PIE. Does it works with the right ip in that case?

I'm guessing the || is for the "or no GOT call was found", but it
doesn't explain why no GOT would be found.

> 
> I wonder if a better solution would be to update
> scripts/recordmcount.c to store the incremented location into the module.

If recordmcount.c can handle this, then I think that's the preferred
approach. Thanks!

-- Steve

> 
> IMPORTANT: I have only vague picture about how this all works. It is
> possible that I am completely wrong. The code might be correct,
> especially if you tested this situation.
> 
> Best Regards,
> Petr
Thomas Garnier May 24, 2018, 8:41 p.m. UTC | #3
On Thu, May 24, 2018 at 1:16 PM Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 24 May 2018 13:40:24 +0200
> Petr Mladek <pmladek@suse.com> wrote:

> > On Wed 2018-05-23 12:54:15, Thomas Garnier wrote:
> > > When using -fPIE/PIC with function tracing, the compiler generates a
> > > call through the GOT (call *__fentry__@GOTPCREL). This instruction
> > > takes 6 bytes instead of 5 on the usual relative call.
> > >
> > > If PIE is enabled, replace the 6th byte of the GOT call by a 1-byte
nop
> > > so ftrace can handle the previous 5-bytes as before.
> > >
> > > Position Independent Executable (PIE) support will allow to extended
the
> > > KASLR randomization range below the -2G memory limit.
> > >
> > > Signed-off-by: Thomas Garnier <thgarnie@google.com>
> > > ---
> > >  arch/x86/include/asm/ftrace.h   |  6 +++--
> > >  arch/x86/include/asm/sections.h |  4 ++++
> > >  arch/x86/kernel/ftrace.c        | 42
+++++++++++++++++++++++++++++++--
> > >  3 files changed, 48 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/arch/x86/include/asm/ftrace.h
b/arch/x86/include/asm/ftrace.h
> > > index c18ed65287d5..8f2decce38d8 100644
> > > --- a/arch/x86/include/asm/ftrace.h
> > > +++ b/arch/x86/include/asm/ftrace.h
> > > @@ -25,9 +25,11 @@ extern void __fentry__(void);
> > >  static inline unsigned long ftrace_call_adjust(unsigned long addr)
> > >  {
> > >     /*
> > > -    * addr is the address of the mcount call instruction.
> > > -    * recordmcount does the necessary offset calculation.
> > > +    * addr is the address of the mcount call instruction. PIE has
always a
> > > +    * byte added to the start of the function.
> > >      */
> > > +   if (IS_ENABLED(CONFIG_X86_PIE))
> > > +           addr -= 1;
> >
> > This seems to modify the address even for modules that are _not_
compiled with
> > PIE, see below.

> Can one load a module not compiled for PIE in a kernel with PIE?

> >
> > >     return addr;
> > >  }
> > >
> > > diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> > > index 01ebcb6f263e..73b3c30cb7a3 100644
> > > --- a/arch/x86/kernel/ftrace.c
> > > +++ b/arch/x86/kernel/ftrace.c
> > > @@ -135,6 +135,44 @@ ftrace_modify_code_direct(unsigned long ip,
unsigned const char *old_code,
> > >     return 0;
> > >  }
> > >
> > > +/* Bytes before call GOT offset */
> > > +const unsigned char got_call_preinsn[] = { 0xff, 0x15 };
> > > +
> > > +static int
> > > +ftrace_modify_initial_code(unsigned long ip, unsigned const char
*old_code,
> > > +                      unsigned const char *new_code)
> > > +{
> > > +   unsigned char replaced[MCOUNT_INSN_SIZE + 1];
> > > +
> > > +   ftrace_expected = old_code;
> > > +
> > > +   /*
> > > +    * If PIE is not enabled or no GOT call was found, default to the
> > > +    * original approach to code modification.
> > > +    */
> > > +   if (!IS_ENABLED(CONFIG_X86_PIE) ||
> > > +       probe_kernel_read(replaced, (void *)ip, sizeof(replaced)) ||
> > > +       memcmp(replaced, got_call_preinsn, sizeof(got_call_preinsn)))
> > > +           return ftrace_modify_code_direct(ip, old_code, new_code);
> >
> > And this looks like an attempt to handle modules compiled without
> > PIE. Does it works with the right ip in that case?

> I'm guessing the || is for the "or no GOT call was found", but it
> doesn't explain why no GOT would be found.

Yes, maybe I could have made it work by using text_ip_addr earlier.


> >
> > I wonder if a better solution would be to update
> > scripts/recordmcount.c to store the incremented location into the
module.

I will look into it.


> If recordmcount.c can handle this, then I think that's the preferred
> approach. Thanks!

> -- Steve

> >
> > IMPORTANT: I have only vague picture about how this all works. It is
> > possible that I am completely wrong. The code might be correct,
> > especially if you tested this situation.
> >
> > Best Regards,
> > Petr
Thomas Garnier May 29, 2018, 6:37 p.m. UTC | #4
On Thu, May 24, 2018 at 1:41 PM Thomas Garnier <thgarnie@google.com> wrote:


> On Thu, May 24, 2018 at 1:16 PM Steven Rostedt <rostedt@goodmis.org>
wrote:

> > On Thu, 24 May 2018 13:40:24 +0200
> > Petr Mladek <pmladek@suse.com> wrote:

> > > On Wed 2018-05-23 12:54:15, Thomas Garnier wrote:
> > > > When using -fPIE/PIC with function tracing, the compiler generates a
> > > > call through the GOT (call *__fentry__@GOTPCREL). This instruction
> > > > takes 6 bytes instead of 5 on the usual relative call.
> > > >
> > > > If PIE is enabled, replace the 6th byte of the GOT call by a 1-byte
> nop
> > > > so ftrace can handle the previous 5-bytes as before.
> > > >
> > > > Position Independent Executable (PIE) support will allow to extended
> the
> > > > KASLR randomization range below the -2G memory limit.
> > > >
> > > > Signed-off-by: Thomas Garnier <thgarnie@google.com>
> > > > ---
> > > >  arch/x86/include/asm/ftrace.h   |  6 +++--
> > > >  arch/x86/include/asm/sections.h |  4 ++++
> > > >  arch/x86/kernel/ftrace.c        | 42
> +++++++++++++++++++++++++++++++--
> > > >  3 files changed, 48 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/arch/x86/include/asm/ftrace.h
> b/arch/x86/include/asm/ftrace.h
> > > > index c18ed65287d5..8f2decce38d8 100644
> > > > --- a/arch/x86/include/asm/ftrace.h
> > > > +++ b/arch/x86/include/asm/ftrace.h
> > > > @@ -25,9 +25,11 @@ extern void __fentry__(void);
> > > >  static inline unsigned long ftrace_call_adjust(unsigned long addr)
> > > >  {
> > > >     /*
> > > > -    * addr is the address of the mcount call instruction.
> > > > -    * recordmcount does the necessary offset calculation.
> > > > +    * addr is the address of the mcount call instruction. PIE has
> always a
> > > > +    * byte added to the start of the function.
> > > >      */
> > > > +   if (IS_ENABLED(CONFIG_X86_PIE))
> > > > +           addr -= 1;
> > >
> > > This seems to modify the address even for modules that are _not_
> compiled with
> > > PIE, see below.

> > Can one load a module not compiled for PIE in a kernel with PIE?

> > >
> > > >     return addr;
> > > >  }
> > > >
> > > > diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> > > > index 01ebcb6f263e..73b3c30cb7a3 100644
> > > > --- a/arch/x86/kernel/ftrace.c
> > > > +++ b/arch/x86/kernel/ftrace.c
> > > > @@ -135,6 +135,44 @@ ftrace_modify_code_direct(unsigned long ip,
> unsigned const char *old_code,
> > > >     return 0;
> > > >  }
> > > >
> > > > +/* Bytes before call GOT offset */
> > > > +const unsigned char got_call_preinsn[] = { 0xff, 0x15 };
> > > > +
> > > > +static int
> > > > +ftrace_modify_initial_code(unsigned long ip, unsigned const char
> *old_code,
> > > > +                      unsigned const char *new_code)
> > > > +{
> > > > +   unsigned char replaced[MCOUNT_INSN_SIZE + 1];
> > > > +
> > > > +   ftrace_expected = old_code;
> > > > +
> > > > +   /*
> > > > +    * If PIE is not enabled or no GOT call was found, default to
the
> > > > +    * original approach to code modification.
> > > > +    */
> > > > +   if (!IS_ENABLED(CONFIG_X86_PIE) ||
> > > > +       probe_kernel_read(replaced, (void *)ip, sizeof(replaced)) ||
> > > > +       memcmp(replaced, got_call_preinsn,
sizeof(got_call_preinsn)))
> > > > +           return ftrace_modify_code_direct(ip, old_code,
new_code);
> > >
> > > And this looks like an attempt to handle modules compiled without
> > > PIE. Does it works with the right ip in that case?

> > I'm guessing the || is for the "or no GOT call was found", but it
> > doesn't explain why no GOT would be found.

> Yes, maybe I could have made it work by using text_ip_addr earlier.


> > >
> > > I wonder if a better solution would be to update
> > > scripts/recordmcount.c to store the incremented location into the
> module.

> I will look into it.

Found a way to properly change the __mcount_loc using the preprocessing
(removing the need for -1 on the addr). It will be part of the next version.

Thanks for the feedback.



> > If recordmcount.c can handle this, then I think that's the preferred
> > approach. Thanks!

> > -- Steve

> > >
> > > IMPORTANT: I have only vague picture about how this all works. It is
> > > possible that I am completely wrong. The code might be correct,
> > > especially if you tested this situation.
> > >
> > > Best Regards,
> > > Petr



> --
> Thomas

Patch
diff mbox

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index c18ed65287d5..8f2decce38d8 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -25,9 +25,11 @@  extern void __fentry__(void);
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
-	 * addr is the address of the mcount call instruction.
-	 * recordmcount does the necessary offset calculation.
+	 * addr is the address of the mcount call instruction. PIE has always a
+	 * byte added to the start of the function.
 	 */
+	if (IS_ENABLED(CONFIG_X86_PIE))
+		addr -= 1;
 	return addr;
 }
 
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 5c019d23d06b..da3d98bb2bcb 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -13,4 +13,8 @@  extern char __end_rodata_hpage_align[];
 extern char __entry_trampoline_start[], __entry_trampoline_end[];
 #endif
 
+#if defined(CONFIG_X86_PIE)
+extern char __start_got[], __end_got[];
+#endif
+
 #endif	/* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 01ebcb6f263e..73b3c30cb7a3 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -102,7 +102,7 @@  static const unsigned char *ftrace_nop_replace(void)
 
 static int
 ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
-		   unsigned const char *new_code)
+			  unsigned const char *new_code)
 {
 	unsigned char replaced[MCOUNT_INSN_SIZE];
 
@@ -135,6 +135,44 @@  ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
 	return 0;
 }
 
+/* Bytes before call GOT offset */
+const unsigned char got_call_preinsn[] = { 0xff, 0x15 };
+
+static int
+ftrace_modify_initial_code(unsigned long ip, unsigned const char *old_code,
+			   unsigned const char *new_code)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE + 1];
+
+	ftrace_expected = old_code;
+
+	/*
+	 * If PIE is not enabled or no GOT call was found, default to the
+	 * original approach to code modification.
+	 */
+	if (!IS_ENABLED(CONFIG_X86_PIE) ||
+	    probe_kernel_read(replaced, (void *)ip, sizeof(replaced)) ||
+	    memcmp(replaced, got_call_preinsn, sizeof(got_call_preinsn)))
+		return ftrace_modify_code_direct(ip, old_code, new_code);
+
+	/*
+	 * Build a nop slide with a 5-byte nop and 1-byte nop to keep the ftrace
+	 * hooking algorithm working with the expected 5 bytes instruction.
+	 */
+	memcpy(replaced, new_code, MCOUNT_INSN_SIZE);
+	replaced[MCOUNT_INSN_SIZE] = ideal_nops[1][0];
+
+	ip = text_ip_addr(ip);
+
+	if (probe_kernel_write((void *)ip, replaced, sizeof(replaced)))
+		return -EPERM;
+
+	sync_core();
+
+	return 0;
+
+}
+
 int ftrace_make_nop(struct module *mod,
 		    struct dyn_ftrace *rec, unsigned long addr)
 {
@@ -153,7 +191,7 @@  int ftrace_make_nop(struct module *mod,
 	 * just modify the code directly.
 	 */
 	if (addr == MCOUNT_ADDR)
-		return ftrace_modify_code_direct(rec->ip, old, new);
+		return ftrace_modify_initial_code(rec->ip, old, new);
 
 	ftrace_expected = NULL;