[v2,1/5] powerpc/mm: Introduce temporary mm
diff mbox series

Message ID 20200709040316.12789-2-cmr@informatik.wtf
State New
Headers show
Series
  • Use per-CPU temporary mappings for patching
Related show

Commit Message

Christopher M. Riedl July 9, 2020, 4:03 a.m. UTC
x86 supports the notion of a temporary mm which restricts access to
temporary PTEs to a single CPU. A temporary mm is useful for situations
where a CPU needs to perform sensitive operations (such as patching a
STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
said mappings to other CPUs. A side benefit is that other CPU TLBs do
not need to be flushed when the temporary mm is torn down.

Mappings in the temporary mm can be set in the userspace portion of the
address-space.

Interrupts must be disabled while the temporary mm is in use. HW
breakpoints, which may have been set by userspace as watchpoints on
addresses now within the temporary mm, are saved and disabled when
loading the temporary mm. The HW breakpoints are restored when unloading
the temporary mm. All HW breakpoints are indiscriminately disabled while
the temporary mm is in use.

Based on x86 implementation:

commit cefa929c034e
("x86/mm: Introduce temporary mm structs")

Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
---
 arch/powerpc/include/asm/debug.h       |  1 +
 arch/powerpc/include/asm/mmu_context.h | 64 ++++++++++++++++++++++++++
 arch/powerpc/kernel/process.c          |  5 ++
 3 files changed, 70 insertions(+)

Comments

Daniel Axtens Aug. 6, 2020, 1:27 a.m. UTC | #1
Hi Chris,
  
>  void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
> +void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk);
>  bool ppc_breakpoint_available(void);
>  #ifdef CONFIG_PPC_ADV_DEBUG_REGS
>  extern void do_send_trap(struct pt_regs *regs, unsigned long address,
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index 1a474f6b1992..9269c7c7b04e 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -10,6 +10,7 @@
>  #include <asm/mmu.h>	
>  #include <asm/cputable.h>
>  #include <asm/cputhreads.h>
> +#include <asm/debug.h>
>  
>  /*
>   * Most if the context management is out of line
> @@ -300,5 +301,68 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
>  	return 0;
>  }
>  
> +struct temp_mm {
> +	struct mm_struct *temp;
> +	struct mm_struct *prev;
> +	bool is_kernel_thread;
> +	struct arch_hw_breakpoint brk[HBP_NUM_MAX];
> +};

This is on the nitpicky end, but I wonder if this should be named
temp_mm, or should be labelled something else to capture its broader
purpose as a context for code patching? I'm thinking that a store of
breakpoints is perhaps unusual in a memory-management structure?

I don't have a better suggestion off the top of my head and I'm happy
for you to leave it, I just wanted to flag it as a possible way we could
be clearer.

> +
> +static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
> +{
> +	temp_mm->temp = mm;
> +	temp_mm->prev = NULL;
> +	temp_mm->is_kernel_thread = false;
> +	memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
> +}
> +
> +static inline void use_temporary_mm(struct temp_mm *temp_mm)
> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	temp_mm->is_kernel_thread = current->mm == NULL;
> +	if (temp_mm->is_kernel_thread)
> +		temp_mm->prev = current->active_mm;

You don't seem to restore active_mm below. I don't know what active_mm
does, so I don't know if this is a problem.

> +	else
> +		temp_mm->prev = current->mm;
> +
> +	/*
> +	 * Hash requires a non-NULL current->mm to allocate a userspace address
> +	 * when handling a page fault. Does not appear to hurt in Radix either.
> +	 */
> +	current->mm = temp_mm->temp;
> +	switch_mm_irqs_off(NULL, temp_mm->temp, current);
> +
> +	if (ppc_breakpoint_available()) {

I wondered if this could be changed during a text-patching operation.
AIUI, it potentially can on a P9 via "dawr_enable_dangerous" in debugfs.

I don't know if that's a problem. My concern is that you could turn off
breakpoints, call 'use_temporary_mm', then turn them back on again
before 'unuse_temporary_mm', and then hit a breakpoint that can access
the temporary mm while it is still in use. Is there something else that
makes that safe? Disabling IRQs, maybe?

> +		struct arch_hw_breakpoint null_brk = {0};
> +		int i = 0;
> +
> +		for (; i < nr_wp_slots(); ++i) {

super nitpicky, and I'm not sure if this is actually documented, but I'd
usually see this written as:

for (i = 0; i < nr_wp_slots(); i++) {

Not sure if there's any reason that it _shouldn't_ be written the way
you've written it (and I do like initialising the variable when it's
defined!), I'm just not used to it. (Likewise with the unuse function.)

> +			__get_breakpoint(i, &temp_mm->brk[i]);
> +			if (temp_mm->brk[i].type != 0)
> +				__set_breakpoint(i, &null_brk);
> +		}
> +	}
> +}
> +

Kind regards,
Daniel

> +static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	if (temp_mm->is_kernel_thread)
> +		current->mm = NULL;
> +	else
> +		current->mm = temp_mm->prev;
> +	switch_mm_irqs_off(NULL, temp_mm->prev, current);
> +
> +	if (ppc_breakpoint_available()) {
> +		int i = 0;
> +
> +		for (; i < nr_wp_slots(); ++i)
> +			if (temp_mm->brk[i].type != 0)
> +				__set_breakpoint(i, &temp_mm->brk[i]);
> +	}
> +}
> +
>  #endif /* __KERNEL__ */
>  #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 4650b9bb217f..b6c123bf5edd 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -824,6 +824,11 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
>  	return 0;
>  }
>  
> +void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk)
> +{
> +	memcpy(brk, this_cpu_ptr(&current_brk[nr]), sizeof(*brk));
> +}
> +
>  void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
>  {
>  	memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
> -- 
> 2.27.0
Christopher M. Riedl Aug. 17, 2020, 5:16 a.m. UTC | #2
On Thu Aug 6, 2020 at 6:27 AM CDT, Daniel Axtens wrote:
> Hi Chris,
>   
> >  void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
> > +void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk);
> >  bool ppc_breakpoint_available(void);
> >  #ifdef CONFIG_PPC_ADV_DEBUG_REGS
> >  extern void do_send_trap(struct pt_regs *regs, unsigned long address,
> > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > index 1a474f6b1992..9269c7c7b04e 100644
> > --- a/arch/powerpc/include/asm/mmu_context.h
> > +++ b/arch/powerpc/include/asm/mmu_context.h
> > @@ -10,6 +10,7 @@
> >  #include <asm/mmu.h>	
> >  #include <asm/cputable.h>
> >  #include <asm/cputhreads.h>
> > +#include <asm/debug.h>
> >  
> >  /*
> >   * Most if the context management is out of line
> > @@ -300,5 +301,68 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
> >  	return 0;
> >  }
> >  
> > +struct temp_mm {
> > +	struct mm_struct *temp;
> > +	struct mm_struct *prev;
> > +	bool is_kernel_thread;
> > +	struct arch_hw_breakpoint brk[HBP_NUM_MAX];
> > +};
>
> This is on the nitpicky end, but I wonder if this should be named
> temp_mm, or should be labelled something else to capture its broader
> purpose as a context for code patching? I'm thinking that a store of
> breakpoints is perhaps unusual in a memory-management structure?
>
> I don't have a better suggestion off the top of my head and I'm happy
> for you to leave it, I just wanted to flag it as a possible way we could
> be clearer.

First of all thank you for the review!

I had actually planned to move all this code into lib/code-patching.c
directly (and it turns out that's what x86 ended up doing as well).

>
> > +
> > +static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
> > +{
> > +	temp_mm->temp = mm;
> > +	temp_mm->prev = NULL;
> > +	temp_mm->is_kernel_thread = false;
> > +	memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
> > +}
> > +
> > +static inline void use_temporary_mm(struct temp_mm *temp_mm)
> > +{
> > +	lockdep_assert_irqs_disabled();
> > +
> > +	temp_mm->is_kernel_thread = current->mm == NULL;
> > +	if (temp_mm->is_kernel_thread)
> > +		temp_mm->prev = current->active_mm;
>
> You don't seem to restore active_mm below. I don't know what active_mm
> does, so I don't know if this is a problem.

For kernel threads 'current->mm' is NULL since a kthread does not need
a userspace mm; however they still need a mm so they "borrow" one which
is indicated by 'current->active_mm'.

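Because Hash requires a non-NULL 'current->mm' when handling a page
fault, 'current->mm' gets set to the temp_mm while the temporary mm is
in use. That is why 'current->mm' is what needs to be restored (to NULL
for a kernel thread) afterwards; 'current->active_mm' itself is never
modified, it is only read to remember which mm to switch back to. This
is a special case for kernel threads and Hash translation.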

>
> > +	else
> > +		temp_mm->prev = current->mm;
> > +
> > +	/*
> > +	 * Hash requires a non-NULL current->mm to allocate a userspace address
> > +	 * when handling a page fault. Does not appear to hurt in Radix either.
> > +	 */
> > +	current->mm = temp_mm->temp;
> > +	switch_mm_irqs_off(NULL, temp_mm->temp, current);
> > +
> > +	if (ppc_breakpoint_available()) {
>
> I wondered if this could be changed during a text-patching operation.
> AIUI, it potentially can on a P9 via "dawr_enable_dangerous" in debugfs.
>
> I don't know if that's a problem. My concern is that you could turn off
> breakpoints, call 'use_temporary_mm', then turn them back on again
> before 'unuse_temporary_mm', and then hit a breakpoint that can access
> the temporary mm while it is still in use. Is there something else that
> makes that safe? Disabling IRQs, maybe?

Hmm, I will have to investigate this more. I'm not sure if there is a
better way to just completely disable breakpoints while the temporary mm
is in use.

>
> > +		struct arch_hw_breakpoint null_brk = {0};
> > +		int i = 0;
> > +
> > +		for (; i < nr_wp_slots(); ++i) {
>
> super nitpicky, and I'm not sure if this is actually documented, but I'd
> usually see this written as:
>
> for (i = 0; i < nr_wp_slots(); i++) {
>
> Not sure if there's any reason that it _shouldn't_ be written the way
> you've written it (and I do like initialising the variable when it's
> defined!), I'm just not used to it. (Likewise with the unuse function.)
>

I've found other places (even in arch/powerpc!) where this is done so I
think it's fine. I prefer using this style when the variable
declaration and initialization are "close" to the loop statement.

> > +			__get_breakpoint(i, &temp_mm->brk[i]);
> > +			if (temp_mm->brk[i].type != 0)
> > +				__set_breakpoint(i, &null_brk);
> > +		}
> > +	}
> > +}
> > +
>
> Kind regards,
> Daniel
>
> > +static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
> > +{
> > +	lockdep_assert_irqs_disabled();
> > +
> > +	if (temp_mm->is_kernel_thread)
> > +		current->mm = NULL;
> > +	else
> > +		current->mm = temp_mm->prev;
> > +	switch_mm_irqs_off(NULL, temp_mm->prev, current);
> > +
> > +	if (ppc_breakpoint_available()) {
> > +		int i = 0;
> > +
> > +		for (; i < nr_wp_slots(); ++i)
> > +			if (temp_mm->brk[i].type != 0)
> > +				__set_breakpoint(i, &temp_mm->brk[i]);
> > +	}
> > +}
> > +
> >  #endif /* __KERNEL__ */
> >  #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
> > diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> > index 4650b9bb217f..b6c123bf5edd 100644
> > --- a/arch/powerpc/kernel/process.c
> > +++ b/arch/powerpc/kernel/process.c
> > @@ -824,6 +824,11 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
> >  	return 0;
> >  }
> >  
> > +void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk)
> > +{
> > +	memcpy(brk, this_cpu_ptr(&current_brk[nr]), sizeof(*brk));
> > +}
> > +
> >  void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
> >  {
> >  	memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
> > -- 
> > 2.27.0

Patch
diff mbox series

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index ec57daf87f40..827350c9bcf3 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -46,6 +46,7 @@  static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
 void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
+void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk);
 bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 1a474f6b1992..9269c7c7b04e 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -10,6 +10,7 @@ 
 #include <asm/mmu.h>	
 #include <asm/cputable.h>
 #include <asm/cputhreads.h>
+#include <asm/debug.h>
 
 /*
  * Most if the context management is out of line
@@ -300,5 +301,68 @@  static inline int arch_dup_mmap(struct mm_struct *oldmm,
 	return 0;
 }
 
+struct temp_mm {
+	struct mm_struct *temp;
+	struct mm_struct *prev;
+	bool is_kernel_thread;
+	struct arch_hw_breakpoint brk[HBP_NUM_MAX];
+};
+
+static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
+{
+	temp_mm->temp = mm;
+	temp_mm->prev = NULL;
+	temp_mm->is_kernel_thread = false;
+	memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
+}
+
+static inline void use_temporary_mm(struct temp_mm *temp_mm)
+{
+	lockdep_assert_irqs_disabled();
+
+	temp_mm->is_kernel_thread = current->mm == NULL;
+	if (temp_mm->is_kernel_thread)
+		temp_mm->prev = current->active_mm;
+	else
+		temp_mm->prev = current->mm;
+
+	/*
+	 * Hash requires a non-NULL current->mm to allocate a userspace address
+	 * when handling a page fault. Does not appear to hurt in Radix either.
+	 */
+	current->mm = temp_mm->temp;
+	switch_mm_irqs_off(NULL, temp_mm->temp, current);
+
+	if (ppc_breakpoint_available()) {
+		struct arch_hw_breakpoint null_brk = {0};
+		int i = 0;
+
+		for (; i < nr_wp_slots(); ++i) {
+			__get_breakpoint(i, &temp_mm->brk[i]);
+			if (temp_mm->brk[i].type != 0)
+				__set_breakpoint(i, &null_brk);
+		}
+	}
+}
+
+static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (temp_mm->is_kernel_thread)
+		current->mm = NULL;
+	else
+		current->mm = temp_mm->prev;
+	switch_mm_irqs_off(NULL, temp_mm->prev, current);
+
+	if (ppc_breakpoint_available()) {
+		int i = 0;
+
+		for (; i < nr_wp_slots(); ++i)
+			if (temp_mm->brk[i].type != 0)
+				__set_breakpoint(i, &temp_mm->brk[i]);
+	}
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4650b9bb217f..b6c123bf5edd 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -824,6 +824,11 @@  static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
 	return 0;
 }
 
+void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
+	memcpy(brk, this_cpu_ptr(&current_brk[nr]), sizeof(*brk));
+}
+
 void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
 {
 	memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));