diff mbox series

[RFC,v2,1/5] powerpc/mm: Introduce temporary mm

Message ID 20200429020531.20684-2-cmr@informatik.wtf (mailing list archive)
State New, archived
Headers show
Series Use per-CPU temporary mappings for patching | expand

Commit Message

Christopher M. Riedl April 29, 2020, 2:05 a.m. UTC
x86 supports the notion of a temporary mm which restricts access to
temporary PTEs to a single CPU. A temporary mm is useful for situations
where a CPU needs to perform sensitive operations (such as patching a
STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
said mappings to other CPUs. A side benefit is that other CPU TLBs do
not need to be flushed when the temporary mm is torn down.

Mappings in the temporary mm can be set in the userspace portion of the
address-space.

Interrupts must be disabled while the temporary mm is in use. HW
breakpoints, which may have been set by userspace as watchpoints on
addresses now within the temporary mm, are saved and disabled when
loading the temporary mm. The HW breakpoints are restored when unloading
the temporary mm. All HW breakpoints are indiscriminately disabled while
the temporary mm is in use.

Based on x86 implementation:

commit cefa929c034e
("x86/mm: Introduce temporary mm structs")

Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
---
 arch/powerpc/include/asm/debug.h       |  1 +
 arch/powerpc/include/asm/mmu_context.h | 54 ++++++++++++++++++++++++++
 arch/powerpc/kernel/process.c          |  5 +++
 3 files changed, 60 insertions(+)

Comments

Christophe Leroy April 29, 2020, 5:39 a.m. UTC | #1
Le 29/04/2020 à 04:05, Christopher M. Riedl a écrit :
> x86 supports the notion of a temporary mm which restricts access to
> temporary PTEs to a single CPU. A temporary mm is useful for situations
> where a CPU needs to perform sensitive operations (such as patching a
> STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
> said mappings to other CPUs. A side benefit is that other CPU TLBs do
> not need to be flushed when the temporary mm is torn down.
> 
> Mappings in the temporary mm can be set in the userspace portion of the
> address-space.
> 
> Interrupts must be disabled while the temporary mm is in use. HW
> breakpoints, which may have been set by userspace as watchpoints on
> addresses now within the temporary mm, are saved and disabled when
> loading the temporary mm. The HW breakpoints are restored when unloading
> the temporary mm. All HW breakpoints are indiscriminately disabled while
> the temporary mm is in use.
> 
> Based on x86 implementation:
> 
> commit cefa929c034e
> ("x86/mm: Introduce temporary mm structs")
> 
> Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
> ---
>   arch/powerpc/include/asm/debug.h       |  1 +
>   arch/powerpc/include/asm/mmu_context.h | 54 ++++++++++++++++++++++++++
>   arch/powerpc/kernel/process.c          |  5 +++
>   3 files changed, 60 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
> index 7756026b95ca..b945bc16c932 100644
> --- a/arch/powerpc/include/asm/debug.h
> +++ b/arch/powerpc/include/asm/debug.h
> @@ -45,6 +45,7 @@ static inline int debugger_break_match(struct pt_regs *regs) { return 0; }
>   static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
>   #endif
>   
> +void __get_breakpoint(struct arch_hw_breakpoint *brk);
>   void __set_breakpoint(struct arch_hw_breakpoint *brk);
>   bool ppc_breakpoint_available(void);
>   #ifdef CONFIG_PPC_ADV_DEBUG_REGS
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index 360367c579de..57a8695fe63f 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -10,6 +10,7 @@
>   #include <asm/mmu.h>	
>   #include <asm/cputable.h>
>   #include <asm/cputhreads.h>
> +#include <asm/debug.h>
>   
>   /*
>    * Most if the context management is out of line
> @@ -270,5 +271,58 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
>   	return 0;
>   }
>   
> +struct temp_mm {
> +	struct mm_struct *temp;
> +	struct mm_struct *prev;
> +	bool is_kernel_thread;
> +	struct arch_hw_breakpoint brk;
> +};
> +
> +static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
> +{
> +	temp_mm->temp = mm;
> +	temp_mm->prev = NULL;
> +	temp_mm->is_kernel_thread = false;
> +	memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
> +}
> +
> +static inline void use_temporary_mm(struct temp_mm *temp_mm)
> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	temp_mm->is_kernel_thread = current->mm == NULL;
> +	if (temp_mm->is_kernel_thread)
> +		temp_mm->prev = current->active_mm;
> +	else
> +		temp_mm->prev = current->mm;
> +
> +	/*
> +	 * Hash requires a non-NULL current->mm to allocate a userspace address
> +	 * when handling a page fault. Does not appear to hurt in Radix either.
> +	 */
> +	current->mm = temp_mm->temp;
> +	switch_mm_irqs_off(NULL, temp_mm->temp, current);
> +
> +	if (ppc_breakpoint_available()) {
> +		__get_breakpoint(&temp_mm->brk);
> +		if (temp_mm->brk.type != 0)
> +			hw_breakpoint_disable();
> +	}
> +}
> +
> +static inline void unuse_temporary_mm(struct temp_mm *temp_mm)

Not sure "unuse" is a best naming, allthought I don't have a better 
suggestion a the moment. If not using temporary_mm anymore, what are we 
using now ?

> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	if (temp_mm->is_kernel_thread)
> +		current->mm = NULL;
> +	else
> +		current->mm = temp_mm->prev;
> +	switch_mm_irqs_off(NULL, temp_mm->prev, current);
> +
> +	if (ppc_breakpoint_available() && temp_mm->brk.type != 0)
> +		__set_breakpoint(&temp_mm->brk);
> +}
> +
>   #endif /* __KERNEL__ */
>   #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 9c21288f8645..ec4cf890d92c 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -800,6 +800,11 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
>   	return 0;
>   }
>   
> +void __get_breakpoint(struct arch_hw_breakpoint *brk)
> +{
> +	memcpy(brk, this_cpu_ptr(&current_brk), sizeof(*brk));
> +}
> +
>   void __set_breakpoint(struct arch_hw_breakpoint *brk)
>   {
>   	memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
> 

Christophe
Christophe Leroy April 29, 2020, 5:48 a.m. UTC | #2
Le 29/04/2020 à 04:05, Christopher M. Riedl a écrit :
> x86 supports the notion of a temporary mm which restricts access to
> temporary PTEs to a single CPU. A temporary mm is useful for situations
> where a CPU needs to perform sensitive operations (such as patching a
> STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
> said mappings to other CPUs. A side benefit is that other CPU TLBs do
> not need to be flushed when the temporary mm is torn down.
> 
> Mappings in the temporary mm can be set in the userspace portion of the
> address-space.
> 
> Interrupts must be disabled while the temporary mm is in use. HW
> breakpoints, which may have been set by userspace as watchpoints on
> addresses now within the temporary mm, are saved and disabled when
> loading the temporary mm. The HW breakpoints are restored when unloading
> the temporary mm. All HW breakpoints are indiscriminately disabled while
> the temporary mm is in use.

Why do we need to use a temporary mm all the time ?

Doesn't each CPU have its own mm already ? Only the upper address space 
is shared between all mm's but each mm has its own lower address space, 
at least when it is running a user process. Why not just use that mm ? 
As we are mapping then unmapping with interrupts disabled, there is no 
risk at all that the user starts running while the patch page is mapped, 
so I'm not sure why switching to a temporary mm is needed.


> 
> Based on x86 implementation:
> 
> commit cefa929c034e
> ("x86/mm: Introduce temporary mm structs")
> 
> Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>

Christophe
Christopher M. Riedl May 1, 2020, 8:30 p.m. UTC | #3
On Wed Apr 29, 2020 at 7:39 AM, Christophe Leroy wrote:
>
> 
>
> 
> Le 29/04/2020 à 04:05, Christopher M. Riedl a écrit :
> > x86 supports the notion of a temporary mm which restricts access to
> > temporary PTEs to a single CPU. A temporary mm is useful for situations
> > where a CPU needs to perform sensitive operations (such as patching a
> > STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
> > said mappings to other CPUs. A side benefit is that other CPU TLBs do
> > not need to be flushed when the temporary mm is torn down.
> > 
> > Mappings in the temporary mm can be set in the userspace portion of the
> > address-space.
> > 
> > Interrupts must be disabled while the temporary mm is in use. HW
> > breakpoints, which may have been set by userspace as watchpoints on
> > addresses now within the temporary mm, are saved and disabled when
> > loading the temporary mm. The HW breakpoints are restored when unloading
> > the temporary mm. All HW breakpoints are indiscriminately disabled while
> > the temporary mm is in use.
> > 
> > Based on x86 implementation:
> > 
> > commit cefa929c034e
> > ("x86/mm: Introduce temporary mm structs")
> > 
> > Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
> > ---
> >   arch/powerpc/include/asm/debug.h       |  1 +
> >   arch/powerpc/include/asm/mmu_context.h | 54 ++++++++++++++++++++++++++
> >   arch/powerpc/kernel/process.c          |  5 +++
> >   3 files changed, 60 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
> > index 7756026b95ca..b945bc16c932 100644
> > --- a/arch/powerpc/include/asm/debug.h
> > +++ b/arch/powerpc/include/asm/debug.h
> > @@ -45,6 +45,7 @@ static inline int debugger_break_match(struct pt_regs *regs) { return 0; }
> >   static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
> >   #endif
> >   
> > +void __get_breakpoint(struct arch_hw_breakpoint *brk);
> >   void __set_breakpoint(struct arch_hw_breakpoint *brk);
> >   bool ppc_breakpoint_available(void);
> >   #ifdef CONFIG_PPC_ADV_DEBUG_REGS
> > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > index 360367c579de..57a8695fe63f 100644
> > --- a/arch/powerpc/include/asm/mmu_context.h
> > +++ b/arch/powerpc/include/asm/mmu_context.h
> > @@ -10,6 +10,7 @@
> >   #include <asm/mmu.h>	
> >   #include <asm/cputable.h>
> >   #include <asm/cputhreads.h>
> > +#include <asm/debug.h>
> >   
> >   /*
> >    * Most if the context management is out of line
> > @@ -270,5 +271,58 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
> >   	return 0;
> >   }
> >   
> > +struct temp_mm {
> > +	struct mm_struct *temp;
> > +	struct mm_struct *prev;
> > +	bool is_kernel_thread;
> > +	struct arch_hw_breakpoint brk;
> > +};
> > +
> > +static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
> > +{
> > +	temp_mm->temp = mm;
> > +	temp_mm->prev = NULL;
> > +	temp_mm->is_kernel_thread = false;
> > +	memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
> > +}
> > +
> > +static inline void use_temporary_mm(struct temp_mm *temp_mm)
> > +{
> > +	lockdep_assert_irqs_disabled();
> > +
> > +	temp_mm->is_kernel_thread = current->mm == NULL;
> > +	if (temp_mm->is_kernel_thread)
> > +		temp_mm->prev = current->active_mm;
> > +	else
> > +		temp_mm->prev = current->mm;
> > +
> > +	/*
> > +	 * Hash requires a non-NULL current->mm to allocate a userspace address
> > +	 * when handling a page fault. Does not appear to hurt in Radix either.
> > +	 */
> > +	current->mm = temp_mm->temp;
> > +	switch_mm_irqs_off(NULL, temp_mm->temp, current);
> > +
> > +	if (ppc_breakpoint_available()) {
> > +		__get_breakpoint(&temp_mm->brk);
> > +		if (temp_mm->brk.type != 0)
> > +			hw_breakpoint_disable();
> > +	}
> > +}
> > +
> > +static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
>
> 
> Not sure "unuse" is a best naming, allthought I don't have a better
> suggestion a the moment. If not using temporary_mm anymore, what are we
> using now ?
>
> 

I'm not too fond of 'unuse' either, but it's what x86 uses and I
couldn't come up with anything better on the spot. Maybe 'undo' is
better since we're switching back to whatever mm was in use before?

> > +{
> > +	lockdep_assert_irqs_disabled();
> > +
> > +	if (temp_mm->is_kernel_thread)
> > +		current->mm = NULL;
> > +	else
> > +		current->mm = temp_mm->prev;
> > +	switch_mm_irqs_off(NULL, temp_mm->prev, current);
> > +
> > +	if (ppc_breakpoint_available() && temp_mm->brk.type != 0)
> > +		__set_breakpoint(&temp_mm->brk);
> > +}
> > +
> >   #endif /* __KERNEL__ */
> >   #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
> > diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> > index 9c21288f8645..ec4cf890d92c 100644
> > --- a/arch/powerpc/kernel/process.c
> > +++ b/arch/powerpc/kernel/process.c
> > @@ -800,6 +800,11 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
> >   	return 0;
> >   }
> >   
> > +void __get_breakpoint(struct arch_hw_breakpoint *brk)
> > +{
> > +	memcpy(brk, this_cpu_ptr(&current_brk), sizeof(*brk));
> > +}
> > +
> >   void __set_breakpoint(struct arch_hw_breakpoint *brk)
> >   {
> >   	memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
> > 
>
> 
> Christophe
>
> 
>
>
Christopher M. Riedl May 1, 2020, 8:46 p.m. UTC | #4
On Wed Apr 29, 2020 at 7:48 AM, Christophe Leroy wrote:
>
> 
>
> 
> Le 29/04/2020 à 04:05, Christopher M. Riedl a écrit :
> > x86 supports the notion of a temporary mm which restricts access to
> > temporary PTEs to a single CPU. A temporary mm is useful for situations
> > where a CPU needs to perform sensitive operations (such as patching a
> > STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
> > said mappings to other CPUs. A side benefit is that other CPU TLBs do
> > not need to be flushed when the temporary mm is torn down.
> > 
> > Mappings in the temporary mm can be set in the userspace portion of the
> > address-space.
> > 
> > Interrupts must be disabled while the temporary mm is in use. HW
> > breakpoints, which may have been set by userspace as watchpoints on
> > addresses now within the temporary mm, are saved and disabled when
> > loading the temporary mm. The HW breakpoints are restored when unloading
> > the temporary mm. All HW breakpoints are indiscriminately disabled while
> > the temporary mm is in use.
>
> 
> Why do we need to use a temporary mm all the time ?
>

Not sure I understand, the temporary mm is only in use for kernel
patching in this series. We could have other uses in the future maybe
where it's beneficial to keep mappings local.

> 
> Doesn't each CPU have its own mm already ? Only the upper address space
> is shared between all mm's but each mm has its own lower address space,
> at least when it is running a user process. Why not just use that mm ?
> As we are mapping then unmapping with interrupts disabled, there is no
> risk at all that the user starts running while the patch page is mapped,
> so I'm not sure why switching to a temporary mm is needed.
>
> 

I suppose that's an option, but then we have to save and restore the
mapping which we temporarily "steal" from userspace. I admit I didn't
consider that as an option when I started this series based on the x86
patches. I think it's cleaner to switch mm, but that's a rather weak
argument. Are you concerned about performance with the temporary mm?

>
> 
> > 
> > Based on x86 implementation:
> > 
> > commit cefa929c034e
> > ("x86/mm: Introduce temporary mm structs")
> > 
> > Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
>
> 
> Christophe
>
> 
>
>
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 7756026b95ca..b945bc16c932 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -45,6 +45,7 @@  static inline int debugger_break_match(struct pt_regs *regs) { return 0; }
 static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
+void __get_breakpoint(struct arch_hw_breakpoint *brk);
 void __set_breakpoint(struct arch_hw_breakpoint *brk);
 bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 360367c579de..57a8695fe63f 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -10,6 +10,7 @@ 
 #include <asm/mmu.h>	
 #include <asm/cputable.h>
 #include <asm/cputhreads.h>
+#include <asm/debug.h>
 
 /*
  * Most if the context management is out of line
@@ -270,5 +271,58 @@  static inline int arch_dup_mmap(struct mm_struct *oldmm,
 	return 0;
 }
 
+struct temp_mm {
+	struct mm_struct *temp;
+	struct mm_struct *prev;
+	bool is_kernel_thread;
+	struct arch_hw_breakpoint brk;
+};
+
+static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
+{
+	temp_mm->temp = mm;
+	temp_mm->prev = NULL;
+	temp_mm->is_kernel_thread = false;
+	memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
+}
+
+static inline void use_temporary_mm(struct temp_mm *temp_mm)
+{
+	lockdep_assert_irqs_disabled();
+
+	temp_mm->is_kernel_thread = current->mm == NULL;
+	if (temp_mm->is_kernel_thread)
+		temp_mm->prev = current->active_mm;
+	else
+		temp_mm->prev = current->mm;
+
+	/*
+	 * Hash requires a non-NULL current->mm to allocate a userspace address
+	 * when handling a page fault. Does not appear to hurt in Radix either.
+	 */
+	current->mm = temp_mm->temp;
+	switch_mm_irqs_off(NULL, temp_mm->temp, current);
+
+	if (ppc_breakpoint_available()) {
+		__get_breakpoint(&temp_mm->brk);
+		if (temp_mm->brk.type != 0)
+			hw_breakpoint_disable();
+	}
+}
+
+static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (temp_mm->is_kernel_thread)
+		current->mm = NULL;
+	else
+		current->mm = temp_mm->prev;
+	switch_mm_irqs_off(NULL, temp_mm->prev, current);
+
+	if (ppc_breakpoint_available() && temp_mm->brk.type != 0)
+		__set_breakpoint(&temp_mm->brk);
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9c21288f8645..ec4cf890d92c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -800,6 +800,11 @@  static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
 	return 0;
 }
 
+void __get_breakpoint(struct arch_hw_breakpoint *brk)
+{
+	memcpy(brk, this_cpu_ptr(&current_brk), sizeof(*brk));
+}
+
 void __set_breakpoint(struct arch_hw_breakpoint *brk)
 {
 	memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));