diff mbox series

[v2,33/35] arm64/mm: attempt speculative mm faults first

Message ID 20220128131006.67712-34-michel@lespinasse.org (mailing list archive)
State New
Headers show
Series Speculative page faults | expand

Commit Message

Michel Lespinasse Jan. 28, 2022, 1:10 p.m. UTC
Attempt speculative mm fault handling first, and fall back to the
existing (non-speculative) code if that fails.

This follows the lines of the x86 speculative fault handling code,
but with some minor arch differences such as the way that the
VM_FAULT_BADACCESS case is handled.

Signed-off-by: Michel Lespinasse <michel@lespinasse.org>
---
 arch/arm64/mm/fault.c | 62 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

Comments

Mike Rapoport Jan. 30, 2022, 9:13 a.m. UTC | #1
On Fri, Jan 28, 2022 at 05:10:04AM -0800, Michel Lespinasse wrote:
> Attempt speculative mm fault handling first, and fall back to the
> existing (non-speculative) code if that fails.
> 
> This follows the lines of the x86 speculative fault handling code,
> but with some minor arch differences such as the way that the
> VM_FAULT_BADACCESS case is handled.
> 
> Signed-off-by: Michel Lespinasse <michel@lespinasse.org>
> ---
>  arch/arm64/mm/fault.c | 62 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)
> 
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 77341b160aca..2598795f4e70 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -25,6 +25,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/preempt.h>
>  #include <linux/hugetlb.h>
> +#include <linux/vm_event_item.h>
>  
>  #include <asm/acpi.h>
>  #include <asm/bug.h>
> @@ -524,6 +525,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
>  	unsigned long vm_flags;
>  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>  	unsigned long addr = untagged_addr(far);
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +	struct vm_area_struct *vma;
> +	struct vm_area_struct pvma;
> +	unsigned long seq;
> +#endif
>  
>  	if (kprobe_page_fault(regs, esr))
>  		return 0;
> @@ -574,6 +580,59 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
>  
>  	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
>  
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +	/*
> +	 * No need to try speculative faults for kernel or
> +	 * single threaded user space.
> +	 */
> +	if (!(mm_flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1)
> +		goto no_spf;
> +
> +	count_vm_event(SPF_ATTEMPT);
> +	seq = mmap_seq_read_start(mm);
> +	if (seq & 1) {
> +		count_vm_spf_event(SPF_ABORT_ODD);
> +		goto spf_abort;
> +	}
> +	rcu_read_lock();
> +	vma = __find_vma(mm, addr);
> +	if (!vma || vma->vm_start > addr) {
> +		rcu_read_unlock();
> +		count_vm_spf_event(SPF_ABORT_UNMAPPED);
> +		goto spf_abort;
> +	}
> +	if (!vma_is_anonymous(vma)) {
> +		rcu_read_unlock();
> +		count_vm_spf_event(SPF_ABORT_NO_SPECULATE);
> +		goto spf_abort;
> +	}
> +	pvma = *vma;
> +	rcu_read_unlock();
> +	if (!mmap_seq_read_check(mm, seq, SPF_ABORT_VMA_COPY))
> +		goto spf_abort;
> +	vma = &pvma;
> +	if (!(vma->vm_flags & vm_flags)) {
> +		count_vm_spf_event(SPF_ABORT_ACCESS_ERROR);
> +		goto spf_abort;
> +	}
> +	fault = do_handle_mm_fault(vma, addr & PAGE_MASK,
> +			mm_flags | FAULT_FLAG_SPECULATIVE, seq, regs);
> +
> +	/* Quick path to respond to signals */
> +	if (fault_signal_pending(fault, regs)) {
> +		if (!user_mode(regs))
> +			goto no_context;
> +		return 0;
> +	}
> +	if (!(fault & VM_FAULT_RETRY))
> +		goto done;
> +
> +spf_abort:
> +	count_vm_event(SPF_ABORT);
> +no_spf:
> +
> +#endif	/* CONFIG_SPECULATIVE_PAGE_FAULT */

The speculative page fault implementation here (and for PowerPC as well)
looks very similar to x86. Can we factor it our rather than copy 3 (or
more) times?

> +
>  	/*
>  	 * As per x86, we may deadlock here. However, since the kernel only
>  	 * validly references user space from well defined areas of the code,
> @@ -612,6 +671,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
>  		goto retry;
>  	}
>  	mmap_read_unlock(mm);
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +done:
> +#endif
>  
>  	/*
>  	 * Handle the "normal" (no error) case first.
> -- 
> 2.20.1
> 
>
Michel Lespinasse Jan. 31, 2022, 8:07 a.m. UTC | #2
On Sun, Jan 30, 2022 at 11:13:26AM +0200, Mike Rapoport wrote:
> The speculative page fault implementation here (and for PowerPC as well)
> looks very similar to x86. Can we factor it our rather than copy 3 (or
> more) times?

In each arch, the speculative code was written along the lines of the
existing non-speculative code, so that behavior would be unchanged
when speculation succeeds.

Now each arch's existing, non-speculative code paths are quite similar,
but they do have small differences as to how they implement various
permission checks, protection keys and the like. The same small
differences end up being reflected in the new speculative code paths.

I agree it would be nice if this code could be unified between archs,
but IMO this should start with the existing non-speculative code -
I don't think it would make sense to try unifying the new speculative
code while trying to follow the behavior of the non-unified old
non-speculative code paths...
Mike Rapoport Feb. 1, 2022, 8:58 a.m. UTC | #3
On Mon, Jan 31, 2022 at 12:07:29AM -0800, Michel Lespinasse wrote:
> On Sun, Jan 30, 2022 at 11:13:26AM +0200, Mike Rapoport wrote:
> > The speculative page fault implementation here (and for PowerPC as well)
> > looks very similar to x86. Can we factor it our rather than copy 3 (or
> > more) times?
> 
> In each arch, the speculative code was written along the lines of the
> existing non-speculative code, so that behavior would be unchanged
> when speculation succeeds.
> 
> Now each arch's existing, non-speculative code paths are quite similar,
> but they do have small differences as to how they implement various
> permission checks, protection keys and the like. The same small
> differences end up being reflected in the new speculative code paths.
> 
> I agree it would be nice if this code could be unified between archs,
> but IMO this should start with the existing non-speculative code -
> I don't think it would make sense to try unifying the new speculative
> code while trying to follow the behavior of the non-unified old
> non-speculative code paths...

Then maybe this unification can be done as the ground work for the
speculative page fault handling?
Michel Lespinasse Feb. 7, 2022, 5:39 p.m. UTC | #4
On Tue, Feb 01, 2022 at 10:58:03AM +0200, Mike Rapoport wrote:
> On Mon, Jan 31, 2022 at 12:07:29AM -0800, Michel Lespinasse wrote:
> > On Sun, Jan 30, 2022 at 11:13:26AM +0200, Mike Rapoport wrote:
> > > The speculative page fault implementation here (and for PowerPC as well)
> > > looks very similar to x86. Can we factor it our rather than copy 3 (or
> > > more) times?
> > 
> > In each arch, the speculative code was written along the lines of the
> > existing non-speculative code, so that behavior would be unchanged
> > when speculation succeeds.
> > 
> > Now each arch's existing, non-speculative code paths are quite similar,
> > but they do have small differences as to how they implement various
> > permission checks, protection keys and the like. The same small
> > differences end up being reflected in the new speculative code paths.
> > 
> > I agree it would be nice if this code could be unified between archs,
> > but IMO this should start with the existing non-speculative code -
> > I don't think it would make sense to try unifying the new speculative
> > code while trying to follow the behavior of the non-unified old
> > non-speculative code paths...
> 
> Then maybe this unification can be done as the ground work for the
> speculative page fault handling?

I feel like this is quite unrelated, and that introducing such
artificial dependencies is a bad work habit we have here in linux MM...

That said, unifying the PF code between archs would be an interesting
project on its own. The way I see it, there could be a unified page
fault handler, with some arch specific parts defined as inline
functions.  I can see myself making an x86/arm64/powerpc initial
proposal if there is enough interest for it, but I'm not sure how
extending it to more exotic archs would go - I think this would have
to involve arch maintainers at least for testing purposes, and I'm not
sure if they'd have any bandwidth for such a project...

--
Michel "walken" Lespinasse
Mike Rapoport Feb. 8, 2022, 9:07 a.m. UTC | #5
On Mon, Feb 07, 2022 at 09:39:19AM -0800, Michel Lespinasse wrote:
> On Tue, Feb 01, 2022 at 10:58:03AM +0200, Mike Rapoport wrote:
> > On Mon, Jan 31, 2022 at 12:07:29AM -0800, Michel Lespinasse wrote:
> > > On Sun, Jan 30, 2022 at 11:13:26AM +0200, Mike Rapoport wrote:
> > > > The speculative page fault implementation here (and for PowerPC as well)
> > > > looks very similar to x86. Can we factor it our rather than copy 3 (or
> > > > more) times?
> > > 
> > > In each arch, the speculative code was written along the lines of the
> > > existing non-speculative code, so that behavior would be unchanged
> > > when speculation succeeds.
> > > 
> > > Now each arch's existing, non-speculative code paths are quite similar,
> > > but they do have small differences as to how they implement various
> > > permission checks, protection keys and the like. The same small
> > > differences end up being reflected in the new speculative code paths.
> > > 
> > > I agree it would be nice if this code could be unified between archs,
> > > but IMO this should start with the existing non-speculative code -
> > > I don't think it would make sense to try unifying the new speculative
> > > code while trying to follow the behavior of the non-unified old
> > > non-speculative code paths...
> > 
> > Then maybe this unification can be done as the ground work for the
> > speculative page fault handling?
> 
> I feel like this is quite unrelated, and that introducing such
> artificial dependencies is a bad work habit we have here in linux MM...

The reduction of the code duplication in page fault handlers per se is
indeed not very related to SPF work, but since the SPF patches increase
the code duplication, I believe that the refactoring that prevents this
additional code duplication is related and is in scope of this work.
 
> That said, unifying the PF code between archs would be an interesting
> project on its own. The way I see it, there could be a unified page
> fault handler, with some arch specific parts defined as inline
> functions.  I can see myself making an x86/arm64/powerpc initial
> proposal if there is enough interest for it, but I'm not sure how
> extending it to more exotic archs would go - I think this would have
> to involve arch maintainers at least for testing purposes, and I'm not
> sure if they'd have any bandwidth for such a project...

There is no need to convert all architectures and surely not at once.
The parts of page fault handler that are shared by several architectures
can live under #ifdef ARCH_WANTS_GENERIC_PAGE_FAULT or something like this.

> --
> Michel "walken" Lespinasse
diff mbox series

Patch

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 77341b160aca..2598795f4e70 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -25,6 +25,7 @@ 
 #include <linux/perf_event.h>
 #include <linux/preempt.h>
 #include <linux/hugetlb.h>
+#include <linux/vm_event_item.h>
 
 #include <asm/acpi.h>
 #include <asm/bug.h>
@@ -524,6 +525,11 @@  static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 	unsigned long vm_flags;
 	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
 	unsigned long addr = untagged_addr(far);
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	struct vm_area_struct *vma;
+	struct vm_area_struct pvma;
+	unsigned long seq;
+#endif
 
 	if (kprobe_page_fault(regs, esr))
 		return 0;
@@ -574,6 +580,59 @@  static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	/*
+	 * No need to try speculative faults for kernel or
+	 * single threaded user space.
+	 */
+	if (!(mm_flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1)
+		goto no_spf;
+
+	count_vm_event(SPF_ATTEMPT);
+	seq = mmap_seq_read_start(mm);
+	if (seq & 1) {
+		count_vm_spf_event(SPF_ABORT_ODD);
+		goto spf_abort;
+	}
+	rcu_read_lock();
+	vma = __find_vma(mm, addr);
+	if (!vma || vma->vm_start > addr) {
+		rcu_read_unlock();
+		count_vm_spf_event(SPF_ABORT_UNMAPPED);
+		goto spf_abort;
+	}
+	if (!vma_is_anonymous(vma)) {
+		rcu_read_unlock();
+		count_vm_spf_event(SPF_ABORT_NO_SPECULATE);
+		goto spf_abort;
+	}
+	pvma = *vma;
+	rcu_read_unlock();
+	if (!mmap_seq_read_check(mm, seq, SPF_ABORT_VMA_COPY))
+		goto spf_abort;
+	vma = &pvma;
+	if (!(vma->vm_flags & vm_flags)) {
+		count_vm_spf_event(SPF_ABORT_ACCESS_ERROR);
+		goto spf_abort;
+	}
+	fault = do_handle_mm_fault(vma, addr & PAGE_MASK,
+			mm_flags | FAULT_FLAG_SPECULATIVE, seq, regs);
+
+	/* Quick path to respond to signals */
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			goto no_context;
+		return 0;
+	}
+	if (!(fault & VM_FAULT_RETRY))
+		goto done;
+
+spf_abort:
+	count_vm_event(SPF_ABORT);
+no_spf:
+
+#endif	/* CONFIG_SPECULATIVE_PAGE_FAULT */
+
 	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
 	 * validly references user space from well defined areas of the code,
@@ -612,6 +671,9 @@  static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 		goto retry;
 	}
 	mmap_read_unlock(mm);
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+done:
+#endif
 
 	/*
 	 * Handle the "normal" (no error) case first.