diff mbox series

[V2,3/4] arm64/mm: Consolidate page fault information capture

Message ID 1559544085-7502-4-git-send-email-anshuman.khandual@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64/mm: Clean ups for do_page_fault() | expand

Commit Message

Anshuman Khandual June 3, 2019, 6:41 a.m. UTC
This consolidates page fault information capture and move them bit earlier.
While here it also adds an wrapper is_el0_write_abort(). It also saves some
cycles by replacing multiple user_mode() calls into a single one earlier
during the fault.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
---
 arch/arm64/mm/fault.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

Comments

Catalin Marinas June 4, 2019, 2:42 p.m. UTC | #1
On Mon, Jun 03, 2019 at 12:11:24PM +0530, Anshuman Khandual wrote:
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index da02678..4bb65f3 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -435,6 +435,14 @@ static bool is_el0_instruction_abort(unsigned int esr)
>  	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
>  }
>  
> +/*
> + * This is applicable only for EL0 write aborts.
> + */
> +static bool is_el0_write_abort(unsigned int esr)
> +{
> +	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
> +}

What makes this EL0 only?

> +
>  static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
>  				   struct pt_regs *regs)
>  {
> @@ -443,6 +451,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
>  	vm_fault_t fault, major = 0;
>  	unsigned long vm_flags = VM_READ | VM_WRITE;
>  	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
> +	bool is_user = user_mode(regs);
> +	bool is_el0_exec = is_el0_instruction_abort(esr);
> +	bool is_el0_write = is_el0_write_abort(esr);
>  
>  	if (notify_page_fault(regs, esr))
>  		return 0;
> @@ -454,12 +465,12 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
>  	if (faulthandler_disabled() || !mm)
>  		goto no_context;
>  
> -	if (user_mode(regs))
> +	if (is_user)
>  		mm_flags |= FAULT_FLAG_USER;
>  
> -	if (is_el0_instruction_abort(esr)) {
> +	if (is_el0_exec) {
>  		vm_flags = VM_EXEC;
> -	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
> +	} else if (is_el0_write) {
>  		vm_flags = VM_WRITE;
>  		mm_flags |= FAULT_FLAG_WRITE;
>  	}

This can be triggered by an EL1 write to a user mapping, so is_el0_write
is misleading.
Mark Rutland June 6, 2019, 9:38 a.m. UTC | #2
On Tue, Jun 04, 2019 at 03:42:09PM +0100, Catalin Marinas wrote:
> On Mon, Jun 03, 2019 at 12:11:24PM +0530, Anshuman Khandual wrote:
> > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > index da02678..4bb65f3 100644
> > --- a/arch/arm64/mm/fault.c
> > +++ b/arch/arm64/mm/fault.c
> > @@ -435,6 +435,14 @@ static bool is_el0_instruction_abort(unsigned int esr)
> >  	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
> >  }
> >  
> > +/*
> > + * This is applicable only for EL0 write aborts.
> > + */
> > +static bool is_el0_write_abort(unsigned int esr)
> > +{
> > +	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
> > +}
> 
> What makes this EL0 only?

It returns false for EL1 faults caused by DC IVAC, where write
permission is required. EL0 can only issue maintenance that requires
read permission.

For whatever reason, the architecture says that WnR is always 1b1, even
if read permission was sufficient.

How about:

/*
 * Note: not valid for EL1 DC IVAC, but we never use that such that it
 * should fault.
 */
static bool is_write_abort(unsigned int esr)
{
	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
}

... which would also address your concern about EL1 writes to a user
mapping.

Thanks,
Mark.
Catalin Marinas June 6, 2019, 11:23 a.m. UTC | #3
On Thu, Jun 06, 2019 at 10:38:11AM +0100, Mark Rutland wrote:
> On Tue, Jun 04, 2019 at 03:42:09PM +0100, Catalin Marinas wrote:
> > On Mon, Jun 03, 2019 at 12:11:24PM +0530, Anshuman Khandual wrote:
> > > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > > index da02678..4bb65f3 100644
> > > --- a/arch/arm64/mm/fault.c
> > > +++ b/arch/arm64/mm/fault.c
> > > @@ -435,6 +435,14 @@ static bool is_el0_instruction_abort(unsigned int esr)
> > >  	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
> > >  }
> > >  
> > > +/*
> > > + * This is applicable only for EL0 write aborts.
> > > + */
> > > +static bool is_el0_write_abort(unsigned int esr)
> > > +{
> > > +	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
> > > +}
> > 
> > What makes this EL0 only?
> 
> It returns false for EL1 faults caused by DC IVAC, where write
> permission is required. EL0 can only issue maintenance that requires
> read permission.
> 
> For whatever reason, the architecture says that WnR is always 1b1, even
> if read permission was sufficient.
> 
> How about:
> 
> /*
>  * Note: not valid for EL1 DC IVAC, but we never use that such that it
>  * should fault.
>  */

For completeness, I'd add "... should fault. EL0 cannot issue DC IVAC
(undef)." or something like that.

Looks fine otherwise.
diff mbox series

Patch

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index da02678..4bb65f3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -435,6 +435,14 @@  static bool is_el0_instruction_abort(unsigned int esr)
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
 }
 
+/*
+ * This is applicable only for EL0 write aborts.
+ */
+static bool is_el0_write_abort(unsigned int esr)
+{
+	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
+}
+
 static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 				   struct pt_regs *regs)
 {
@@ -443,6 +451,9 @@  static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	vm_fault_t fault, major = 0;
 	unsigned long vm_flags = VM_READ | VM_WRITE;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	bool is_user = user_mode(regs);
+	bool is_el0_exec = is_el0_instruction_abort(esr);
+	bool is_el0_write = is_el0_write_abort(esr);
 
 	if (notify_page_fault(regs, esr))
 		return 0;
@@ -454,12 +465,12 @@  static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	if (faulthandler_disabled() || !mm)
 		goto no_context;
 
-	if (user_mode(regs))
+	if (is_user)
 		mm_flags |= FAULT_FLAG_USER;
 
-	if (is_el0_instruction_abort(esr)) {
+	if (is_el0_exec) {
 		vm_flags = VM_EXEC;
-	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
+	} else if (is_el0_write) {
 		vm_flags = VM_WRITE;
 		mm_flags |= FAULT_FLAG_WRITE;
 	}
@@ -487,7 +498,7 @@  static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	 * we can bug out early if this is from code which shouldn't.
 	 */
 	if (!down_read_trylock(&mm->mmap_sem)) {
-		if (!user_mode(regs) && !search_exception_tables(regs->pc))
+		if (!is_user && !search_exception_tables(regs->pc))
 			goto no_context;
 retry:
 		down_read(&mm->mmap_sem);
@@ -498,7 +509,7 @@  static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 		 */
 		might_sleep();
 #ifdef CONFIG_DEBUG_VM
-		if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
+		if (!is_user && !search_exception_tables(regs->pc)) {
 			up_read(&mm->mmap_sem);
 			goto no_context;
 		}
@@ -516,7 +527,7 @@  static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 		 * in __lock_page_or_retry in mm/filemap.c.
 		 */
 		if (fatal_signal_pending(current)) {
-			if (!user_mode(regs))
+			if (!is_user)
 				goto no_context;
 			return 0;
 		}
@@ -561,7 +572,7 @@  static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	 * If we are in kernel mode at this point, we have no context to
 	 * handle this fault with.
 	 */
-	if (!user_mode(regs))
+	if (!is_user)
 		goto no_context;
 
 	if (fault & VM_FAULT_OOM) {