diff mbox series

[v11,19/39] arm64/mm: Handle GCS data aborts

Message ID 20240822-arm64-gcs-v11-19-41b81947ecb5@kernel.org (mailing list archive)
State New, archived
Headers show
Series arm64/gcs: Provide support for GCS in userspace | expand

Commit Message

Mark Brown Aug. 22, 2024, 1:15 a.m. UTC
All GCS operations at EL0 must happen on a page which is marked as
having UnprivGCS access, including read operations.  If a GCS operation
attempts to access a page without this then it will generate a data
abort with the GCS bit set in ESR_EL1.ISS2.

EL0 may validly generate such faults, for example due to copy on write
which will cause the GCS data to be stored in a read only page with no
GCS permissions until the actual copy happens.  Since UnprivGCS allows
both reads and writes to the GCS (though only through GCS operations) we
need to ensure that the memory management subsystem handles GCS accesses
as writes at all times.  Do this by adding FAULT_FLAG_WRITE to any GCS
page faults, adding handling to ensure that invalid cases are identfied
as such early so the memory management core does not think they will
succeed.  The core cannot distinguish between VMAs which are generally
writeable and VMAs which are only writeable through GCS operations.

EL1 may validly write to EL0 GCS for management purposes (eg, while
initialising with cap tokens).

We also report any GCS faults in VMAs not marked as part of a GCS as
access violations, causing a fault to be delivered to userspace if it
attempts to do GCS operations outside a GCS.

Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm64/mm/fault.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

Comments

Catalin Marinas Aug. 22, 2024, 4:12 p.m. UTC | #1
On Thu, Aug 22, 2024 at 02:15:22AM +0100, Mark Brown wrote:
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 451ba7cbd5ad..3ada31c2ac12 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -486,6 +486,14 @@ static void do_bad_area(unsigned long far, unsigned long esr,
>  	}
>  }
>  
> +static bool is_gcs_fault(unsigned long esr)
> +{
> +	if (!esr_is_data_abort(esr))
> +		return false;
> +
> +	return ESR_ELx_ISS2(esr) & ESR_ELx_GCS;
> +}
> +
>  static bool is_el0_instruction_abort(unsigned long esr)
>  {
>  	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
> @@ -500,6 +508,23 @@ static bool is_write_abort(unsigned long esr)
>  	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
>  }
>  
> +static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr)
> +{
> +	if (!system_supports_gcs())
> +		return false;
> +
> +	if (unlikely(is_gcs_fault(esr))) {
> +		/* GCS accesses must be performed on a GCS page */
> +		if (!(vma->vm_flags & VM_SHADOW_STACK))
> +			return true;

This first check covers the GCSPOPM/RET etc. permission faults on
non-GCS vmas. It looks correct.

> +	} else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) {
> +		/* Only GCS operations can write to a GCS page */
> +		return is_write_abort(esr);
> +	}

I don't think that's right. The ESR on this path may not even indicate a
data abort and ESR.WnR bit check wouldn't make sense.

I presume we want to avoid an infinite loop on a (writeable) GCS page
when the user does a normal STR but the CPU raises a permission fault. I
think this function needs to just return false if !esr_is_data_abort().

> +
> +	return false;
> +}
> +
>  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  				   struct pt_regs *regs)
>  {
> @@ -535,6 +560,14 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  		/* It was exec fault */
>  		vm_flags = VM_EXEC;
>  		mm_flags |= FAULT_FLAG_INSTRUCTION;
> +	} else if (is_gcs_fault(esr)) {
> +		/*
> +		 * The GCS permission on a page implies both read and
> +		 * write so always handle any GCS fault as a write fault,
> +		 * we need to trigger CoW even for GCS reads.
> +		 */
> +		vm_flags = VM_WRITE;
> +		mm_flags |= FAULT_FLAG_WRITE;
>  	} else if (is_write_abort(esr)) {
>  		/* It was write fault */
>  		vm_flags = VM_WRITE;
> @@ -568,6 +601,13 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	if (!vma)
>  		goto lock_mmap;
>  
> +	if (is_invalid_gcs_access(vma, esr)) {
> +		vma_end_read(vma);
> +		fault = 0;
> +		si_code = SEGV_ACCERR;
> +		goto bad_area;
> +	}

Here there's a risk that the above function returns true for some
unrelated fault that happens to have bit 6 in ESR set.
Mark Brown Aug. 22, 2024, 4:44 p.m. UTC | #2
On Thu, Aug 22, 2024 at 05:12:30PM +0100, Catalin Marinas wrote:
> On Thu, Aug 22, 2024 at 02:15:22AM +0100, Mark Brown wrote:

> > +static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr)

> > +	} else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) {
> > +		/* Only GCS operations can write to a GCS page */
> > +		return is_write_abort(esr);
> > +	}

> I don't think that's right. The ESR on this path may not even indicate a
> data abort and ESR.WnR bit check wouldn't make sense.

> I presume we want to avoid an infinite loop on a (writeable) GCS page
> when the user does a normal STR but the CPU raises a permission fault. I
> think this function needs to just return false if !esr_is_data_abort().

Yes, that should check for a data abort.  I think I'd formed the
impression that is_write_abort() included that check somehow.  As you
say it's to avoid spinning trying to resolve a permission fault for a
write (non-GCS reads to a GCS page are valid), I do think we need the 
is_write_abort() since non-GCS reads are valid so something like:

	if (!esr_is_data_abort(esr))
		return false;

	return is_write_abort(esr);
Catalin Marinas Aug. 22, 2024, 5:19 p.m. UTC | #3
On Thu, Aug 22, 2024 at 05:44:19PM +0100, Mark Brown wrote:
> On Thu, Aug 22, 2024 at 05:12:30PM +0100, Catalin Marinas wrote:
> > On Thu, Aug 22, 2024 at 02:15:22AM +0100, Mark Brown wrote:
> 
> > > +static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr)
> 
> > > +	} else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) {
> > > +		/* Only GCS operations can write to a GCS page */
> > > +		return is_write_abort(esr);
> > > +	}
> 
> > I don't think that's right. The ESR on this path may not even indicate a
> > data abort and ESR.WnR bit check wouldn't make sense.
> 
> > I presume we want to avoid an infinite loop on a (writeable) GCS page
> > when the user does a normal STR but the CPU raises a permission fault. I
> > think this function needs to just return false if !esr_is_data_abort().
> 
> Yes, that should check for a data abort.  I think I'd formed the
> impression that is_write_abort() included that check somehow.  As you
> say it's to avoid spinning trying to resolve a permission fault for a
> write (non-GCS reads to a GCS page are valid), I do think we need the 
> is_write_abort() since non-GCS reads are valid so something like:
> 
> 	if (!esr_is_data_abort(esr))
> 		return false;
> 
> 	return is_write_abort(esr);

We do need the write abort check but not unconditionally, only if to a
GCS page (you can have other genuine write aborts).
Mark Brown Aug. 22, 2024, 5:30 p.m. UTC | #4
On Thu, Aug 22, 2024 at 06:19:38PM +0100, Catalin Marinas wrote:
> On Thu, Aug 22, 2024 at 05:44:19PM +0100, Mark Brown wrote:
> > On Thu, Aug 22, 2024 at 05:12:30PM +0100, Catalin Marinas wrote:
> > > On Thu, Aug 22, 2024 at 02:15:22AM +0100, Mark Brown wrote:
> > 
> > > > +static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr)
> > 
> > > > +	} else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) {
> > > > +		/* Only GCS operations can write to a GCS page */
> > > > +		return is_write_abort(esr);
> > > > +	}

> > Yes, that should check for a data abort.  I think I'd formed the
> > impression that is_write_abort() included that check somehow.  As you
> > say it's to avoid spinning trying to resolve a permission fault for a
> > write (non-GCS reads to a GCS page are valid), I do think we need the 
> > is_write_abort() since non-GCS reads are valid so something like:
> > 
> > 	if (!esr_is_data_abort(esr))
> > 		return false;
> > 
> > 	return is_write_abort(esr);
> 
> We do need the write abort check but not unconditionally, only if to a
> GCS page (you can have other genuine write aborts).

tThat was to replace the checks in the above case, not the function as a
whole.
diff mbox series

Patch

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 451ba7cbd5ad..3ada31c2ac12 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -486,6 +486,14 @@  static void do_bad_area(unsigned long far, unsigned long esr,
 	}
 }
 
+static bool is_gcs_fault(unsigned long esr)
+{
+	if (!esr_is_data_abort(esr))
+		return false;
+
+	return ESR_ELx_ISS2(esr) & ESR_ELx_GCS;
+}
+
 static bool is_el0_instruction_abort(unsigned long esr)
 {
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
@@ -500,6 +508,23 @@  static bool is_write_abort(unsigned long esr)
 	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
 }
 
+static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr)
+{
+	if (!system_supports_gcs())
+		return false;
+
+	if (unlikely(is_gcs_fault(esr))) {
+		/* GCS accesses must be performed on a GCS page */
+		if (!(vma->vm_flags & VM_SHADOW_STACK))
+			return true;
+	} else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) {
+		/* Only GCS operations can write to a GCS page */
+		return is_write_abort(esr);
+	}
+
+	return false;
+}
+
 static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 				   struct pt_regs *regs)
 {
@@ -535,6 +560,14 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 		/* It was exec fault */
 		vm_flags = VM_EXEC;
 		mm_flags |= FAULT_FLAG_INSTRUCTION;
+	} else if (is_gcs_fault(esr)) {
+		/*
+		 * The GCS permission on a page implies both read and
+		 * write so always handle any GCS fault as a write fault,
+		 * we need to trigger CoW even for GCS reads.
+		 */
+		vm_flags = VM_WRITE;
+		mm_flags |= FAULT_FLAG_WRITE;
 	} else if (is_write_abort(esr)) {
 		/* It was write fault */
 		vm_flags = VM_WRITE;
@@ -568,6 +601,13 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	if (!vma)
 		goto lock_mmap;
 
+	if (is_invalid_gcs_access(vma, esr)) {
+		vma_end_read(vma);
+		fault = 0;
+		si_code = SEGV_ACCERR;
+		goto bad_area;
+	}
+
 	if (!(vma->vm_flags & vm_flags)) {
 		vma_end_read(vma);
 		fault = 0;