[v4] arm64: mte: optimize GCR_EL1 modification on kernel entry/exit

Message ID: 20210714013638.3995315-1-pcc@google.com (mailing list archive)
State: New, archived
Series: [v4] arm64: mte: optimize GCR_EL1 modification on kernel entry/exit

Commit Message

Peter Collingbourne July 14, 2021, 1:36 a.m. UTC
Accessing GCR_EL1 and issuing an ISB can be expensive on some
microarchitectures. Although we must write to GCR_EL1, we can
restructure the code to avoid reading from it because the new value
can be derived entirely from the exclusion mask, which is already in
a GPR. Do so.
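
For illustration, here is a rough C model of the new sequence; the constant
values are stand-ins for the kernel's MTE_CTRL_GCR_USER_EXCL_SHIFT and
SYS_GCR_EL1_RRND definitions, and the authoritative change is the assembly
diff below.

#include <stdint.h>

/* Illustrative stand-ins for the kernel's asm/processor.h and asm/sysreg.h values. */
#define MTE_CTRL_GCR_USER_EXCL_SHIFT	0
#define SYS_GCR_EL1_RRND		(1UL << 16)

/*
 * Rebuild the GCR_EL1 value purely from the cached mte_ctrl: extract the
 * 16-bit exclude field (ubfx) and set RRND (orr). No read of GCR_EL1,
 * and hence no extra ISB, is needed on the entry/exit path.
 */
static uint64_t mte_gcr_from_ctrl(uint64_t mte_ctrl)
{
	uint64_t excl = (mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) & 0xffff;

	return excl | SYS_GCR_EL1_RRND;	/* value written to GCR_EL1 via msr_s */
}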

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I560a190a74176ca4cc5191dad08f77f6b1577c75
---
v4:
- split in two

v3:
- go back to modifying on entry/exit; optimize that path instead

v2:
- rebase onto v9 of the tag checking mode preference series

 arch/arm64/kernel/entry.S | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

Comments

Mark Rutland July 14, 2021, 2:04 p.m. UTC | #1
Hi Peter,

On Tue, Jul 13, 2021 at 06:36:38PM -0700, Peter Collingbourne wrote:
> Accessing GCR_EL1 and issuing an ISB can be expensive on some
> microarchitectures. Although we must write to GCR_EL1, we can
> restructure the code to avoid reading from it because the new value
> can be derived entirely from the exclusion mask, which is already in
> a GPR. Do so.
> 
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Link: https://linux-review.googlesource.com/id/I560a190a74176ca4cc5191dad08f77f6b1577c75
> ---
> v4:
> - split in two
> 
> v3:
> - go back to modifying on entry/exit; optimize that path instead
> 
> v2:
> - rebase onto v9 of the tag checking mode preference series
> 
>  arch/arm64/kernel/entry.S | 12 ++++--------
>  1 file changed, 4 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index ce59280355c5..2d6dc62d929a 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -175,15 +175,11 @@ alternative_else_nop_endif
>  #endif
>  	.endm
>  
> -	.macro mte_set_gcr, tmp, tmp2
> +	.macro mte_set_gcr, mte_ctrl, tmp
>  #ifdef CONFIG_ARM64_MTE
> -	/*
> -	 * Calculate and set the exclude mask preserving
> -	 * the RRND (bit[16]) setting.
> -	 */
> -	mrs_s	\tmp2, SYS_GCR_EL1
> -	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
> -	msr_s	SYS_GCR_EL1, \tmp2
> +	ubfx	\tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
> +	orr	\tmp, \tmp, #SYS_GCR_EL1_RRND
> +	msr_s	SYS_GCR_EL1, \tmp
>  #endif
>  	.endm

Since the mte_ctrl value only has the Exclude bits set, we can make this
even simpler:

	orr	\tmp, \mte_ctrl, #SYS_GCR_EL1_RRND
	msr_s   SYS_GCR_EL1, \tmp

Otherwise, looks good to me!

Thanks,
Mark.
Catalin Marinas July 28, 2021, 5:18 p.m. UTC | #2
On Wed, Jul 14, 2021 at 03:04:42PM +0100, Mark Rutland wrote:
> On Tue, Jul 13, 2021 at 06:36:38PM -0700, Peter Collingbourne wrote:
> > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> > index ce59280355c5..2d6dc62d929a 100644
> > --- a/arch/arm64/kernel/entry.S
> > +++ b/arch/arm64/kernel/entry.S
> > @@ -175,15 +175,11 @@ alternative_else_nop_endif
> >  #endif
> >  	.endm
> >  
> > -	.macro mte_set_gcr, tmp, tmp2
> > +	.macro mte_set_gcr, mte_ctrl, tmp
> >  #ifdef CONFIG_ARM64_MTE
> > -	/*
> > -	 * Calculate and set the exclude mask preserving
> > -	 * the RRND (bit[16]) setting.
> > -	 */
> > -	mrs_s	\tmp2, SYS_GCR_EL1
> > -	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
> > -	msr_s	SYS_GCR_EL1, \tmp2
> > +	ubfx	\tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
> > +	orr	\tmp, \tmp, #SYS_GCR_EL1_RRND
> > +	msr_s	SYS_GCR_EL1, \tmp
> >  #endif
> >  	.endm
> 
> Since the mte_ctrl value only has the Exclude bits set, we can make this
> even simpler:
> 
> 	orr	\tmp, \mte_ctrl, #SYS_GCR_EL1_RRND
> 	msr_s   SYS_GCR_EL1, \tmp

I don't think we can guarantee that once the patch below lands, as it adds
other bits to mte_ctrl:

https://lore.kernel.org/r/20210727205300.2554659-3-pcc@google.com
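
To illustrate the concern: once mte_ctrl carries bits beyond the exclude
field, the plain orr would leak them into GCR_EL1, which is why the ubfx
stays. A standalone sketch (the extra flag bit and constant values are
hypothetical stand-ins):

#include <assert.h>
#include <stdint.h>

#define MTE_CTRL_GCR_USER_EXCL_SHIFT	0
#define MTE_CTRL_GCR_USER_EXCL_MASK	0xffffUL
#define SYS_GCR_EL1_RRND		(1UL << 16)

/* Hypothetical control bit stored in mte_ctrl above the exclude field. */
#define MTE_CTRL_OTHER_FLAG		(1UL << 18)

int main(void)
{
	uint64_t mte_ctrl = 0xfffeUL | MTE_CTRL_OTHER_FLAG;

	/* The patch as posted: keep only the exclude field, then set RRND. */
	uint64_t with_ubfx = ((mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
			      MTE_CTRL_GCR_USER_EXCL_MASK) | SYS_GCR_EL1_RRND;

	/* The shortcut: safe only if mte_ctrl holds nothing but Exclude bits. */
	uint64_t with_orr = mte_ctrl | SYS_GCR_EL1_RRND;

	/* Otherwise the stray flag ends up set in GCR_EL1. */
	assert(with_ubfx != with_orr);
	return 0;
}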
Catalin Marinas July 28, 2021, 5:46 p.m. UTC | #3
On Tue, 13 Jul 2021 18:36:38 -0700, Peter Collingbourne wrote:
> Accessing GCR_EL1 and issuing an ISB can be expensive on some
> microarchitectures. Although we must write to GCR_EL1, we can
> restructure the code to avoid reading from it because the new value
> can be derived entirely from the exclusion mask, which is already in
> a GPR. Do so.

Applied to arm64 (for-next/mte), thanks!

[1/1] arm64: mte: optimize GCR_EL1 modification on kernel entry/exit
      https://git.kernel.org/arm64/c/afdfd93a53ae

Patch

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ce59280355c5..2d6dc62d929a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -175,15 +175,11 @@ alternative_else_nop_endif
 #endif
 	.endm
 
-	.macro mte_set_gcr, tmp, tmp2
+	.macro mte_set_gcr, mte_ctrl, tmp
 #ifdef CONFIG_ARM64_MTE
-	/*
-	 * Calculate and set the exclude mask preserving
-	 * the RRND (bit[16]) setting.
-	 */
-	mrs_s	\tmp2, SYS_GCR_EL1
-	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
-	msr_s	SYS_GCR_EL1, \tmp2
+	ubfx	\tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
+	orr	\tmp, \tmp, #SYS_GCR_EL1_RRND
+	msr_s	SYS_GCR_EL1, \tmp
 #endif
 	.endm