[v2] arm64: Rewrite __arch_clear_user()

Message ID ec0eb3452b572de993cac20093850d5b0c0ba003.1620993260.git.robin.murphy@arm.com (mailing list archive)
State New, archived
Series [v2] arm64: Rewrite __arch_clear_user()

Commit Message

Robin Murphy May 14, 2021, 11:57 a.m. UTC
Now that we're always using STTR variants rather than abstracting two
different addressing modes, the user_ldst macro here is frankly more
obfuscating than helpful. Rewrite __arch_clear_user() with regular
USER() annotations so that it's clearer what's going on, and take the
opportunity to minimise the branchiness in the most common paths, while
also allowing the exception fixup to return an accurate result.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---

v2: Tweak exception fixup to be exact

 arch/arm64/lib/clear_user.S | 44 ++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 20 deletions(-)
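
For orientation before the diff and review below, here is a simplified C
model of the store pattern the rewritten routine uses. This is an
illustrative sketch only: the helper name is invented, memset stands in
for the unprivileged STTR stores, and faults and the uaccess machinery
are ignored entirely.

#include <stddef.h>
#include <string.h>

/* Clear size bytes at dst: bulk 8-byte stores from the start, then
 * small stores anchored at the start and at dst + size, so a ragged
 * tail needs at most two extra stores. */
void clear_model(unsigned char *dst, size_t size)
{
	unsigned char *end = dst + size;

	if (size >= 8) {
		do {				/* label 1: 8 bytes at a time */
			memset(dst, 0, 8);
			dst += 8;
		} while (end - dst > 8);
		memset(end - 8, 0, 8);		/* overlapping 8-byte tail */
		return;
	}
	if (size & 4) {				/* 4-7 bytes: two overlapping words */
		memset(dst, 0, 4);
		memset(end - 4, 0, 4);
		return;
	}
	if (size & 2)				/* 2-3 bytes: halfword at the start */
		memset(dst, 0, 2);
	if (size & 1)				/* odd byte at the very end */
		memset(end - 1, 0, 1);
}

The real routine differs in that every store is an unprivileged STTR
wrapped in a USER() annotation, which is what the fixup labels in the
patch below account for when a store faults part-way through.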

Comments

Mark Rutland May 26, 2021, 11:15 a.m. UTC | #1
On Fri, May 14, 2021 at 12:57:07PM +0100, Robin Murphy wrote:
> Now that we're always using STTR variants rather than abstracting two
> different addressing modes, the user_ldst macro here is frankly more
> obfuscating than helpful. Rewrite __arch_clear_user() with regular
> USER() annotations so that it's clearer what's going on, and take the
> opportunity to minimise the branchiness in the most common paths, while
> also allowing the exception fixup to return an accurate result.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> 
> v2: Tweak exception fixup to be exact
> 
>  arch/arm64/lib/clear_user.S | 44 ++++++++++++++++++++-----------------
>  1 file changed, 24 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
> index af9afcbec92c..d30c03f6caed 100644
> --- a/arch/arm64/lib/clear_user.S
> +++ b/arch/arm64/lib/clear_user.S
> @@ -1,12 +1,9 @@
>  /* SPDX-License-Identifier: GPL-2.0-only */
>  /*
> - * Based on arch/arm/lib/clear_user.S
> - *
> - * Copyright (C) 2012 ARM Ltd.
> + * Copyright (C) 2021 Arm Ltd.
>   */
> -#include <linux/linkage.h>
>  
> -#include <asm/asm-uaccess.h>
> +#include <linux/linkage.h>
>  #include <asm/assembler.h>
>  
>  	.text
> @@ -19,25 +16,30 @@
>   *
>   * Alignment fixed up by hardware.
>   */
> +	.p2align 4

Could we note the reason for the alignment in the commit message? IIUC
that's for consistency with the other routines.

Regardless, this looks good to me. I hand-executed this for `sz` in the
range 0 to 7 to make sure the logic and fixups were correct, and that
all looks good to me. FWIW:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>

Mark.

>  SYM_FUNC_START(__arch_clear_user)
> -	mov	x2, x1			// save the size for fixup return
> +	add	x2, x0, x1
>  	subs	x1, x1, #8
>  	b.mi	2f
>  1:
> -user_ldst 9f, sttr, xzr, x0, 8
> +USER(9f, sttr	xzr, [x0])
> +	add	x0, x0, #8
>  	subs	x1, x1, #8
> -	b.pl	1b
> -2:	adds	x1, x1, #4
> -	b.mi	3f
> -user_ldst 9f, sttr, wzr, x0, 4
> -	sub	x1, x1, #4
> -3:	adds	x1, x1, #2
> -	b.mi	4f
> -user_ldst 9f, sttrh, wzr, x0, 2
> -	sub	x1, x1, #2
> -4:	adds	x1, x1, #1
> -	b.mi	5f
> -user_ldst 9f, sttrb, wzr, x0, 0
> +	b.hi	1b
> +USER(9f, sttr	xzr, [x2, #-8])
> +	mov	x0, #0
> +	ret
> +
> +2:	tbz	x1, #2, 3f
> +USER(9f, sttr	wzr, [x0])
> +USER(8f, sttr	wzr, [x2, #-4])
> +	mov	x0, #0
> +	ret
> +
> +3:	tbz	x1, #1, 4f
> +USER(9f, sttrh	wzr, [x0])
> +4:	tbz	x1, #0, 5f
> +USER(7f, sttrb	wzr, [x2, #-1])
>  5:	mov	x0, #0
>  	ret
>  SYM_FUNC_END(__arch_clear_user)
> @@ -45,6 +47,8 @@ EXPORT_SYMBOL(__arch_clear_user)
>  
>  	.section .fixup,"ax"
>  	.align	2
> -9:	mov	x0, x2			// return the original size
> +7:	sub	x0, x2, #5	// Adjust for faulting on the final byte...
> +8:	add	x0, x0, #4	// ...or the second word of the 4-7 byte case
> +9:	sub	x0, x2, x0
>  	ret
>  	.previous
> -- 
> 2.21.0.dirty
>
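
(As an aside, the 0-7 byte check Mark describes above is easy to replay
on the host. The harness below is an editorial sketch and not part of
the thread: it mirrors the tbz-driven tail of the new code and verifies
that every byte in [0, sz) gets written.)

#include <assert.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	for (unsigned int sz = 0; sz < 8; sz++) {
		unsigned char buf[8];

		memset(buf, 0xff, sizeof(buf));
		/* Mirror labels 2/3/4: test bit 2, then bits 1 and 0 of sz. */
		if (sz & 4) {
			memset(buf, 0, 4);		/* sttr  wzr, [x0]      */
			memset(buf + sz - 4, 0, 4);	/* sttr  wzr, [x2, #-4] */
		} else {
			if (sz & 2)
				memset(buf, 0, 2);	/* sttrh wzr, [x0]      */
			if (sz & 1)
				memset(buf + sz - 1, 0, 1); /* sttrb wzr, [x2, #-1] */
		}
		for (unsigned int i = 0; i < sz; i++)
			assert(buf[i] == 0);
		printf("sz=%u OK\n", sz);
	}
	return 0;
}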
Robin Murphy May 27, 2021, 1:24 p.m. UTC | #2
On 2021-05-26 12:15, Mark Rutland wrote:
> On Fri, May 14, 2021 at 12:57:07PM +0100, Robin Murphy wrote:
>> Now that we're always using STTR variants rather than abstracting two
>> different addressing modes, the user_ldst macro here is frankly more
>> obfuscating than helpful. Rewrite __arch_clear_user() with regular
>> USER() annotations so that it's clearer what's going on, and take the
>> opportunity to minimise the branchiness in the most common paths, while
>> also allowing the exception fixup to return an accurate result.
>>
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>>
>> v2: Tweak exception fixup to be exact
>>
>>   arch/arm64/lib/clear_user.S | 44 ++++++++++++++++++++-----------------
>>   1 file changed, 24 insertions(+), 20 deletions(-)
>>
>> diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
>> index af9afcbec92c..d30c03f6caed 100644
>> --- a/arch/arm64/lib/clear_user.S
>> +++ b/arch/arm64/lib/clear_user.S
>> @@ -1,12 +1,9 @@
>>   /* SPDX-License-Identifier: GPL-2.0-only */
>>   /*
>> - * Based on arch/arm/lib/clear_user.S
>> - *
>> - * Copyright (C) 2012 ARM Ltd.
>> + * Copyright (C) 2021 Arm Ltd.
>>    */
>> -#include <linux/linkage.h>
>>   
>> -#include <asm/asm-uaccess.h>
>> +#include <linux/linkage.h>
>>   #include <asm/assembler.h>
>>   
>>   	.text
>> @@ -19,25 +16,30 @@
>>    *
>>    * Alignment fixed up by hardware.
>>    */
>> +	.p2align 4
> 
> Could we note the reason for the alignment in the commit message? IIUC
> that's for consistency with the other routines.

Yes, it's alignment for the loop as per the other routines - since the
prologue (including the BTI landing pad) is the same length as we're
aligning to, we may as well let any padding fall outside the function.
I'll comment that in the code, since it might be a bit subtle.

> Regardless, this looks good to me. I hand-executed this for `sz` in the
> range 0 to 7 to make sure the logic and fixups were correct, and that
> all looks good to me. FWIW:
> 
> Reviewed-by: Mark Rutland <mark.rutland@arm.com>

Thanks! I'll clarify the relevant commit messages about the relicensing 
as well and repost the series shortly, unless there are any further 
comments.

Cheers,
Robin.

> 
> Mark.
> 
>>   SYM_FUNC_START(__arch_clear_user)
>> -	mov	x2, x1			// save the size for fixup return
>> +	add	x2, x0, x1
>>   	subs	x1, x1, #8
>>   	b.mi	2f
>>   1:
>> -user_ldst 9f, sttr, xzr, x0, 8
>> +USER(9f, sttr	xzr, [x0])
>> +	add	x0, x0, #8
>>   	subs	x1, x1, #8
>> -	b.pl	1b
>> -2:	adds	x1, x1, #4
>> -	b.mi	3f
>> -user_ldst 9f, sttr, wzr, x0, 4
>> -	sub	x1, x1, #4
>> -3:	adds	x1, x1, #2
>> -	b.mi	4f
>> -user_ldst 9f, sttrh, wzr, x0, 2
>> -	sub	x1, x1, #2
>> -4:	adds	x1, x1, #1
>> -	b.mi	5f
>> -user_ldst 9f, sttrb, wzr, x0, 0
>> +	b.hi	1b
>> +USER(9f, sttr	xzr, [x2, #-8])
>> +	mov	x0, #0
>> +	ret
>> +
>> +2:	tbz	x1, #2, 3f
>> +USER(9f, sttr	wzr, [x0])
>> +USER(8f, sttr	wzr, [x2, #-4])
>> +	mov	x0, #0
>> +	ret
>> +
>> +3:	tbz	x1, #1, 4f
>> +USER(9f, sttrh	wzr, [x0])
>> +4:	tbz	x1, #0, 5f
>> +USER(7f, sttrb	wzr, [x2, #-1])
>>   5:	mov	x0, #0
>>   	ret
>>   SYM_FUNC_END(__arch_clear_user)
>> @@ -45,6 +47,8 @@ EXPORT_SYMBOL(__arch_clear_user)
>>   
>>   	.section .fixup,"ax"
>>   	.align	2
>> -9:	mov	x0, x2			// return the original size
>> +7:	sub	x0, x2, #5	// Adjust for faulting on the final byte...
>> +8:	add	x0, x0, #4	// ...or the second word of the 4-7 byte case
>> +9:	sub	x0, x2, x0
>>   	ret
>>   	.previous
>> -- 
>> 2.21.0.dirty
>>

Patch

diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index af9afcbec92c..d30c03f6caed 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -1,12 +1,9 @@ 
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Based on arch/arm/lib/clear_user.S
- *
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2021 Arm Ltd.
  */
-#include <linux/linkage.h>
 
-#include <asm/asm-uaccess.h>
+#include <linux/linkage.h>
 #include <asm/assembler.h>
 
 	.text
@@ -19,25 +16,30 @@ 
  *
  * Alignment fixed up by hardware.
  */
+	.p2align 4
 SYM_FUNC_START(__arch_clear_user)
-	mov	x2, x1			// save the size for fixup return
+	add	x2, x0, x1
 	subs	x1, x1, #8
 	b.mi	2f
 1:
-user_ldst 9f, sttr, xzr, x0, 8
+USER(9f, sttr	xzr, [x0])
+	add	x0, x0, #8
 	subs	x1, x1, #8
-	b.pl	1b
-2:	adds	x1, x1, #4
-	b.mi	3f
-user_ldst 9f, sttr, wzr, x0, 4
-	sub	x1, x1, #4
-3:	adds	x1, x1, #2
-	b.mi	4f
-user_ldst 9f, sttrh, wzr, x0, 2
-	sub	x1, x1, #2
-4:	adds	x1, x1, #1
-	b.mi	5f
-user_ldst 9f, sttrb, wzr, x0, 0
+	b.hi	1b
+USER(9f, sttr	xzr, [x2, #-8])
+	mov	x0, #0
+	ret
+
+2:	tbz	x1, #2, 3f
+USER(9f, sttr	wzr, [x0])
+USER(8f, sttr	wzr, [x2, #-4])
+	mov	x0, #0
+	ret
+
+3:	tbz	x1, #1, 4f
+USER(9f, sttrh	wzr, [x0])
+4:	tbz	x1, #0, 5f
+USER(7f, sttrb	wzr, [x2, #-1])
 5:	mov	x0, #0
 	ret
 SYM_FUNC_END(__arch_clear_user)
@@ -45,6 +47,8 @@  EXPORT_SYMBOL(__arch_clear_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	mov	x0, x2			// return the original size
+7:	sub	x0, x2, #5	// Adjust for faulting on the final byte...
+8:	add	x0, x0, #4	// ...or the second word of the 4-7 byte case
+9:	sub	x0, x2, x0
 	ret
 	.previous
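
To make the fixup arithmetic concrete, here is an editorial C rendering
of the three entry points (the function names are invented; x2 holds
dst + size throughout, x0 is whatever the faulting path left in it, and
the result is the number of bytes that were not cleared):

/* Label 9: a store at [x0] faulted (loop body, first word of the 4-7
 * byte case, the halfword, or the overlapping 8-byte tail). Everything
 * below x0 is already clear, so return end minus the store address. */
unsigned long fixup9(unsigned long x0, unsigned long x2)
{
	return x2 - x0;
}

/* Label 8: the second word of the 4-7 byte case faulted at [x2, #-4];
 * the first word at [x0] already succeeded, so credit those 4 bytes. */
unsigned long fixup8(unsigned long x0, unsigned long x2)
{
	return fixup9(x0 + 4, x2);
}

/* Label 7: the final byte store at [x2, #-1] faulted; only that one
 * byte is left. The assembly reaches the same result by loading
 * x2 - 5 into x0 and falling through 8 and 9: x2 - ((x2 - 5) + 4) == 1. */
unsigned long fixup7(unsigned long x2)
{
	return fixup8(x2 - 5, x2);
}

In each case the returned count matches the "exact" fixup promised in
the v2 changelog.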