diff mbox series

[v3,05/18] arm64: Do not enable uaccess for flush_icache_range

Message ID 20210520124406.2731873-6-tabba@google.com (mailing list archive)
State New, archived
Headers show
Series Tidy up cache.S | expand

Commit Message

Fuad Tabba May 20, 2021, 12:43 p.m. UTC
__flush_icache_range works on the kernel linear map, and doesn't
need uaccess. The existing code is a side-effect of its current
implementation with __flush_cache_user_range fallthrough.

Instead of fallthrough to share the code, use a common macro for
the two where the caller specifies an optional fixup label if
user access is needed. If provided, this label would be used to
generate an extable entry.

No functional change intended.
Possible performance impact due to the reduced number of
instructions.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/mm/cache.S | 64 +++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 23 deletions(-)

Comments

Mark Rutland May 20, 2021, 2:02 p.m. UTC | #1
On Thu, May 20, 2021 at 01:43:53PM +0100, Fuad Tabba wrote:
> __flush_icache_range works on the kernel linear map, and doesn't
> need uaccess. The existing code is a side-effect of its current
> implementation with __flush_cache_user_range fallthrough.
> 
> Instead of fallthrough to share the code, use a common macro for
> the two where the caller specifies an optional fixup label if
> user access is needed. If provided, this label would be used to
> generate an extable entry.
> 
> No functional change intended.
> Possible performance impact due to the reduced number of
> instructions.
> 
> Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> Reported-by: Will Deacon <will@kernel.org>
> Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> Signed-off-by: Fuad Tabba <tabba@google.com>

I have one comment below, but either way this looks good to me, so:

Acked-by: Mark Rutland <mark.rutland@arm.com>

> ---
>  arch/arm64/mm/cache.S | 64 +++++++++++++++++++++++++++----------------
>  1 file changed, 41 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 5ff8dfa86975..c6bc3b8138e1 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -14,6 +14,41 @@
>  #include <asm/alternative.h>
>  #include <asm/asm-uaccess.h>
>  
> +/*
> + *	__flush_cache_range(start,end) [fixup]
> + *
> + *	Ensure that the I and D caches are coherent within specified region.
> + *	This is typically used when code has been written to a memory region,
> + *	and will be executed.
> + *
> + *	- start   - virtual start address of region
> + *	- end     - virtual end address of region
> + *	- fixup   - optional label to branch to on user fault
> + */
> +.macro	__flush_cache_range, fixup
> +alternative_if ARM64_HAS_CACHE_IDC
> +	dsb	ishst
> +	b	.Ldc_skip_\@
> +alternative_else_nop_endif
> +	dcache_line_size x2, x3
> +	sub	x3, x2, #1
> +	bic	x4, x0, x3
> +.Ldc_loop_\@:
> +user_alt "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE, \fixup
> +	add	x4, x4, x2
> +	cmp	x4, x1
> +	b.lo	.Ldc_loop_\@
> +	dsb	ish

As on the prior patch, I reckon it'd be nicer overall to align with the
*by_line macros and have an explicit _cond_extable here, e.g.

| .Ldc_op\@:
| 	alternative_insn "dc cvau, x4",  "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
| 	add	x4, x4, x2
| 	cmp     x4, x1
| 	b.lo	.Ldc_op\@
| 	dsb	ish
| ...
| 	// just before the .endm
| 	_cond_extable .Ldc_op\@, \fixup

... and with some rework it might be possible to use dcache_by_line_op
directly here (it currently clobbers the base and end, so can't be used
as-is).

Thanks,
Mark.

> +
> +.Ldc_skip_\@:
> +alternative_if ARM64_HAS_CACHE_DIC
> +	isb
> +	b	.Lic_skip_\@
> +alternative_else_nop_endif
> +	invalidate_icache_by_line x0, x1, x2, x3, \fixup
> +.Lic_skip_\@:
> +.endm
> +
>  /*
>   *	flush_icache_range(start,end)
>   *
> @@ -25,7 +60,9 @@
>   *	- end     - virtual end address of region
>   */
>  SYM_FUNC_START(__flush_icache_range)
> -	/* FALLTHROUGH */
> +	__flush_cache_range
> +	ret
> +SYM_FUNC_END(__flush_icache_range)
>  
>  /*
>   *	__flush_cache_user_range(start,end)
> @@ -39,34 +76,15 @@ SYM_FUNC_START(__flush_icache_range)
>   */
>  SYM_FUNC_START(__flush_cache_user_range)
>  	uaccess_ttbr0_enable x2, x3, x4
> -alternative_if ARM64_HAS_CACHE_IDC
> -	dsb	ishst
> -	b	7f
> -alternative_else_nop_endif
> -	dcache_line_size x2, x3
> -	sub	x3, x2, #1
> -	bic	x4, x0, x3
> -1:
> -user_alt "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE, 9f
> -	add	x4, x4, x2
> -	cmp	x4, x1
> -	b.lo	1b
> -	dsb	ish
>  
> -7:
> -alternative_if ARM64_HAS_CACHE_DIC
> -	isb
> -	b	8f
> -alternative_else_nop_endif
> -	invalidate_icache_by_line x0, x1, x2, x3, 9f
> -8:	mov	x0, #0
> +	__flush_cache_range 2f
> +	mov	x0, xzr
>  1:
>  	uaccess_ttbr0_disable x1, x2
>  	ret
> -9:
> +2:
>  	mov	x0, #-EFAULT
>  	b	1b
> -SYM_FUNC_END(__flush_icache_range)
>  SYM_FUNC_END(__flush_cache_user_range)
>  
>  /*
> -- 
> 2.31.1.751.gd2f1c929bd-goog
>
Mark Rutland May 20, 2021, 3:37 p.m. UTC | #2
On Thu, May 20, 2021 at 03:02:16PM +0100, Mark Rutland wrote:
> On Thu, May 20, 2021 at 01:43:53PM +0100, Fuad Tabba wrote:
> > __flush_icache_range works on the kernel linear map, and doesn't
> > need uaccess. The existing code is a side-effect of its current
> > implementation with __flush_cache_user_range fallthrough.
> > 
> > Instead of fallthrough to share the code, use a common macro for
> > the two where the caller specifies an optional fixup label if
> > user access is needed. If provided, this label would be used to
> > generate an extable entry.
> > 
> > No functional change intended.
> > Possible performance impact due to the reduced number of
> > instructions.
> > 
> > Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> > Reported-by: Will Deacon <will@kernel.org>
> > Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> 
> I have one comment below, but either way this looks good to me, so:
> 
> Acked-by: Mark Rutland <mark.rutland@arm.com>
> 
> > ---
> >  arch/arm64/mm/cache.S | 64 +++++++++++++++++++++++++++----------------
> >  1 file changed, 41 insertions(+), 23 deletions(-)
> > 
> > diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> > index 5ff8dfa86975..c6bc3b8138e1 100644
> > --- a/arch/arm64/mm/cache.S
> > +++ b/arch/arm64/mm/cache.S
> > @@ -14,6 +14,41 @@
> >  #include <asm/alternative.h>
> >  #include <asm/asm-uaccess.h>
> >  
> > +/*
> > + *	__flush_cache_range(start,end) [fixup]
> > + *
> > + *	Ensure that the I and D caches are coherent within specified region.
> > + *	This is typically used when code has been written to a memory region,
> > + *	and will be executed.
> > + *
> > + *	- start   - virtual start address of region
> > + *	- end     - virtual end address of region
> > + *	- fixup   - optional label to branch to on user fault
> > + */
> > +.macro	__flush_cache_range, fixup
> > +alternative_if ARM64_HAS_CACHE_IDC
> > +	dsb	ishst
> > +	b	.Ldc_skip_\@
> > +alternative_else_nop_endif
> > +	dcache_line_size x2, x3
> > +	sub	x3, x2, #1
> > +	bic	x4, x0, x3
> > +.Ldc_loop_\@:
> > +user_alt "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE, \fixup
> > +	add	x4, x4, x2
> > +	cmp	x4, x1
> > +	b.lo	.Ldc_loop_\@
> > +	dsb	ish
> 
> As on the prior patch, I reckon it'd be nicer overall to align with the
> *by_line macros and have an explicit _cond_extable here, e.g.
> 
> | .Ldc_op\@:
> | 	alternative_insn "dc cvau, x4",  "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
> | 	add	x4, x4, x2
> | 	cmp     x4, x1
> | 	b.lo	.Ldc_op\@
> | 	dsb	ish
> | ...
> | 	// just before the .endm
> | 	_cond_extable .Ldc_op\@, \fixup
> 
> ... and with some rework it might be possible to use dcache_by_line_op
> directly here (it currently clobbers the base and end, so can't be used
> as-is).

Having thought about this a bit more, it's simple enough to do that now:

| alternative_if ARM64_HAS_CACHE_IDC
| 	dsb	ishst
| 	b	.Ldc_skip_\@
| alternative_else_nop_endif
| 	mov	x0, x2
| 	add	x3, x0, x1
| 	dcache_by_line_op cvau, ishst, x2, x3, x4, x5, \fixup
| .Ldc_skip_\@

... and then we just need to change the ADD to a MOV when we change the
macro to take the end in x1.

Note that dcache_by_line_op will automatically upgrade 'cvau' to 'civac'
when ARM64_WORKAROUND_CLEAN_CACHE is present, so the resulting logic is
the same.

Thanks,
Mark.
Mark Rutland May 21, 2021, 12:18 p.m. UTC | #3
On Thu, May 20, 2021 at 04:37:35PM +0100, Mark Rutland wrote:
> On Thu, May 20, 2021 at 03:02:16PM +0100, Mark Rutland wrote:
> Having thought about this a bit more, it's simple enough to do that now:
> 
> | alternative_if ARM64_HAS_CACHE_IDC
> | 	dsb	ishst
> | 	b	.Ldc_skip_\@
> | alternative_else_nop_endif
> | 	mov	x0, x2
> | 	add	x3, x0, x1
> | 	dcache_by_line_op cvau, ishst, x2, x3, x4, x5, \fixup
> | .Ldc_skip_\@

Looking at this again, that "ishst" should be "ish", but otherwise this
stands.

Mark.
Catalin Marinas May 25, 2021, 11:18 a.m. UTC | #4
On Thu, May 20, 2021 at 01:43:53PM +0100, Fuad Tabba wrote:
> __flush_icache_range works on the kernel linear map, and doesn't
> need uaccess. The existing code is a side-effect of its current
> implementation with __flush_cache_user_range fallthrough.
> 
> Instead of fallthrough to share the code, use a common macro for
> the two where the caller specifies an optional fixup label if
> user access is needed. If provided, this label would be used to
> generate an extable entry.
> 
> No functional change intended.
> Possible performance impact due to the reduced number of
> instructions.
> 
> Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> Reported-by: Will Deacon <will@kernel.org>
> Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> Signed-off-by: Fuad Tabba <tabba@google.com>

Just a few acks on the patches that have my reported-by but I'm happy
with the series overall, nice clean-up.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
diff mbox series

Patch

diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 5ff8dfa86975..c6bc3b8138e1 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -14,6 +14,41 @@ 
 #include <asm/alternative.h>
 #include <asm/asm-uaccess.h>
 
+/*
+ *	__flush_cache_range(start,end) [fixup]
+ *
+ *	Ensure that the I and D caches are coherent within specified region.
+ *	This is typically used when code has been written to a memory region,
+ *	and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *	- fixup   - optional label to branch to on user fault
+ */
+.macro	__flush_cache_range, fixup
+alternative_if ARM64_HAS_CACHE_IDC
+	dsb	ishst
+	b	.Ldc_skip_\@
+alternative_else_nop_endif
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+.Ldc_loop_\@:
+user_alt "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE, \fixup
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	.Ldc_loop_\@
+	dsb	ish
+
+.Ldc_skip_\@:
+alternative_if ARM64_HAS_CACHE_DIC
+	isb
+	b	.Lic_skip_\@
+alternative_else_nop_endif
+	invalidate_icache_by_line x0, x1, x2, x3, \fixup
+.Lic_skip_\@:
+.endm
+
 /*
  *	flush_icache_range(start,end)
  *
@@ -25,7 +60,9 @@ 
  *	- end     - virtual end address of region
  */
 SYM_FUNC_START(__flush_icache_range)
-	/* FALLTHROUGH */
+	__flush_cache_range
+	ret
+SYM_FUNC_END(__flush_icache_range)
 
 /*
  *	__flush_cache_user_range(start,end)
@@ -39,34 +76,15 @@  SYM_FUNC_START(__flush_icache_range)
  */
 SYM_FUNC_START(__flush_cache_user_range)
 	uaccess_ttbr0_enable x2, x3, x4
-alternative_if ARM64_HAS_CACHE_IDC
-	dsb	ishst
-	b	7f
-alternative_else_nop_endif
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x4, x0, x3
-1:
-user_alt "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE, 9f
-	add	x4, x4, x2
-	cmp	x4, x1
-	b.lo	1b
-	dsb	ish
 
-7:
-alternative_if ARM64_HAS_CACHE_DIC
-	isb
-	b	8f
-alternative_else_nop_endif
-	invalidate_icache_by_line x0, x1, x2, x3, 9f
-8:	mov	x0, #0
+	__flush_cache_range 2f
+	mov	x0, xzr
 1:
 	uaccess_ttbr0_disable x1, x2
 	ret
-9:
+2:
 	mov	x0, #-EFAULT
 	b	1b
-SYM_FUNC_END(__flush_icache_range)
 SYM_FUNC_END(__flush_cache_user_range)
 
 /*