
[v1,01/13] arm64: Do not enable uaccess for flush_icache_range

Message ID 20210511144252.3779113-2-tabba@google.com (mailing list archive)
State New, archived
Series Tidy up cache.S

Commit Message

Fuad Tabba May 11, 2021, 2:42 p.m. UTC
__flush_icache_range works on the kernel linear map, and doesn't
need uaccess. The existing uaccess handling is a side-effect of its
current implementation, which falls through into
__flush_cache_user_range.

Instead of sharing the code via fallthrough, use a common macro for
the two, where the caller can specify whether user-space access is
needed.

No functional change intended.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/include/asm/assembler.h | 13 ++++--
 arch/arm64/mm/cache.S              | 64 +++++++++++++++++++++---------
 2 files changed, 54 insertions(+), 23 deletions(-)
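
A note on the mechanism, for readers less familiar with GNU assembler
macros: the `.if \needs_uaccess` tests below are evaluated at assembly
time, so each expansion of the macro contains only one of the two paths and
the generated functions carry no runtime check. A minimal, self-contained
illustration of the pattern (the names here are made up for this sketch and
are not taken from the patch):

	.macro	maint_range, needs_uaccess
	.if	\needs_uaccess
	nop				// user-access path assembled here
	.else
	nop				// kernel-only path assembled here
	.endif
	ret
	.endm

kernel_variant:
	maint_range	needs_uaccess=0	// expands to the kernel-only path
user_variant:
	maint_range	needs_uaccess=1	// expands to the user-access path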

Comments

Mark Rutland May 11, 2021, 3:22 p.m. UTC | #1
Hi Fuad,

On Tue, May 11, 2021 at 03:42:40PM +0100, Fuad Tabba wrote:
> __flush_icache_range works on the kernel linear map, and doesn't
> need uaccess. The existing code is a side-effect of its current
> implementation with __flush_cache_user_range fallthrough.
> 
> Instead of fallthrough to share the code, use a common macro for
> the two where the caller can specify whether user-space access is
> needed.

FWIW, I agree that we should fix __flush_icache_range to not fiddle
with uaccess, and that we should split these.

> No functional change intended.

There is a performance change here, since the existing
`__flush_cache_user_range` takes IDC and DIC into account, whereas
`invalidate_icache_by_line` does not.

There's also an existing oversight where `__flush_cache_user_range`
takes ARM64_WORKAROUND_CLEAN_CACHE into account, but
`invalidate_icache_by_line` does not. I think that's a bug that we
should fix first, so that we can backport something to stable. Arguably
similar is true in `swsusp_arch_suspend_exit`, but for that we could add
a comment and always use `DC CIVAC`.
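
For illustration, the `swsusp_arch_suspend_exit` idea would amount to
something like the fragment below (a rough sketch only; the register name
and surrounding code are made up rather than taken from hibernate-asm.S):

	/*
	 * Don't rely on the ARM64_WORKAROUND_CLEAN_CACHE alternative here;
	 * unconditionally clean+invalidate instead.
	 */
	dc	civac, x2		// rather than a patched "dc cvau"

i.e. use the stronger op unconditionally, with a comment explaining why.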

Thanks,
Mark.

Robin Murphy May 11, 2021, 4:53 p.m. UTC | #2
On 2021-05-11 15:42, Fuad Tabba wrote:
> __flush_icache_range works on the kernel linear map, and doesn't
> need uaccess. The existing code is a side-effect of its current
> implementation with __flush_cache_user_range fallthrough.
> 
> Instead of fallthrough to share the code, use a common macro for
> the two where the caller can specify whether user-space access is
> needed.
> 
> No functional change intended.
> 
> Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> Reported-by: Will Deacon <will@kernel.org>
> Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>   arch/arm64/include/asm/assembler.h | 13 ++++--
>   arch/arm64/mm/cache.S              | 64 +++++++++++++++++++++---------
>   2 files changed, 54 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 8418c1bd8f04..6ff7a3a3b238 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -426,16 +426,21 @@ alternative_endif
>    * Macro to perform an instruction cache maintenance for the interval
>    * [start, end)
>    *
> - * 	start, end:	virtual addresses describing the region
> - *	label:		A label to branch to on user fault.
> - * 	Corrupts:	tmp1, tmp2
> + *	start, end:	virtual addresses describing the region
> + *	needs_uaccess:	might access user space memory
> + *	label:		label to branch to on user fault (if needs_uaccess)
> + *	Corrupts:	tmp1, tmp2
>    */
> -	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
> +	.macro invalidate_icache_by_line start, end, tmp1, tmp2, needs_uaccess, label
>   	icache_line_size \tmp1, \tmp2
>   	sub	\tmp2, \tmp1, #1
>   	bic	\tmp2, \start, \tmp2
>   9997:
> +	.if	\needs_uaccess
>   USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
> +	.else
> +	ic	ivau, \tmp2
> +	.endif
>   	add	\tmp2, \tmp2, \tmp1
>   	cmp	\tmp2, \end
>   	b.lo	9997b
> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 2d881f34dd9d..092f73acdf9a 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -15,30 +15,20 @@
>   #include <asm/asm-uaccess.h>
>   
>   /*
> - *	flush_icache_range(start,end)
> + *	__flush_cache_range(start,end) [needs_uaccess]
>    *
>    *	Ensure that the I and D caches are coherent within specified region.
>    *	This is typically used when code has been written to a memory region,
>    *	and will be executed.
>    *
> - *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> + *	- start   	- virtual start address of region
> + *	- end     	- virtual end address of region
> + *	- needs_uaccess - (macro parameter) might access user space memory
>    */
> -SYM_FUNC_START(__flush_icache_range)
> -	/* FALLTHROUGH */
> -
> -/*
> - *	__flush_cache_user_range(start,end)
> - *
> - *	Ensure that the I and D caches are coherent within specified region.
> - *	This is typically used when code has been written to a memory region,
> - *	and will be executed.
> - *
> - *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> - */
> -SYM_FUNC_START(__flush_cache_user_range)
> +.macro	__flush_cache_range, needs_uaccess
> +	.if 	\needs_uaccess
>   	uaccess_ttbr0_enable x2, x3, x4
> +	.endif

Nit: this feels like it belongs directly in __flush_cache_user_range() 
rather than being hidden in the macro, since it's not really an integral 
part of the cache maintenance operation itself.
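
A rough sketch of that shape (illustrative only -- it assumes the macro is
also reworked so that it no longer returns or touches TTBR0 itself, and the
fault-handling/return-value path is omitted; this is not actual v2 code):

SYM_FUNC_START(__flush_cache_user_range)
	uaccess_ttbr0_enable x2, x3, x4		// uaccess handling lives in the
	__flush_cache_range needs_uaccess=1	// entry point rather than in the
	uaccess_ttbr0_disable x1, x2		// cache-maintenance macro
	ret
SYM_FUNC_END(__flush_cache_user_range)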

Robin.

Fuad Tabba May 12, 2021, 8:52 a.m. UTC | #3
Hi Mark,

> > No functional change intended.
>
> There is a performance change here, since the existing
> `__flush_cache_user_range` takes IDC and DIC into account, whereas
> `invalidate_icache_by_line` does not.

You're right. There is a performance change in this patch and a couple
of the others, which I will note in v2. However, I don't think that
this patch changes the behavior when it comes to IDC and DIC, does it?

> There's also an existing oversight where `__flush_cache_user_range`
> takes ARM64_WORKAROUND_CLEAN_CACHE into account, but
> `invalidate_icache_by_line` does not. I think that's a bug that we
> should fix first, so that we can backport something to stable.

I'd be happy to address that in v2, but let me make sure I understand
the issue properly.

Errata 819472 and friends (ARM64_WORKAROUND_CLEAN_CACHE) are related
to cache maintenance operations on data caches happening concurrently
with other accesses to the same address. The two places
invalidate_icache_by_line is used in conjunction with data caches are
__flush_icache_range and __flush_cache_user_range (which share the
same code before and after my patch series). In both cases,
invalidate_icache_by_line is called after the workaround is applied.
The third and only other user of invalidate_icache_by_line is
invalidate_icache_range, which only performs cache maintenance on the
icache.

The concern is that invalidate_icache_range might be performing a
cache maintenance operation on an address concurrently with another
processor performing a dc operation on the same address. Therefore,
invalidate_icache_range should perform DC CIVAC on the line before
invalidate_icache_by_line if ARM64_WORKAROUND_CLEAN_CACHE applies. Is
that right?

https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d

> Arguably
> similar is true in `swsusp_arch_suspend_exit`, but for that we could add
> a comment and always use `DC CIVAC`.

I can do that in v2 as well.

Thanks,
/fuad

Fuad Tabba May 12, 2021, 8:57 a.m. UTC | #4
Hi Robin,

> > -SYM_FUNC_START(__flush_cache_user_range)
> > +.macro       __flush_cache_range, needs_uaccess
> > +     .if     \needs_uaccess
> >       uaccess_ttbr0_enable x2, x3, x4
> > +     .endif
>
> Nit: this feels like it belongs directly in __flush_cache_user_range()
> rather than being hidden in the macro, since it's not really an integral
> part of the cache maintenance operation itself.

I will fix this in v2.

Thanks,
/fuad

Mark Rutland May 12, 2021, 9:59 a.m. UTC | #5
On Wed, May 12, 2021 at 09:52:28AM +0100, Fuad Tabba wrote:
> Hi Mark,
> 
> > > No functional change intended.
> >
> > There is a performance change here, since the existing
> > `__flush_cache_user_range` takes IDC and DIC into account, whereas
> > `invalidate_icache_by_line` does not.
> 
> You're right. There is a performance change in this patch and a couple
> of the others, which I will note in v2. However, I don't think that
> this patch changes the behavior when it comes to IDC and DIC, does it?

It shouldn't be a functional problem, but it means that the new
`__flush_icache_range` will always perform redundant I-cache maintenance
rather than skipping this when the cpu has DIC=1.

It would be nice if we could structure this to take DIC into account
either in the new `__flush_icache_range`, or in the
`invalidate_icache_by_line` helper.

> > There's also an existing oversight where `__flush_cache_user_range`
> > takes ARM64_WORKAROUND_CLEAN_CACHE into account, but
> > `invalidate_icache_by_line` does not.

Sorry about this. I was evidently confused, as this does not make any
sense. This doesn't matter to `invalidate_icache_by_line`, and
`invalidate_dcache_by_line` already does the right thing via
`__dcache_op_workaround_clean_cache`.
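
For reference, `__dcache_op_workaround_clean_cache` is the assembler.h
helper that substitutes DC CIVAC when the erratum workaround is in effect;
it is roughly of the following shape (paraphrased from memory, so check the
tree for the exact definition):

	.macro	__dcache_op_workaround_clean_cache op, kaddr
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
	dc	\op, \kaddr			// the requested D-cache op
alternative_else
	dc	civac, \kaddr			// workaround: clean+invalidate
alternative_endif
	.endm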

> I'd be happy to address that in v2, but let me make sure I understand
> the issue properly.
> 
> Errata 819472 and friends (ARM64_WORKAROUND_CLEAN_CACHE) are related
> to cache maintenance operations on data caches happening concurrently
> with other accesses to the same address. The two places
> invalidate_icache_by_line is used in conjunction with data caches are
> __flush_icache_range and __flush_cache_user_range (which share the
> same code before and after my patch series). In both cases,
> invalidate_icache_by_line is called after the workaround is applied.
> The third and only other user of invalidate_icache_by_line is
> invalidate_icache_range, which only performs cache maintenance on the
> icache.
> 
> The concern is that invalidate_icache_range might be performing a
> cache maintenance operation on an address concurrently with another
> processor performing a dc operation on the same address. Therefore,
> invalidate_icache_range should perform DC CIVAC on the line before
> invalidate_icache_by_line if ARM64_WORKAROUND_CLEAN_CACHE applies. Is
> that right?
> 
> https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d

Sorry, I had misread the code, and I don't think there's a bug to fix
here after all. Regardless, thanks for digging into that and trying to
make sense of my bogus suggestion.

> > Arguably similar is true in `swsusp_arch_suspend_exit`, but for that
> > we could add a comment and always use `DC CIVAC`.
> 
> I can do that in v2 as well.

A separate patch for `swsusp_arch_suspend_exit` would be great, since
that is something we should backport to stable as a fix.

Thanks,
Mark.

Fuad Tabba May 12, 2021, 10:29 a.m. UTC | #6
Hi Mark,

On Wed, May 12, 2021 at 10:59 AM Mark Rutland <mark.rutland@arm.com> wrote:
>
> On Wed, May 12, 2021 at 09:52:28AM +0100, Fuad Tabba wrote:
> > Hi Mark,
> >
> > > > No functional change intended.
> > >
> > > There is a performance change here, since the existing
> > > `__flush_cache_user_range` takes IDC and DIC into account, whereas
> > > `invalidate_icache_by_line` does not.
> >
> > You're right. There is a performance change in this patch and a couple
> > of the others, which I will note in v2. However, I don't think that
> > this patch changes the behavior when it comes to IDC and DIC, does it?
>
> It shouldn't be a functional problem, but it means that the new
> `__flush_icache_range` will always perform redundant I-cache maintenance
> rather than skipping this when the cpu has DIC=1.

Sorry, but I can't quite see how this patch makes a difference in
that regard. The existing code has __flush_icache_range fall through
to __flush_cache_user_range, where the ARM64_HAS_CACHE_{IDC,DIC}
alternatives are applied.

In this patch, __flush_icache_range and __flush_cache_user_range share
the same code via the macro, where those alternatives and the branches
over invalidate_icache_by_line are still there and behave the same:
the macro jumps to label 8 if ARM64_HAS_CACHE_DIC, avoiding any
redundant cache maintenance.

Am I missing something else?

> It would be nice if we could structure this to take DIC into account
> either in the new `__flush_icache_range`, or in the
> `invalidate_icache_by_line` helper.

> > > Arguably similar is true in `swsusp_arch_suspend_exit`, but for that
> > > we could add a comment and always use `DC CIVAC`.
> >
> > I can do that in v2 as well.
>
> A separate patch for `swsusp_arch_suspend_exit` would be great, since
> that is something we should backport to stable as a fix.

Will do.

Thanks,
/fuad

Mark Rutland May 12, 2021, 10:53 a.m. UTC | #7
On Wed, May 12, 2021 at 11:29:53AM +0100, Fuad Tabba wrote:
> Hi Mark,
> 
> On Wed, May 12, 2021 at 10:59 AM Mark Rutland <mark.rutland@arm.com> wrote:
> >
> > On Wed, May 12, 2021 at 09:52:28AM +0100, Fuad Tabba wrote:
> > > Hi Mark,
> > >
> > > > > No functional change intended.
> > > >
> > > > There is a performance change here, since the existing
> > > > `__flush_cache_user_range` takes IDC and DIC into account, whereas
> > > > `invalidate_icache_by_line` does not.
> > >
> > > You're right. There is a performance change in this patch and a couple
> > > of the others, which I will note in v2. However, I don't think that
> > > this patch changes the behavior when it comes to IDC and DIC, does it?
> >
> > It shouldn't be a functional problem, but it means that the new
> > `__flush_icache_range` will always perform redundant I-cache maintenance
> > rather than skipping this when the cpu has DIC=1.
> 
> Sorry, but I can't quite see how this patch is making a difference in
> that regard. The existing code has __flush_icache_range fallthrough to
> __flush_cache_user_range, where the alternative_if
> ARM64_HAS_CACHE_{IDC,DIC} are invoked.
> 
> In this patch, __flush_icache_range and __flush_cache_user_range share
> the same code via the macro, where the alternative_ifs and branches
> over invalidate_icache_by_line are still there and behave the same:
> the macro jumps to 8 if ARM64_HAS_CACHE_DIC, avoiding any redundant
> cache maintenance.
> 
> Am I missing something else?

No; you're absolutely right. I had misread the patch and thought the
IDC/DIC parts didn't go into the common macro. That all looks fine.

Sorry again for the noise.

Thanks,
Mark.

Patch

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1bd8f04..6ff7a3a3b238 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -426,16 +426,21 @@  alternative_endif
  * Macro to perform an instruction cache maintenance for the interval
  * [start, end)
  *
- * 	start, end:	virtual addresses describing the region
- *	label:		A label to branch to on user fault.
- * 	Corrupts:	tmp1, tmp2
+ *	start, end:	virtual addresses describing the region
+ *	needs_uaccess:	might access user space memory
+ *	label:		label to branch to on user fault (if needs_uaccess)
+ *	Corrupts:	tmp1, tmp2
  */
-	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+	.macro invalidate_icache_by_line start, end, tmp1, tmp2, needs_uaccess, label
 	icache_line_size \tmp1, \tmp2
 	sub	\tmp2, \tmp1, #1
 	bic	\tmp2, \start, \tmp2
 9997:
+	.if	\needs_uaccess
 USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
+	.else
+	ic	ivau, \tmp2
+	.endif
 	add	\tmp2, \tmp2, \tmp1
 	cmp	\tmp2, \end
 	b.lo	9997b
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 2d881f34dd9d..092f73acdf9a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -15,30 +15,20 @@ 
 #include <asm/asm-uaccess.h>
 
 /*
- *	flush_icache_range(start,end)
+ *	__flush_cache_range(start,end) [needs_uaccess]
  *
  *	Ensure that the I and D caches are coherent within specified region.
  *	This is typically used when code has been written to a memory region,
  *	and will be executed.
  *
- *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- start   	- virtual start address of region
+ *	- end     	- virtual end address of region
+ *	- needs_uaccess - (macro parameter) might access user space memory
  */
-SYM_FUNC_START(__flush_icache_range)
-	/* FALLTHROUGH */
-
-/*
- *	__flush_cache_user_range(start,end)
- *
- *	Ensure that the I and D caches are coherent within specified region.
- *	This is typically used when code has been written to a memory region,
- *	and will be executed.
- *
- *	- start   - virtual start address of region
- *	- end     - virtual end address of region
- */
-SYM_FUNC_START(__flush_cache_user_range)
+.macro	__flush_cache_range, needs_uaccess
+	.if 	\needs_uaccess
 	uaccess_ttbr0_enable x2, x3, x4
+	.endif
 alternative_if ARM64_HAS_CACHE_IDC
 	dsb	ishst
 	b	7f
@@ -47,7 +37,11 @@  alternative_else_nop_endif
 	sub	x3, x2, #1
 	bic	x4, x0, x3
 1:
+	.if 	\needs_uaccess
 user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
+	.else
+alternative_insn "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
+	.endif
 	add	x4, x4, x2
 	cmp	x4, x1
 	b.lo	1b
@@ -58,15 +52,47 @@  alternative_if ARM64_HAS_CACHE_DIC
 	isb
 	b	8f
 alternative_else_nop_endif
-	invalidate_icache_by_line x0, x1, x2, x3, 9f
+	invalidate_icache_by_line x0, x1, x2, x3, \needs_uaccess, 9f
 8:	mov	x0, #0
 1:
+	.if	\needs_uaccess
 	uaccess_ttbr0_disable x1, x2
+	.endif
 	ret
+
+	.if 	\needs_uaccess
 9:
 	mov	x0, #-EFAULT
 	b	1b
+	.endif
+.endm
+
+/*
+ *	flush_icache_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified region.
+ *	This is typically used when code has been written to a memory region,
+ *	and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+SYM_FUNC_START(__flush_icache_range)
+	__flush_cache_range needs_uaccess=0
 SYM_FUNC_END(__flush_icache_range)
+
+/*
+ *	__flush_cache_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified region.
+ *	This is typically used when code has been written to a memory region,
+ *	and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+SYM_FUNC_START(__flush_cache_user_range)
+	__flush_cache_range needs_uaccess=1
 SYM_FUNC_END(__flush_cache_user_range)
 
 /*
@@ -86,7 +112,7 @@  alternative_else_nop_endif
 
 	uaccess_ttbr0_enable x2, x3, x4
 
-	invalidate_icache_by_line x0, x1, x2, x3, 2f
+	invalidate_icache_by_line x0, x1, x2, x3, 1, 2f
 	mov	x0, xzr
 1:
 	uaccess_ttbr0_disable x1, x2