[v2,02/16] arm64: Do not enable uaccess for flush_icache_range

Message ID 20210517075124.152151-3-tabba@google.com (mailing list archive)
State New, archived
Series Tidy up cache.S

Commit Message

Fuad Tabba May 17, 2021, 7:51 a.m. UTC
__flush_icache_range works on the kernel linear map, and doesn't
need uaccess. The existing code is a side-effect of its current
implementation with __flush_cache_user_range fallthrough.

Instead of fallthrough to share the code, use a common macro for
the two where the caller can specify whether user-space access is
needed.

No functional change intended.
Possible performance impact due to the reduced number of
instructions.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/include/asm/assembler.h | 13 ++++--
 arch/arm64/mm/cache.S              | 64 +++++++++++++++++++++---------
 2 files changed, 54 insertions(+), 23 deletions(-)

Comments

Mark Rutland May 18, 2021, 3:33 p.m. UTC | #1
Hi Fuad,

This is great! I had a play with the series locally, and I have a few
suggestions below for how to make this a bit clearer.

On Mon, May 17, 2021 at 08:51:10AM +0100, Fuad Tabba wrote:
> __flush_icache_range works on the kernel linear map, and doesn't
> need uaccess. The existing code is a side-effect of its current
> implementation with __flush_cache_user_range fallthrough.
> 
> Instead of fallthrough to share the code, use a common macro for
> the two where the caller can specify whether user-space access is
> needed.
> 
> No functional change intended.
> Possible performance impact due to the reduced number of
> instructions.

This looks correct, but I'm not too keen on all the duplication we have
to do w.r.t. `needs_uaccess`, and I think it would be much clearer to
put the TTBR maintenance directly in `__flush_cache_user_range`
immediately, rather than doing that later in the series.

> Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> Reported-by: Will Deacon <will@kernel.org>
> Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  arch/arm64/include/asm/assembler.h | 13 ++++--
>  arch/arm64/mm/cache.S              | 64 +++++++++++++++++++++---------
>  2 files changed, 54 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 8418c1bd8f04..6ff7a3a3b238 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -426,16 +426,21 @@ alternative_endif
>   * Macro to perform an instruction cache maintenance for the interval
>   * [start, end)
>   *
> - * 	start, end:	virtual addresses describing the region
> - *	label:		A label to branch to on user fault.
> - * 	Corrupts:	tmp1, tmp2
> + *	start, end:	virtual addresses describing the region
> + *	needs_uaccess:	might access user space memory
> + *	label:		label to branch to on user fault (if needs_uaccess)
> + *	Corrupts:	tmp1, tmp2
>   */

I'm not too keen on the separate `needs_uaccess` and `label` arguments.
We should be able to collapse those into a single argument by checking
with .ifnc, e.g.

	.macro op arg, fixup
	.ifnc \fixup,
	do_thing_with \fixup
	.endif
	.endm

... which I think would make things clearer overall.
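
For instance (illustrative call sites only, assuming the
single-fixup-argument form of invalidate_icache_by_line sketched
further down), the kernel-only caller would simply omit the argument,
while the uaccess caller passes its fixup label:

	invalidate_icache_by_line x0, x1, x2, x3	// no fixup: no extable entry
	invalidate_icache_by_line x0, x1, x2, x3, 2f	// user faults branch to 2f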

> -	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
> +	.macro invalidate_icache_by_line start, end, tmp1, tmp2, needs_uaccess, label
>  	icache_line_size \tmp1, \tmp2
>  	sub	\tmp2, \tmp1, #1
>  	bic	\tmp2, \start, \tmp2
>  9997:
> +	.if	\needs_uaccess
>  USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
> +	.else
> +	ic	ivau, \tmp2
> +	.endif
>  	add	\tmp2, \tmp2, \tmp1
>  	cmp	\tmp2, \end
>  	b.lo	9997b

I'm also not keen on duplicating the instruction here. I reckon what we
should do is add a conditional extable macro:

	.macro _cond_extable insn, fixup
	.ifnc \fixup,
	_asm_extable \insn, \fixup
	.endif
	.endm

... which'd allow us to do:

        .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
        icache_line_size \tmp1, \tmp2
        sub     \tmp2, \tmp1, #1
        bic     \tmp2, \start, \tmp2
.Licache_op\@:
        ic      ivau, \tmp2                     // invalidate I line PoU
        add     \tmp2, \tmp2, \tmp1
        cmp     \tmp2, \end
        b.lo    .Licache_op\@
        dsb     ish 
        isb 

        _cond_extable .Licache_op\@, \fixup
        .endm

... which I think is clearer.

We could do likewise in dcache_by_line_op, and with some refactoring we
could remove the logic that we have to currently duplicate.
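
By way of illustration (a rough sketch only, glossing over the
cvau/ARM64_WORKAROUND_CLEAN_CACHE alternative and assuming a
[start, end) interface for symmetry with the i-cache macro), that
could end up looking like:

	.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
	dcache_line_size \tmp1, \tmp2
	sub	\tmp2, \tmp1, #1
	bic	\tmp2, \start, \tmp2
.Ldcache_op\@:
	dc	\op, \tmp2			// clean/invalidate D line
	add	\tmp2, \tmp2, \tmp1
	cmp	\tmp2, \end
	b.lo	.Ldcache_op\@
	dsb	\domain

	_cond_extable .Ldcache_op\@, \fixup
	.endm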

I pushed a couple of preparatory patches for that to:

  https://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git/log/?h=arm64/cleanups/cache
  git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git arm64/cleanups/cache

... in case you felt like taking those as-is.

> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 2d881f34dd9d..092f73acdf9a 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -15,30 +15,20 @@
>  #include <asm/asm-uaccess.h>
>  
>  /*
> - *	flush_icache_range(start,end)
> + *	__flush_cache_range(start,end) [needs_uaccess]
>   *
>   *	Ensure that the I and D caches are coherent within specified region.
>   *	This is typically used when code has been written to a memory region,
>   *	and will be executed.
>   *
> - *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> + *	- start   	- virtual start address of region
> + *	- end     	- virtual end address of region
> + *	- needs_uaccess - (macro parameter) might access user space memory
>   */
> -SYM_FUNC_START(__flush_icache_range)
> -	/* FALLTHROUGH */
> -
> -/*
> - *	__flush_cache_user_range(start,end)
> - *
> - *	Ensure that the I and D caches are coherent within specified region.
> - *	This is typically used when code has been written to a memory region,
> - *	and will be executed.
> - *
> - *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> - */
> -SYM_FUNC_START(__flush_cache_user_range)
> +.macro	__flush_cache_range, needs_uaccess
> +	.if 	\needs_uaccess
>  	uaccess_ttbr0_enable x2, x3, x4
> +	.endif
>  alternative_if ARM64_HAS_CACHE_IDC
>  	dsb	ishst
>  	b	7f
> @@ -47,7 +37,11 @@ alternative_else_nop_endif
>  	sub	x3, x2, #1
>  	bic	x4, x0, x3
>  1:
> +	.if 	\needs_uaccess
>  user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
> +	.else
> +alternative_insn "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
> +	.endif
>  	add	x4, x4, x2
>  	cmp	x4, x1
>  	b.lo	1b
> @@ -58,15 +52,47 @@ alternative_if ARM64_HAS_CACHE_DIC
>  	isb
>  	b	8f
>  alternative_else_nop_endif
> -	invalidate_icache_by_line x0, x1, x2, x3, 9f
> +	invalidate_icache_by_line x0, x1, x2, x3, \needs_uaccess, 9f
>  8:	mov	x0, #0
>  1:
> +	.if	\needs_uaccess
>  	uaccess_ttbr0_disable x1, x2
> +	.endif
>  	ret
> +
> +	.if 	\needs_uaccess
>  9:
>  	mov	x0, #-EFAULT
>  	b	1b
> +	.endif
> +.endm

As above, I think we should reduce this to the core logic, moving the
ttbr manipulation and fixup handler inline in __flush_cache_user_range.

For clarity, I'd also like to leave the RETs out of the macro, since
that's required for the fixup handling anyway, and it generally makes
the control flow clearer at the function definition.

> +/*
> + *	flush_icache_range(start,end)
> + *
> + *	Ensure that the I and D caches are coherent within specified region.
> + *	This is typically used when code has been written to a memory region,
> + *	and will be executed.
> + *
> + *	- start   - virtual start address of region
> + *	- end     - virtual end address of region
> + */
> +SYM_FUNC_START(__flush_icache_range)
> +	__flush_cache_range needs_uaccess=0
>  SYM_FUNC_END(__flush_icache_range)

...so with the suggestions above, this could be:

SYM_FUNC_START(__flush_icache_range)
	__flush_cache_range
	ret
SYM_FUNC_END(__flush_icache_range)

> +/*
> + *	__flush_cache_user_range(start,end)
> + *
> + *	Ensure that the I and D caches are coherent within specified region.
> + *	This is typically used when code has been written to a memory region,
> + *	and will be executed.
> + *
> + *	- start   - virtual start address of region
> + *	- end     - virtual end address of region
> + */
> +SYM_FUNC_START(__flush_cache_user_range)
> +	__flush_cache_range needs_uaccess=1
>  SYM_FUNC_END(__flush_cache_user_range)

... this could be:

SYM_FUNC_START(__flush_cache_user_range)
        uaccess_ttbr0_enable x2, x3, x4
        __flush_cache_range 2f
1:
        uaccess_ttbr0_disable x1, x2
        ret 
2:
        mov     x0, #-EFAULT
        b       1b  
SYM_FUNC_END(__flush_cache_user_range)

>  /*
> @@ -86,7 +112,7 @@ alternative_else_nop_endif
>  
>  	uaccess_ttbr0_enable x2, x3, x4
>  
> -	invalidate_icache_by_line x0, x1, x2, x3, 2f
> +	invalidate_icache_by_line x0, x1, x2, x3, 1, 2f

... and this wouldn't need to change.

Thanks,
Mark.

>  	mov	x0, xzr
>  1:
>  	uaccess_ttbr0_disable x1, x2
> -- 
> 2.31.1.751.gd2f1c929bd-goog
>
Fuad Tabba May 19, 2021, 4:25 p.m. UTC | #2
Hi Mark,

On Tue, May 18, 2021 at 4:33 PM Mark Rutland <mark.rutland@arm.com> wrote:
>
> Hi Fuad,
>
> This is great! I had a play with the series locally, and I have a few
> suggestions below for how to make this a bit clearer.
>
> On Mon, May 17, 2021 at 08:51:10AM +0100, Fuad Tabba wrote:
> > __flush_icache_range works on the kernel linear map, and doesn't
> > need uaccess. The existing code is a side-effect of its current
> > implementation with __flush_cache_user_range fallthrough.
> >
> > Instead of fallthrough to share the code, use a common macro for
> > the two where the caller can specify whether user-space access is
> > needed.
> >
> > No functional change intended.
> > Possible performance impact due to the reduced number of
> > instructions.
>
> This looks correct, but I'm not too keen on all the duplication we have
> to do w.r.t. `needs_uaccess`, and I think it would be much clearer to
> put the TTBR maintenance directly in `__flush_cache_user_range`
> immediately, rather than doing that later in the series.
>
> > Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> > Reported-by: Will Deacon <will@kernel.org>
> > Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  arch/arm64/include/asm/assembler.h | 13 ++++--
> >  arch/arm64/mm/cache.S              | 64 +++++++++++++++++++++---------
> >  2 files changed, 54 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> > index 8418c1bd8f04..6ff7a3a3b238 100644
> > --- a/arch/arm64/include/asm/assembler.h
> > +++ b/arch/arm64/include/asm/assembler.h
> > @@ -426,16 +426,21 @@ alternative_endif
> >   * Macro to perform an instruction cache maintenance for the interval
> >   * [start, end)
> >   *
> > - *   start, end:     virtual addresses describing the region
> > - *   label:          A label to branch to on user fault.
> > - *   Corrupts:       tmp1, tmp2
> > + *   start, end:     virtual addresses describing the region
> > + *   needs_uaccess:  might access user space memory
> > + *   label:          label to branch to on user fault (if needs_uaccess)
> > + *   Corrupts:       tmp1, tmp2
> >   */
>
> I'm not too keen on the separate `needs_uaccess` and `label` arguments.
> We should be able to collapse those into a single argument by checking
> with .ifnc, e.g.
>
>         .macro op arg, fixup
>         .ifnc \fixup,
>         do_thing_with \fixup
>         .endif
>         .endm
>
> ... which I think would make things clearer overall.
>
> > -     .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
> > +     .macro invalidate_icache_by_line start, end, tmp1, tmp2, needs_uaccess, label
> >       icache_line_size \tmp1, \tmp2
> >       sub     \tmp2, \tmp1, #1
> >       bic     \tmp2, \start, \tmp2
> >  9997:
> > +     .if     \needs_uaccess
> >  USER(\label, ic      ivau, \tmp2)                    // invalidate I line PoU
> > +     .else
> > +     ic      ivau, \tmp2
> > +     .endif
> >       add     \tmp2, \tmp2, \tmp1
> >       cmp     \tmp2, \end
> >       b.lo    9997b
>
> I'm also not keen on duplicating the instruction here. I reckon what we
> should do is add a conditional extable macro:
>
>         .macro _cond_extable insn, fixup
>         .ifnc \fixup,
>         _asm_extable \insn, \fixup
>         .endif
>         .endm
>
> ... which'd allow us to do:
>
>         .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
>         icache_line_size \tmp1, \tmp2
>         sub     \tmp2, \tmp1, #1
>         bic     \tmp2, \start, \tmp2
> .Licache_op\@:
>         ic      ivau, \tmp2                     // invalidate I line PoU
>         add     \tmp2, \tmp2, \tmp1
>         cmp     \tmp2, \end
>         b.lo    .Licache_op\@
>         dsb     ish
>         isb
>
>         _cond_extable .Licache_op\@, \fixup
>         .endm
>
> ... which I think is clearer.
>
> We could do likewise in dcache_by_line_op, and with some refactoring we
> could remove the logic that we have to currently duplicate.
>
> I pushed a couple of preparatory patches for that to:
>
>   https://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git/log/?h=arm64/cleanups/cache
>   git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git arm64/cleanups/cache
>
> ... in case you felt like taking those as-is.

Thanks for this, and for the other comments and suggestions. I'll take
your patches, as well as all the fixes you suggested in the next
round.

Cheers,
/fuad

> > diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> > index 2d881f34dd9d..092f73acdf9a 100644
> > --- a/arch/arm64/mm/cache.S
> > +++ b/arch/arm64/mm/cache.S
> > @@ -15,30 +15,20 @@
> >  #include <asm/asm-uaccess.h>
> >
> >  /*
> > - *   flush_icache_range(start,end)
> > + *   __flush_cache_range(start,end) [needs_uaccess]
> >   *
> >   *   Ensure that the I and D caches are coherent within specified region.
> >   *   This is typically used when code has been written to a memory region,
> >   *   and will be executed.
> >   *
> > - *   - start   - virtual start address of region
> > - *   - end     - virtual end address of region
> > + *   - start         - virtual start address of region
> > + *   - end           - virtual end address of region
> > + *   - needs_uaccess - (macro parameter) might access user space memory
> >   */
> > -SYM_FUNC_START(__flush_icache_range)
> > -     /* FALLTHROUGH */
> > -
> > -/*
> > - *   __flush_cache_user_range(start,end)
> > - *
> > - *   Ensure that the I and D caches are coherent within specified region.
> > - *   This is typically used when code has been written to a memory region,
> > - *   and will be executed.
> > - *
> > - *   - start   - virtual start address of region
> > - *   - end     - virtual end address of region
> > - */
> > -SYM_FUNC_START(__flush_cache_user_range)
> > +.macro       __flush_cache_range, needs_uaccess
> > +     .if     \needs_uaccess
> >       uaccess_ttbr0_enable x2, x3, x4
> > +     .endif
> >  alternative_if ARM64_HAS_CACHE_IDC
> >       dsb     ishst
> >       b       7f
> > @@ -47,7 +37,11 @@ alternative_else_nop_endif
> >       sub     x3, x2, #1
> >       bic     x4, x0, x3
> >  1:
> > +     .if     \needs_uaccess
> >  user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
> > +     .else
> > +alternative_insn "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
> > +     .endif
> >       add     x4, x4, x2
> >       cmp     x4, x1
> >       b.lo    1b
> > @@ -58,15 +52,47 @@ alternative_if ARM64_HAS_CACHE_DIC
> >       isb
> >       b       8f
> >  alternative_else_nop_endif
> > -     invalidate_icache_by_line x0, x1, x2, x3, 9f
> > +     invalidate_icache_by_line x0, x1, x2, x3, \needs_uaccess, 9f
> >  8:   mov     x0, #0
> >  1:
> > +     .if     \needs_uaccess
> >       uaccess_ttbr0_disable x1, x2
> > +     .endif
> >       ret
> > +
> > +     .if     \needs_uaccess
> >  9:
> >       mov     x0, #-EFAULT
> >       b       1b
> > +     .endif
> > +.endm
>
> As above, I think we should reduce this to the core logic, moving the
> ttbr manipulation and fixup handler inline in __flush_cache_user_range.
>
> For clarity, I'd also like to leave the RETs out of the macro, since
> that's required for the fixup handling anyway, and it generally makes
> the control flow clearer at the function definition.
>
> > +/*
> > + *   flush_icache_range(start,end)
> > + *
> > + *   Ensure that the I and D caches are coherent within specified region.
> > + *   This is typically used when code has been written to a memory region,
> > + *   and will be executed.
> > + *
> > + *   - start   - virtual start address of region
> > + *   - end     - virtual end address of region
> > + */
> > +SYM_FUNC_START(__flush_icache_range)
> > +     __flush_cache_range needs_uaccess=0
> >  SYM_FUNC_END(__flush_icache_range)
>
> ...so with the suggestions above, this could be:
>
> SYM_FUNC_START(__flush_icache_range)
>         __flush_cache_range
>         ret
> SYM_FUNC_END(__flush_icache_range)
>
> > +/*
> > + *   __flush_cache_user_range(start,end)
> > + *
> > + *   Ensure that the I and D caches are coherent within specified region.
> > + *   This is typically used when code has been written to a memory region,
> > + *   and will be executed.
> > + *
> > + *   - start   - virtual start address of region
> > + *   - end     - virtual end address of region
> > + */
> > +SYM_FUNC_START(__flush_cache_user_range)
> > +     __flush_cache_range needs_uaccess=1
> >  SYM_FUNC_END(__flush_cache_user_range)
>
> ... this could be:
>
> SYM_FUNC_START(__flush_cache_user_range)
>         uaccess_ttbr0_enable x2, x3, x4
>         __flush_cache_range 2f
> 1:
>         uaccess_ttbr0_disable x1, x2
>         ret
> 2:
>         mov     x0, #-EFAULT
>         b       1b
> SYM_FUNC_END(__flush_cache_user_range)
>
> >  /*
> > @@ -86,7 +112,7 @@ alternative_else_nop_endif
> >
> >       uaccess_ttbr0_enable x2, x3, x4
> >
> > -     invalidate_icache_by_line x0, x1, x2, x3, 2f
> > +     invalidate_icache_by_line x0, x1, x2, x3, 1, 2f
>
> ... and this wouldn't need to change.
>
> Thanks,
> Mark.
>
> >       mov     x0, xzr
> >  1:
> >       uaccess_ttbr0_disable x1, x2
> > --
> > 2.31.1.751.gd2f1c929bd-goog
> >
Mark Rutland May 20, 2021, 10:47 a.m. UTC | #3
On Wed, May 19, 2021 at 05:25:37PM +0100, Fuad Tabba wrote:
> On Tue, May 18, 2021 at 4:33 PM Mark Rutland <mark.rutland@arm.com> wrote:
> > I pushed a couple of prearatory patches for that to:
> >
> >   https://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git/log/?h=arm64/cleanups/cache
> >   git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git arm64/cleanups/cache
> >
> > ... in case you felt like taking those as-is.
> 
> Thanks for this, and for the other comments and suggestions. I'll take
> your patches, as well as all the fixes you suggested in the next
> round.

Great! Thanks again for working on this; it's really nice to see all
this getting cleaned up.

Thanks,
Mark.

Patch

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1bd8f04..6ff7a3a3b238 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -426,16 +426,21 @@  alternative_endif
  * Macro to perform an instruction cache maintenance for the interval
  * [start, end)
  *
- * 	start, end:	virtual addresses describing the region
- *	label:		A label to branch to on user fault.
- * 	Corrupts:	tmp1, tmp2
+ *	start, end:	virtual addresses describing the region
+ *	needs_uaccess:	might access user space memory
+ *	label:		label to branch to on user fault (if needs_uaccess)
+ *	Corrupts:	tmp1, tmp2
  */
-	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+	.macro invalidate_icache_by_line start, end, tmp1, tmp2, needs_uaccess, label
 	icache_line_size \tmp1, \tmp2
 	sub	\tmp2, \tmp1, #1
 	bic	\tmp2, \start, \tmp2
 9997:
+	.if	\needs_uaccess
 USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
+	.else
+	ic	ivau, \tmp2
+	.endif
 	add	\tmp2, \tmp2, \tmp1
 	cmp	\tmp2, \end
 	b.lo	9997b
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 2d881f34dd9d..092f73acdf9a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -15,30 +15,20 @@ 
 #include <asm/asm-uaccess.h>
 
 /*
- *	flush_icache_range(start,end)
+ *	__flush_cache_range(start,end) [needs_uaccess]
  *
  *	Ensure that the I and D caches are coherent within specified region.
  *	This is typically used when code has been written to a memory region,
  *	and will be executed.
  *
- *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- start   	- virtual start address of region
+ *	- end     	- virtual end address of region
+ *	- needs_uaccess - (macro parameter) might access user space memory
  */
-SYM_FUNC_START(__flush_icache_range)
-	/* FALLTHROUGH */
-
-/*
- *	__flush_cache_user_range(start,end)
- *
- *	Ensure that the I and D caches are coherent within specified region.
- *	This is typically used when code has been written to a memory region,
- *	and will be executed.
- *
- *	- start   - virtual start address of region
- *	- end     - virtual end address of region
- */
-SYM_FUNC_START(__flush_cache_user_range)
+.macro	__flush_cache_range, needs_uaccess
+	.if 	\needs_uaccess
 	uaccess_ttbr0_enable x2, x3, x4
+	.endif
 alternative_if ARM64_HAS_CACHE_IDC
 	dsb	ishst
 	b	7f
@@ -47,7 +37,11 @@  alternative_else_nop_endif
 	sub	x3, x2, #1
 	bic	x4, x0, x3
 1:
+	.if 	\needs_uaccess
 user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
+	.else
+alternative_insn "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
+	.endif
 	add	x4, x4, x2
 	cmp	x4, x1
 	b.lo	1b
@@ -58,15 +52,47 @@  alternative_if ARM64_HAS_CACHE_DIC
 	isb
 	b	8f
 alternative_else_nop_endif
-	invalidate_icache_by_line x0, x1, x2, x3, 9f
+	invalidate_icache_by_line x0, x1, x2, x3, \needs_uaccess, 9f
 8:	mov	x0, #0
 1:
+	.if	\needs_uaccess
 	uaccess_ttbr0_disable x1, x2
+	.endif
 	ret
+
+	.if 	\needs_uaccess
 9:
 	mov	x0, #-EFAULT
 	b	1b
+	.endif
+.endm
+
+/*
+ *	flush_icache_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified region.
+ *	This is typically used when code has been written to a memory region,
+ *	and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+SYM_FUNC_START(__flush_icache_range)
+	__flush_cache_range needs_uaccess=0
 SYM_FUNC_END(__flush_icache_range)
+
+/*
+ *	__flush_cache_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified region.
+ *	This is typically used when code has been written to a memory region,
+ *	and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+SYM_FUNC_START(__flush_cache_user_range)
+	__flush_cache_range needs_uaccess=1
 SYM_FUNC_END(__flush_cache_user_range)
 
 /*
@@ -86,7 +112,7 @@  alternative_else_nop_endif
 
 	uaccess_ttbr0_enable x2, x3, x4
 
-	invalidate_icache_by_line x0, x1, x2, x3, 2f
+	invalidate_icache_by_line x0, x1, x2, x3, 1, 2f
 	mov	x0, xzr
 1:
 	uaccess_ttbr0_disable x1, x2