| Message ID | 20210520124406.2731873-6-tabba@google.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | Tidy up cache.S |
On Thu, May 20, 2021 at 01:43:53PM +0100, Fuad Tabba wrote:
> __flush_icache_range works on the kernel linear map, and doesn't
> need uaccess. The existing code is a side-effect of its current
> implementation with __flush_cache_user_range fallthrough.
>
> Instead of fallthrough to share the code, use a common macro for
> the two where the caller specifies an optional fixup label if
> user access is needed. If provided, this label would be used to
> generate an extable entry.
>
> No functional change intended.
> Possible performance impact due to the reduced number of
> instructions.
>
> Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> Reported-by: Will Deacon <will@kernel.org>
> Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> Signed-off-by: Fuad Tabba <tabba@google.com>

I have one comment below, but either way this looks good to me, so:

Acked-by: Mark Rutland <mark.rutland@arm.com>

> ---
> arch/arm64/mm/cache.S | 64 +++++++++++++++++++++++++++----------------
> 1 file changed, 41 insertions(+), 23 deletions(-)
>
> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 5ff8dfa86975..c6bc3b8138e1 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -14,6 +14,41 @@
> #include <asm/alternative.h>
> #include <asm/asm-uaccess.h>
>
> +/*
> + * __flush_cache_range(start,end) [fixup]
> + *
> + * Ensure that the I and D caches are coherent within specified region.
> + * This is typically used when code has been written to a memory region,
> + * and will be executed.
> + *
> + * - start - virtual start address of region
> + * - end - virtual end address of region
> + * - fixup - optional label to branch to on user fault
> + */
> +.macro __flush_cache_range, fixup
> +alternative_if ARM64_HAS_CACHE_IDC
> +	dsb	ishst
> +	b	.Ldc_skip_\@
> +alternative_else_nop_endif
> +	dcache_line_size x2, x3
> +	sub	x3, x2, #1
> +	bic	x4, x0, x3
> +.Ldc_loop_\@:
> +user_alt "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE, \fixup
> +	add	x4, x4, x2
> +	cmp	x4, x1
> +	b.lo	.Ldc_loop_\@
> +	dsb	ish

As on the prior patch, I reckon it'd be nicer overall to align with the
*by_line macros and have an explicit _cond_extable here, e.g.

| .Ldc_op\@:
| alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
| 	add	x4, x4, x2
| 	cmp	x4, x1
| 	b.lo	.Ldc_op\@
| 	dsb	ish
| ...
| // just before the .endm
| _cond_extable .Ldc_op\@, \fixup

... and with some rework it might be possible to use dcache_by_line_op
directly here (it currently clobbers the base and end, so can't be used
as-is).

Thanks,
Mark.

> +
> +.Ldc_skip_\@:
> +alternative_if ARM64_HAS_CACHE_DIC
> +	isb
> +	b	.Lic_skip_\@
> +alternative_else_nop_endif
> +	invalidate_icache_by_line x0, x1, x2, x3, \fixup
> +.Lic_skip_\@:
> +.endm
> +
> /*
> * flush_icache_range(start,end)
> *
> @@ -25,7 +60,9 @@
> * - end - virtual end address of region
> */
> SYM_FUNC_START(__flush_icache_range)
> -	/* FALLTHROUGH */
> +	__flush_cache_range
> +	ret
> +SYM_FUNC_END(__flush_icache_range)
>
> /*
> * __flush_cache_user_range(start,end)
> @@ -39,34 +76,15 @@ SYM_FUNC_START(__flush_icache_range)
> */
> SYM_FUNC_START(__flush_cache_user_range)
> 	uaccess_ttbr0_enable x2, x3, x4
> -alternative_if ARM64_HAS_CACHE_IDC
> -	dsb	ishst
> -	b	7f
> -alternative_else_nop_endif
> -	dcache_line_size x2, x3
> -	sub	x3, x2, #1
> -	bic	x4, x0, x3
> -1:
> -user_alt "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE, 9f
> -	add	x4, x4, x2
> -	cmp	x4, x1
> -	b.lo	1b
> -	dsb	ish
>
> -7:
> -alternative_if ARM64_HAS_CACHE_DIC
> -	isb
> -	b	8f
> -alternative_else_nop_endif
> -	invalidate_icache_by_line x0, x1, x2, x3, 9f
> -8:	mov	x0, #0
> +	__flush_cache_range 2f
> +	mov	x0, xzr
> 1:
> 	uaccess_ttbr0_disable x1, x2
> 	ret
> -9:
> +2:
> 	mov	x0, #-EFAULT
> 	b	1b
> -SYM_FUNC_END(__flush_icache_range)
> SYM_FUNC_END(__flush_cache_user_range)
>
> /*
> --
> 2.31.1.751.gd2f1c929bd-goog
>
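A note for readers following along: the `_cond_extable` helper Mark refers to is introduced earlier in this series. Roughly, it is an assembler macro that emits an exception-table entry only when the caller actually supplied a fixup label. The sketch below illustrates the idea and is not necessarily the exact kernel definition; it assumes an existing `_asm_extable insn, fixup` helper.

```
/*
 * Sketch of a conditional extable helper: emit an exception-table
 * entry for \insn only when a \fixup label was supplied. .ifnc
 * compares the (possibly empty) argument against an empty string.
 */
	.macro	_cond_extable, insn, fixup
	.ifnc	\fixup,
	_asm_extable	\insn, \fixup
	.endif
	.endm
```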
On Thu, May 20, 2021 at 03:02:16PM +0100, Mark Rutland wrote:
> On Thu, May 20, 2021 at 01:43:53PM +0100, Fuad Tabba wrote:
> > __flush_icache_range works on the kernel linear map, and doesn't
> > need uaccess. The existing code is a side-effect of its current
> > implementation with __flush_cache_user_range fallthrough.
> >
> > Instead of fallthrough to share the code, use a common macro for
> > the two where the caller specifies an optional fixup label if
> > user access is needed. If provided, this label would be used to
> > generate an extable entry.
> >
> > No functional change intended.
> > Possible performance impact due to the reduced number of
> > instructions.
> >
> > Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> > Reported-by: Will Deacon <will@kernel.org>
> > Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> > Signed-off-by: Fuad Tabba <tabba@google.com>
>
> I have one comment below, but either way this looks good to me, so:
>
> Acked-by: Mark Rutland <mark.rutland@arm.com>
>
> > ---
> > arch/arm64/mm/cache.S | 64 +++++++++++++++++++++++++++----------------
> > 1 file changed, 41 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> > index 5ff8dfa86975..c6bc3b8138e1 100644
> > --- a/arch/arm64/mm/cache.S
> > +++ b/arch/arm64/mm/cache.S
> > @@ -14,6 +14,41 @@
> > #include <asm/alternative.h>
> > #include <asm/asm-uaccess.h>
> >
> > +/*
> > + * __flush_cache_range(start,end) [fixup]
> > + *
> > + * Ensure that the I and D caches are coherent within specified region.
> > + * This is typically used when code has been written to a memory region,
> > + * and will be executed.
> > + *
> > + * - start - virtual start address of region
> > + * - end - virtual end address of region
> > + * - fixup - optional label to branch to on user fault
> > + */
> > +.macro __flush_cache_range, fixup
> > +alternative_if ARM64_HAS_CACHE_IDC
> > +	dsb	ishst
> > +	b	.Ldc_skip_\@
> > +alternative_else_nop_endif
> > +	dcache_line_size x2, x3
> > +	sub	x3, x2, #1
> > +	bic	x4, x0, x3
> > +.Ldc_loop_\@:
> > +user_alt "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE, \fixup
> > +	add	x4, x4, x2
> > +	cmp	x4, x1
> > +	b.lo	.Ldc_loop_\@
> > +	dsb	ish
>
> As on the prior patch, I reckon it'd be nicer overall to align with the
> *by_line macros and have an explicit _cond_extable here, e.g.
>
> | .Ldc_op\@:
> | alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
> | 	add	x4, x4, x2
> | 	cmp	x4, x1
> | 	b.lo	.Ldc_op\@
> | 	dsb	ish
> | ...
> | // just before the .endm
> | _cond_extable .Ldc_op\@, \fixup
>
> ... and with some rework it might be possible to use dcache_by_line_op
> directly here (it currently clobbers the base and end, so can't be used
> as-is).

Having thought about this a bit more, it's simple enough to do that now:

| alternative_if ARM64_HAS_CACHE_IDC
| 	dsb	ishst
| 	b	.Ldc_skip_\@
| alternative_else_nop_endif
| 	mov	x0, x2
| 	add	x3, x0, x1
| 	dcache_by_line_op cvau, ishst, x2, x3, x4, x5, \fixup
| .Ldc_skip_\@

... and then we'd just need to change the ADD to a MOV when we change the
macro to take the end in x1.

Note that dcache_by_line_op will automatically upgrade 'cvau' to 'civac'
when ARM64_WORKAROUND_CLEAN_CACHE is present, so the resulting logic is
the same.

Thanks,
Mark.
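Regarding the automatic 'cvau' to 'civac' upgrade mentioned above: it is done through the alternatives framework, which patches the clean-only instruction into a clean+invalidate at boot on CPUs that have the ARM64_WORKAROUND_CLEAN_CACHE capability. A minimal sketch of the pattern, with a hypothetical macro name (the real helper lives in the arm64 assembler headers and may differ in detail):

```
/*
 * Sketch: perform "dc \op" normally, but let the alternatives
 * framework patch in "dc civac" on CPUs that need the
 * clean-cache workaround.
 */
	.macro	__dc_op_sketch, op, addr
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
	dc	\op, \addr
alternative_else
	dc	civac, \addr
alternative_endif
	.endm
```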
On Thu, May 20, 2021 at 04:37:35PM +0100, Mark Rutland wrote:
> On Thu, May 20, 2021 at 03:02:16PM +0100, Mark Rutland wrote:
> Having thought about this a bit more, it's simple enough to do that now:
>
> | alternative_if ARM64_HAS_CACHE_IDC
> | 	dsb	ishst
> | 	b	.Ldc_skip_\@
> | alternative_else_nop_endif
> | 	mov	x0, x2
> | 	add	x3, x0, x1
> | 	dcache_by_line_op cvau, ishst, x2, x3, x4, x5, \fixup
> | .Ldc_skip_\@

Looking at this again, that "ishst" should be "ish", but otherwise this
stands.

Mark.
On Thu, May 20, 2021 at 01:43:53PM +0100, Fuad Tabba wrote:
> __flush_icache_range works on the kernel linear map, and doesn't
> need uaccess. The existing code is a side-effect of its current
> implementation with __flush_cache_user_range fallthrough.
>
> Instead of fallthrough to share the code, use a common macro for
> the two where the caller specifies an optional fixup label if
> user access is needed. If provided, this label would be used to
> generate an extable entry.
>
> No functional change intended.
> Possible performance impact due to the reduced number of
> instructions.
>
> Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> Reported-by: Will Deacon <will@kernel.org>
> Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
> Signed-off-by: Fuad Tabba <tabba@google.com>

Just a few acks on the patches that have my reported-by but I'm happy
with the series overall, nice clean-up.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 5ff8dfa86975..c6bc3b8138e1 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -14,6 +14,41 @@
 #include <asm/alternative.h>
 #include <asm/asm-uaccess.h>
 
+/*
+ * __flush_cache_range(start,end) [fixup]
+ *
+ * Ensure that the I and D caches are coherent within specified region.
+ * This is typically used when code has been written to a memory region,
+ * and will be executed.
+ *
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ * - fixup - optional label to branch to on user fault
+ */
+.macro __flush_cache_range, fixup
+alternative_if ARM64_HAS_CACHE_IDC
+	dsb	ishst
+	b	.Ldc_skip_\@
+alternative_else_nop_endif
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+.Ldc_loop_\@:
+user_alt "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE, \fixup
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	.Ldc_loop_\@
+	dsb	ish
+
+.Ldc_skip_\@:
+alternative_if ARM64_HAS_CACHE_DIC
+	isb
+	b	.Lic_skip_\@
+alternative_else_nop_endif
+	invalidate_icache_by_line x0, x1, x2, x3, \fixup
+.Lic_skip_\@:
+.endm
+
 /*
  * flush_icache_range(start,end)
  *
@@ -25,7 +60,9 @@
  * - end - virtual end address of region
  */
 SYM_FUNC_START(__flush_icache_range)
-	/* FALLTHROUGH */
+	__flush_cache_range
+	ret
+SYM_FUNC_END(__flush_icache_range)
 
 /*
  * __flush_cache_user_range(start,end)
@@ -39,34 +76,15 @@ SYM_FUNC_START(__flush_icache_range)
  */
 SYM_FUNC_START(__flush_cache_user_range)
 	uaccess_ttbr0_enable x2, x3, x4
-alternative_if ARM64_HAS_CACHE_IDC
-	dsb	ishst
-	b	7f
-alternative_else_nop_endif
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x4, x0, x3
-1:
-user_alt "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE, 9f
-	add	x4, x4, x2
-	cmp	x4, x1
-	b.lo	1b
-	dsb	ish
 
-7:
-alternative_if ARM64_HAS_CACHE_DIC
-	isb
-	b	8f
-alternative_else_nop_endif
-	invalidate_icache_by_line x0, x1, x2, x3, 9f
-8:	mov	x0, #0
+	__flush_cache_range 2f
+	mov	x0, xzr
 1:
 	uaccess_ttbr0_disable x1, x2
 	ret
-9:
+2:
 	mov	x0, #-EFAULT
 	b	1b
-SYM_FUNC_END(__flush_icache_range)
 SYM_FUNC_END(__flush_cache_user_range)
 
 /*
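For context on what "generate an extable entry" means in this patch: the fixup label is recorded together with the address of the potentially faulting instruction in the __ex_table section, as a pair of PC-relative offsets, so that the fault handler can resume execution at the fixup. A rough sketch of such a helper follows; the arm64 kernel's real `_asm_extable` macro is equivalent in spirit, though the exact definition may differ.

```
/*
 * Sketch: record an exception-table entry as a pair of 32-bit,
 * PC-relative offsets (faulting insn, fixup) in __ex_table.
 */
	.macro	_asm_extable_sketch, insn, fixup
	.pushsection	__ex_table, "a"
	.align		3
	.long		(\insn - .), (\fixup - .)
	.popsection
	.endm
```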
__flush_icache_range works on the kernel linear map, and doesn't
need uaccess. The existing code is a side-effect of its current
implementation with __flush_cache_user_range fallthrough.

Instead of fallthrough to share the code, use a common macro for
the two where the caller specifies an optional fixup label if
user access is needed. If provided, this label would be used to
generate an extable entry.

No functional change intended.
Possible performance impact due to the reduced number of
instructions.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/mm/cache.S | 64 +++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 23 deletions(-)
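To make the "optional fixup label" point concrete, here is a minimal recap of how the two entry points in the diff above use the shared macro (hypothetical function names, mirroring the patch): the kernel linear-map path passes no fixup, so no extable entries are generated and no uaccess is needed, while the user path enables TTBR0 uaccess and passes a local fixup label that returns -EFAULT on a fault.

```
/* Kernel linear-map caller: no fixup argument, hence no extable entries. */
SYM_FUNC_START(flush_kernel_range_sketch)
	__flush_cache_range
	ret
SYM_FUNC_END(flush_kernel_range_sketch)

/* User-address caller: enable uaccess and pass a fixup label for faults. */
SYM_FUNC_START(flush_user_range_sketch)
	uaccess_ttbr0_enable x2, x3, x4
	__flush_cache_range 2f		// 2f is used to generate extable entries
	mov	x0, xzr			// success
1:
	uaccess_ttbr0_disable x1, x2
	ret
2:
	mov	x0, #-EFAULT		// reached via extable fixup on a user fault
	b	1b
SYM_FUNC_END(flush_user_range_sketch)
```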