diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -220,36 +220,6 @@ alternative_endif
* unprivileged instructions, and USER() only works for single instructions.
*/
#ifdef CONFIG_ARM64_UAO
- .macro uao_ldp l, reg1, reg2, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: ldp \reg1, \reg2, [\addr], \post_inc;
-8889: nop;
- nop;
- alternative_else
- ldtr \reg1, [\addr];
- ldtr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
- .endm
-
- .macro uao_stp l, reg1, reg2, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: stp \reg1, \reg2, [\addr], \post_inc;
-8889: nop;
- nop;
- alternative_else
- sttr \reg1, [\addr];
- sttr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
- .endm
-
.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
alternative_if_not ARM64_HAS_UAO
8888: \inst \reg, [\addr], \post_inc;
@@ -262,12 +232,6 @@ alternative_endif
_asm_extable 8888b,\l;
.endm
#else
- .macro uao_ldp l, reg1, reg2, addr, post_inc
- USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
- .endm
- .macro uao_stp l, reg1, reg2, addr, post_inc
- USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
- .endm
.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
USER(\l, \inst \reg, [\addr], \post_inc)
.endm
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -20,51 +20,114 @@
* x0 - bytes not copied
*/
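+
+/*
+ * Loads from the user buffer use unprivileged ldtr* instructions; each
+ * faultable access gets a local 8888/8889 label and an
+ * _asm_extable_faultaddr entry so that the fixup in copy_user_fixup.S
+ * can compute the number of bytes not copied from the faulting address.
+ * Stores go to the kernel buffer and need no fixup. The *_nuao variants
+ * are bound in by copy_template_user.S for the non-UAO expansion of the
+ * template; their ordinary loads still need fixup entries.
+ */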
- .macro ldrb1 ptr, regB, val
- uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
+ .macro ldrb1 reg, ptr, offset=0
+ 8888: ldtrb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro strb1 ptr, regB, val
- strb \ptr, [\regB], \val
+ .macro strb1 reg, ptr, offset=0
+ strb \reg, [\ptr, \offset]
.endm
- .macro ldrh1 ptr, regB, val
- uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
+ .macro ldrb1_reg reg, ptr, offset
+ add \ptr, \ptr, \offset
+ 8888: ldtrb \reg, [\ptr]
+ sub \ptr, \ptr, \offset
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro strh1 ptr, regB, val
- strh \ptr, [\regB], \val
+ .macro strb1_reg reg, ptr, offset
+ strb \reg, [\ptr, \offset]
.endm
- .macro ldr1 ptr, regB, val
- uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
+ .macro ldr1 reg, ptr, offset=0
+ 8888: ldtr \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro str1 ptr, regB, val
- str \ptr, [\regB], \val
+ .macro str1 reg, ptr, offset=0
+ str \reg, [\ptr, \offset]
.endm
- .macro ldp1 ptr, regB, regC, val
- uao_ldp 9998f, \ptr, \regB, \regC, \val
+ .macro ldp1 regA, regB, ptr, offset=0
+	/* Load the second word first: \regA may alias \ptr (copy96 loads
+	 * F_l, i.e. srcend, using srcend as the base), so loading \regA
+	 * first would corrupt the base for the second access. */
+	8888: ldtr \regB, [\ptr, \offset + 8]
+	8889: ldtr \regA, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ _asm_extable_faultaddr 8889b,9998f;
.endm
- .macro stp1 ptr, regB, regC, val
- stp \ptr, \regB, [\regC], \val
+ .macro stp1 regA, regB, ptr, offset=0
+ stp \regA, \regB, [\ptr, \offset]
+ .endm
+
+ .macro ldp1_pre regA, regB, ptr, offset
+ 8888: ldtr \regA, [\ptr, \offset]
+ 8889: ldtr \regB, [\ptr, \offset + 8]
+ add \ptr, \ptr, \offset
+ _asm_extable_faultaddr 8888b,9998f;
+ _asm_extable_faultaddr 8889b,9998f;
+ .endm
+
+ .macro stp1_pre regA, regB, ptr, offset
+ stp \regA, \regB, [\ptr, \offset]!
+ .endm
+
+ .macro ldrb1_nuao reg, ptr, offset=0
+ 8888: ldrb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro strb1_nuao reg, ptr, offset=0
+ strb \reg, [\ptr, \offset]
+ .endm
+
+ .macro ldrb1_nuao_reg reg, ptr, offset=0
+ 8888: ldrb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro strb1_nuao_reg reg, ptr, offset=0
+ strb \reg, [\ptr, \offset]
+ .endm
+
+ .macro ldr1_nuao reg, ptr, offset=0
+ 8888: ldr \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro str1_nuao reg, ptr, offset=0
+ str \reg, [\ptr, \offset]
+ .endm
+
+ .macro ldp1_nuao regA, regB, ptr, offset=0
+ 8888: ldp \regA, \regB, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro stp1_nuao regA, regB, ptr, offset=0
+ stp \regA, \regB, [\ptr, \offset]
+ .endm
+
+ .macro ldp1_pre_nuao regA, regB, ptr, offset
+ 8888: ldp \regA, \regB, [\ptr, \offset]!
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro stp1_pre_nuao regA, regB, ptr, offset
+ stp \regA, \regB, [\ptr, \offset]!
+ .endm
+
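+	/*
+	 * copy_template.S finishes through copy_exit so that uaccess can
+	 * be disabled again before returning to the caller.
+	 */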
+ .macro copy_exit
+ b .Luaccess_finish
.endm
-end .req x5
ENTRY(__arch_copy_from_user)
uaccess_enable_not_uao x3, x4, x5
- add end, x0, x2
-#include "copy_template.S"
+#include "copy_template_user.S"
+.Luaccess_finish:
uaccess_disable_not_uao x3, x4
- mov x0, #0 // Nothing to copy
+ mov x0, #0
ret
ENDPROC(__arch_copy_from_user)
EXPORT_SYMBOL(__arch_copy_from_user)
-
- .section .fixup,"ax"
- .align 2
-9998: sub x0, end, dst // bytes not copied
- ret
- .previous
+#include "copy_user_fixup.S"
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -21,52 +21,132 @@
* Returns:
* x0 - bytes not copied
*/
- .macro ldrb1 ptr, regB, val
- uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
+
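+	/*
+	 * Both source and destination are user buffers here, so loads and
+	 * stores alike use unprivileged accessors and carry fixup entries.
+	 */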
+ .macro ldrb1 reg, ptr, offset=0
+ 8888: ldtrb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro strb1 ptr, regB, val
- uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
+ .macro strb1 reg, ptr, offset=0
+ 8888: sttrb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro ldrh1 ptr, regB, val
- uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
+ .macro ldrb1_reg reg, ptr, offset
+ add \ptr, \ptr, \offset
+ 8888: ldtrb \reg, [\ptr]
+ sub \ptr, \ptr, \offset
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro strh1 ptr, regB, val
- uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
+ .macro strb1_reg reg, ptr, offset
+ add \ptr, \ptr, \offset
+ 8888: sttrb \reg, [\ptr]
+ sub \ptr, \ptr, \offset
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro ldr1 ptr, regB, val
- uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
+ .macro ldr1 reg, ptr, offset=0
+ 8888: ldtr \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro str1 ptr, regB, val
- uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
+ .macro str1 reg, ptr, offset=0
+ 8888: sttr \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro ldp1 ptr, regB, regC, val
- uao_ldp 9998f, \ptr, \regB, \regC, \val
+ .macro ldp1 regA, regB, ptr, offset=0
+	/* As in copy_from_user.S: load the second word first, since \regA
+	 * may alias \ptr. */
+	8888: ldtr \regB, [\ptr, \offset + 8]
+	8889: ldtr \regA, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ _asm_extable_faultaddr 8889b,9998f;
.endm
- .macro stp1 ptr, regB, regC, val
- uao_stp 9998f, \ptr, \regB, \regC, \val
+ .macro stp1 regA, regB, ptr, offset=0
+ 8888: sttr \regA, [\ptr, \offset]
+ 8889: sttr \regB, [\ptr, \offset + 8]
+ _asm_extable_faultaddr 8888b,9998f;
+ _asm_extable_faultaddr 8889b,9998f;
.endm
-end .req x5
+ .macro ldp1_pre regA, regB, ptr, offset
+ 8888: ldtr \regA, [\ptr, \offset]
+ 8889: ldtr \regB, [\ptr, \offset + 8]
+ add \ptr, \ptr, \offset
+ _asm_extable_faultaddr 8888b,9998f;
+ _asm_extable_faultaddr 8889b,9998f;
+ .endm
+
+ .macro stp1_pre regA, regB, ptr, offset
+ 8888: sttr \regA, [\ptr, \offset]
+ 8889: sttr \regB, [\ptr, \offset + 8]
+ add \ptr, \ptr, \offset
+ _asm_extable_faultaddr 8888b,9998f;
+ _asm_extable_faultaddr 8889b,9998f;
+ .endm
+
+ .macro ldrb1_nuao reg, ptr, offset=0
+ 8888: ldrb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro strb1_nuao reg, ptr, offset=0
+ 8888: strb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro ldrb1_nuao_reg reg, ptr, offset=0
+ 8888: ldrb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro strb1_nuao_reg reg, ptr, offset=0
+ 8888: strb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro ldr1_nuao reg, ptr, offset=0
+ 8888: ldr \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro str1_nuao reg, ptr, offset=0
+ 8888: str \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro ldp1_nuao regA, regB, ptr, offset=0
+ 8888: ldp \regA, \regB, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro stp1_nuao regA, regB, ptr, offset=0
+ 8888: stp \regA, \regB, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro ldp1_pre_nuao regA, regB, ptr, offset
+ 8888: ldp \regA, \regB, [\ptr, \offset]!
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro stp1_pre_nuao regA, regB, ptr, offset
+ 8888: stp \regA, \regB, [\ptr, \offset]!
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro copy_exit
+ b .Luaccess_finish
+ .endm
ENTRY(__arch_copy_in_user)
uaccess_enable_not_uao x3, x4, x5
- add end, x0, x2
-#include "copy_template.S"
+#include "copy_template_user.S"
+.Luaccess_finish:
uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_in_user)
EXPORT_SYMBOL(__arch_copy_in_user)
-
- .section .fixup,"ax"
- .align 2
-9998: sub x0, end, dst // bytes not copied
- ret
- .previous
+#include "copy_user_fixup.S"
diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
--- a/arch/arm64/lib/copy_template.S
+++ b/arch/arm64/lib/copy_template.S
@@ -1,13 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2012 Linaro Limited. All rights reserved.
+ * Copyright (c) 2015 ARM Ltd. All rights reserved.
*
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
+ * This code is based on glibc Cortex Strings work originally authored by
+ * Linaro, found at:
*
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * https://git.linaro.org/toolchain/cortex-strings.git
*/
@@ -21,161 +20,146 @@
* Returns:
* x0 - dest
*/
-dstin .req x0
-src .req x1
-count .req x2
-tmp1 .req x3
-tmp1w .req w3
-tmp2 .req x4
-tmp2w .req w4
-dst .req x6
+ #define dstin x0
+ #define src x1
+ #define count x2
+ #define dst x3
+ #define srcend x4
+ #define dstend x5
+ #define A_l x6
+ #define A_lw w6
+ #define A_h x7
+ #define A_hw w7
+ #define B_l x8
+ #define B_lw w8
+ #define B_h x9
+ #define C_l x10
+ #define C_h x11
+ #define D_l x12
+ #define D_h x13
+ #define E_l src
+ #define E_h count
+ #define F_l srcend
+ #define F_h dst
+ #define tmp1 x9
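+
+	/*
+	 * E_* and F_* alias src, count, srcend and dst, whose old values
+	 * are dead by the time L(copy96) loads into them; tmp1 shares x9
+	 * with B_h and is only live before B_l/B_h are loaded.
+	 */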
-A_l .req x7
-A_h .req x8
-B_l .req x9
-B_h .req x10
-C_l .req x11
-C_h .req x12
-D_l .req x13
-D_h .req x14
+ prfm PLDL1KEEP, [src]
+ add srcend, src, count
+ add dstend, dstin, count
+ cmp count, 16
+ b.ls L(copy16)
+ cmp count, 96
+ b.hi L(copy_long)
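+	/* 0..16 bytes: L(copy16); more than 96: L(copy_long); the medium
+	   17..96 path continues inline below. */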
- mov dst, dstin
- cmp count, #16
- /*When memory length is less than 16, the accessed are not aligned.*/
- b.lo .Ltiny15
-
- neg tmp2, src
- ands tmp2, tmp2, #15/* Bytes to reach alignment. */
- b.eq .LSrcAligned
- sub count, count, tmp2
- /*
- * Copy the leading memory data from src to dst in an increasing
- * address order.By this way,the risk of overwriting the source
- * memory data is eliminated when the distance between src and
- * dst is less than 16. The memory accesses here are alignment.
- */
- tbz tmp2, #0, 1f
- ldrb1 tmp1w, src, #1
- strb1 tmp1w, dst, #1
+ /* Medium copies: 17..96 bytes. */
+ sub tmp1, count, 1
+ ldp1 A_l, A_h, src
+ tbnz tmp1, 6, L(copy96)
+ ldp1 D_l, D_h, srcend, -16
+ tbz tmp1, 5, 1f
+ ldp1 B_l, B_h, src, 16
+ ldp1 C_l, C_h, srcend, -32
+ stp1 B_l, B_h, dstin, 16
+ stp1 C_l, C_h, dstend, -32
1:
- tbz tmp2, #1, 2f
- ldrh1 tmp1w, src, #2
- strh1 tmp1w, dst, #2
+ stp1 A_l, A_h, dstin
+ stp1 D_l, D_h, dstend, -16
+ copy_exit
+
+ .p2align 4
+ /* Small copies: 0..16 bytes. */
+L(copy16):
+ cmp count, 8
+ b.lo 1f
+ ldr1 A_l, src
+ ldr1 A_h, srcend, -8
+ str1 A_l, dstin
+ str1 A_h, dstend, -8
+ copy_exit
+ .p2align 4
+1:
+ tbz count, 2, 1f
+ ldr1 A_lw, src
+ ldr1 A_hw, srcend, -4
+ str1 A_lw, dstin
+ str1 A_hw, dstend, -4
+ copy_exit
+
+ /* Copy 0..3 bytes. Use a branchless sequence that copies the same
+ byte 3 times if count==1, or the 2nd byte twice if count==2. */
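+	/* With count==3, tmp1 is 1 and all three bytes land at distinct
+	   offsets exactly once. */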
+1:
+ cbz count, 2f
+ lsr tmp1, count, 1
+ ldrb1 A_lw, src
+ ldrb1 A_hw, srcend, -1
+ ldrb1_reg B_lw, src, tmp1
+ strb1 A_lw, dstin
+ strb1_reg B_lw, dstin, tmp1
+ strb1 A_hw, dstend, -1
+2: copy_exit
+
+ .p2align 4
+ /* Copy 64..96 bytes. Copy 64 bytes from the start and
+ 32 bytes from the end. */
+L(copy96):
+ ldp1 B_l, B_h, src, 16
+ ldp1 C_l, C_h, src, 32
+ ldp1 D_l, D_h, src, 48
+ ldp1 E_l, E_h, srcend, -32
+ ldp1 F_l, F_h, srcend, -16
+ stp1 A_l, A_h, dstin
+ stp1 B_l, B_h, dstin, 16
+ stp1 C_l, C_h, dstin, 32
+ stp1 D_l, D_h, dstin, 48
+ stp1 E_l, E_h, dstend, -32
+ stp1 F_l, F_h, dstend, -16
+ copy_exit
+
+ /* Align DST to 16 byte alignment so that we don't cross cache line
+ boundaries on both loads and stores. There are at least 96 bytes
+ to copy, so copy 16 bytes unaligned and then align. The loop
+ copies 64 bytes per iteration and prefetches one iteration ahead. */
+
+ .p2align 4
+L(copy_long):
+ and tmp1, dstin, 15
+ bic dst, dstin, 15
+ ldp1 D_l, D_h, src
+ sub src, src, tmp1
+ add count, count, tmp1 /* Count is now 16 too large. */
+ ldp1 A_l, A_h, src, 16
+ stp1 D_l, D_h, dstin
+ ldp1 B_l, B_h, src, 32
+ ldp1 C_l, C_h, src, 48
+ ldp1_pre D_l, D_h, src, 64
+ subs count, count, 128 + 16 /* Test and readjust count. */
+ b.ls 2f
+1:
+ stp1 A_l, A_h, dst, 16
+ ldp1 A_l, A_h, src, 16
+ stp1 B_l, B_h, dst, 32
+ ldp1 B_l, B_h, src, 32
+ stp1 C_l, C_h, dst, 48
+ ldp1 C_l, C_h, src, 48
+ stp1_pre D_l, D_h, dst, 64
+ ldp1_pre D_l, D_h, src, 64
+ subs count, count, 64
+ b.hi 1b
+
+ /* Write the last full set of 64 bytes. The remainder is at most 64
+ bytes, so it is safe to always copy 64 bytes from the end even if
+ there is just 1 byte left. */
2:
- tbz tmp2, #2, 3f
- ldr1 tmp1w, src, #4
- str1 tmp1w, dst, #4
-3:
- tbz tmp2, #3, .LSrcAligned
- ldr1 tmp1, src, #8
- str1 tmp1, dst, #8
-
-.LSrcAligned:
- cmp count, #64
- b.ge .Lcpy_over64
- /*
- * Deal with small copies quickly by dropping straight into the
- * exit block.
- */
-.Ltail63:
- /*
- * Copy up to 48 bytes of data. At this point we only need the
- * bottom 6 bits of count to be accurate.
- */
- ands tmp1, count, #0x30
- b.eq .Ltiny15
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- ldp1 A_l, A_h, src, #16
- stp1 A_l, A_h, dst, #16
-1:
- ldp1 A_l, A_h, src, #16
- stp1 A_l, A_h, dst, #16
-2:
- ldp1 A_l, A_h, src, #16
- stp1 A_l, A_h, dst, #16
-.Ltiny15:
- /*
- * Prefer to break one ldp/stp into several load/store to access
- * memory in an increasing address order,rather than to load/store 16
- * bytes from (src-16) to (dst-16) and to backward the src to aligned
- * address,which way is used in original cortex memcpy. If keeping
- * the original memcpy process here, memmove need to satisfy the
- * precondition that src address is at least 16 bytes bigger than dst
- * address,otherwise some source data will be overwritten when memove
- * call memcpy directly. To make memmove simpler and decouple the
- * memcpy's dependency on memmove, withdrew the original process.
- */
- tbz count, #3, 1f
- ldr1 tmp1, src, #8
- str1 tmp1, dst, #8
-1:
- tbz count, #2, 2f
- ldr1 tmp1w, src, #4
- str1 tmp1w, dst, #4
-2:
- tbz count, #1, 3f
- ldrh1 tmp1w, src, #2
- strh1 tmp1w, dst, #2
-3:
- tbz count, #0, .Lexitfunc
- ldrb1 tmp1w, src, #1
- strb1 tmp1w, dst, #1
-
- b .Lexitfunc
-
-.Lcpy_over64:
- subs count, count, #128
- b.ge .Lcpy_body_large
- /*
- * Less than 128 bytes to copy, so handle 64 here and then jump
- * to the tail.
- */
- ldp1 A_l, A_h, src, #16
- stp1 A_l, A_h, dst, #16
- ldp1 B_l, B_h, src, #16
- ldp1 C_l, C_h, src, #16
- stp1 B_l, B_h, dst, #16
- stp1 C_l, C_h, dst, #16
- ldp1 D_l, D_h, src, #16
- stp1 D_l, D_h, dst, #16
-
- tst count, #0x3f
- b.ne .Ltail63
- b .Lexitfunc
-
- /*
- * Critical loop. Start at a new cache line boundary. Assuming
- * 64 bytes per line this ensures the entire loop is in one line.
- */
- .p2align L1_CACHE_SHIFT
-.Lcpy_body_large:
- /* pre-get 64 bytes data. */
- ldp1 A_l, A_h, src, #16
- ldp1 B_l, B_h, src, #16
- ldp1 C_l, C_h, src, #16
- ldp1 D_l, D_h, src, #16
-1:
- /*
- * interlace the load of next 64 bytes data block with store of the last
- * loaded 64 bytes data.
- */
- stp1 A_l, A_h, dst, #16
- ldp1 A_l, A_h, src, #16
- stp1 B_l, B_h, dst, #16
- ldp1 B_l, B_h, src, #16
- stp1 C_l, C_h, dst, #16
- ldp1 C_l, C_h, src, #16
- stp1 D_l, D_h, dst, #16
- ldp1 D_l, D_h, src, #16
- subs count, count, #64
- b.ge 1b
- stp1 A_l, A_h, dst, #16
- stp1 B_l, B_h, dst, #16
- stp1 C_l, C_h, dst, #16
- stp1 D_l, D_h, dst, #16
-
- tst count, #0x3f
- b.ne .Ltail63
-.Lexitfunc:
+ ldp1 E_l, E_h, srcend, -64
+ stp1 A_l, A_h, dst, 16
+ ldp1 A_l, A_h, srcend, -48
+ stp1 B_l, B_h, dst, 32
+ ldp1 B_l, B_h, srcend, -32
+ stp1 C_l, C_h, dst, 48
+ ldp1 C_l, C_h, srcend, -16
+ stp1 D_l, D_h, dst, 64
+ stp1 E_l, E_h, dstend, -64
+ stp1 A_l, A_h, dstend, -48
+ stp1 B_l, B_h, dstend, -32
+ stp1 C_l, C_h, dstend, -16
+ copy_exit
diff --git a/arch/arm64/lib/copy_template_user.S b/arch/arm64/lib/copy_template_user.S
new file mode 100644
--- /dev/null
+++ b/arch/arm64/lib/copy_template_user.S
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#define L(l) .L ## l
+
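+/*
+ * CPUs with UAO fall through to the ldtr/sttr-based expansion of the
+ * template below; CPUs without UAO branch to a second expansion built
+ * from the privileged *_nuao accessors.
+ */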
+ alternative_if_not ARM64_HAS_UAO
+ b L(copy_non_uao)
+ alternative_else_nop_endif
+#include "copy_template.S"
+
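+/*
+ * Rebind the accessor macros to their *_nuao variants and switch the
+ * L() label prefix so the second expansion of copy_template.S does not
+ * clash with the local labels of the first.
+ */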
+#define ldp1 ldp1_nuao
+#define ldp1_pre ldp1_pre_nuao
+#define stp1 stp1_nuao
+#define stp1_pre stp1_pre_nuao
+#define ldr1 ldr1_nuao
+#define str1 str1_nuao
+#define ldrb1 ldrb1_nuao
+#define strb1 strb1_nuao
+#define ldrb1_reg ldrb1_nuao_reg
+#define strb1_reg strb1_nuao_reg
+
+L(copy_non_uao):
+#undef L
+#define L(l) .Lnuao ## l
+#include "copy_template.S"
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -19,51 +19,114 @@
* Returns:
* x0 - bytes not copied
*/
- .macro ldrb1 ptr, regB, val
- ldrb \ptr, [\regB], \val
+
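+	/*
+	 * Only the destination is a user buffer: loads are ordinary kernel
+	 * accesses and need no fixup, while every store targets user
+	 * memory and carries one.
+	 */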
+ .macro ldrb1 reg, ptr, offset=0
+ ldrb \reg, [\ptr, \offset]
.endm
- .macro strb1 ptr, regB, val
- uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
+ .macro strb1 reg, ptr, offset=0
+ 8888: sttrb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro ldrh1 ptr, regB, val
- ldrh \ptr, [\regB], \val
+ .macro ldrb1_reg reg, ptr, offset
+ ldrb \reg, [\ptr, \offset]
.endm
- .macro strh1 ptr, regB, val
- uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
+ .macro strb1_reg reg, ptr, offset
+ add \ptr, \ptr, \offset
+ 8888: sttrb \reg, [\ptr]
+ sub \ptr, \ptr, \offset
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro ldr1 ptr, regB, val
- ldr \ptr, [\regB], \val
+ .macro ldr1 reg, ptr, offset=0
+ ldr \reg, [\ptr, \offset]
.endm
- .macro str1 ptr, regB, val
- uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
+ .macro str1 reg, ptr, offset=0
+ 8888: sttr \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
.endm
- .macro ldp1 ptr, regB, regC, val
- ldp \ptr, \regB, [\regC], \val
+ .macro ldp1 regA, regB, ptr, offset=0
+ ldp \regA, \regB, [\ptr, \offset]
.endm
- .macro stp1 ptr, regB, regC, val
- uao_stp 9998f, \ptr, \regB, \regC, \val
+ .macro stp1 regA, regB, ptr, offset=0
+ 8888: sttr \regA, [\ptr, \offset]
+ 8889: sttr \regB, [\ptr, \offset + 8]
+ _asm_extable_faultaddr 8888b,9998f;
+ _asm_extable_faultaddr 8889b,9998f;
+ .endm
+
+ .macro ldp1_pre regA, regB, ptr, offset
+ ldp \regA, \regB, [\ptr, \offset]!
+ .endm
+
+ .macro stp1_pre regA, regB, ptr, offset
+ 8888: sttr \regA, [\ptr, \offset]
+ 8889: sttr \regB, [\ptr, \offset + 8]
+ add \ptr, \ptr, \offset
+ _asm_extable_faultaddr 8888b,9998f;
+ _asm_extable_faultaddr 8889b,9998f;
+ .endm
+
+ .macro ldrb1_nuao reg, ptr, offset=0
+ ldrb \reg, [\ptr, \offset]
+ .endm
+
+ .macro strb1_nuao reg, ptr, offset=0
+ 8888: strb \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro ldrb1_nuao_reg reg, ptr, offset=0
+ ldrb \reg, [\ptr, \offset]
+ .endm
+
+	.macro strb1_nuao_reg reg, ptr, offset=0
+	8888: strb \reg, [\ptr, \offset]
+	_asm_extable_faultaddr 8888b,9998f;
+	.endm
+
+ .macro ldr1_nuao reg, ptr, offset=0
+ ldr \reg, [\ptr, \offset]
+ .endm
+
+ .macro str1_nuao reg, ptr, offset=0
+ 8888: str \reg, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro ldp1_nuao regA, regB, ptr, offset=0
+ ldp \regA, \regB, [\ptr, \offset]
+ .endm
+
+ .macro ldp1_pre_nuao regA, regB, ptr, offset
+ ldp \regA, \regB, [\ptr, \offset]!
+ .endm
+
+ .macro stp1_nuao regA, regB, ptr, offset=0
+ 8888: stp \regA, \regB, [\ptr, \offset]
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro stp1_pre_nuao regA, regB, ptr, offset
+ 8888: stp \regA, \regB, [\ptr, \offset]!
+ _asm_extable_faultaddr 8888b,9998f;
+ .endm
+
+ .macro copy_exit
+ b .Luaccess_finish
.endm
-end .req x5
ENTRY(__arch_copy_to_user)
uaccess_enable_not_uao x3, x4, x5
- add end, x0, x2
-#include "copy_template.S"
+#include "copy_template_user.S"
+.Luaccess_finish:
uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
EXPORT_SYMBOL(__arch_copy_to_user)
-
- .section .fixup,"ax"
- .align 2
-9998: sub x0, end, dst // bytes not copied
- ret
- .previous
+#include "copy_user_fixup.S"
diff --git a/arch/arm64/lib/copy_user_fixup.S b/arch/arm64/lib/copy_user_fixup.S
new file mode 100644
--- /dev/null
+++ b/arch/arm64/lib/copy_user_fixup.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+addr .req x15
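+// The _asm_extable_faultaddr fixup handler is expected to leave the
+// faulting address in x15 before branching here.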
+.section .fixup,"ax"
+.align 2
+9998:
+	// If the fault address falls within the source buffer, a load
+	// faulted; otherwise it was a store to the destination buffer.
+ cmp addr, src
+ ccmp addr, srcend, #0x0, ge
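+	// When addr < src, the ccmp sets NZCV to 0b0000, so "lt" is false
+	// and the csel below picks dstend (a store to the destination
+	// faulted).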
+ csel x0, srcend, dstend, lt
+ sub x0, x0, addr
+ ret
+
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -24,43 +24,57 @@
* Returns:
* x0 - dest
*/
- .macro ldrb1 ptr, regB, val
- ldrb \ptr, [\regB], \val
+
+ #define L(l) .L ## l
+
+ .macro ldrb1 reg, ptr, offset=0
+ ldrb \reg, [\ptr, \offset]
.endm
- .macro strb1 ptr, regB, val
- strb \ptr, [\regB], \val
+ .macro strb1 reg, ptr, offset=0
+ strb \reg, [\ptr, \offset]
.endm
- .macro ldrh1 ptr, regB, val
- ldrh \ptr, [\regB], \val
+ .macro ldr1 reg, ptr, offset=0
+ ldr \reg, [\ptr, \offset]
.endm
- .macro strh1 ptr, regB, val
- strh \ptr, [\regB], \val
+ .macro str1 reg, ptr, offset=0
+ str \reg, [\ptr, \offset]
.endm
- .macro ldr1 ptr, regB, val
- ldr \ptr, [\regB], \val
+ .macro ldp1 regA, regB, ptr, offset=0
+ ldp \regA, \regB, [\ptr, \offset]
.endm
- .macro str1 ptr, regB, val
- str \ptr, [\regB], \val
+ .macro stp1 regA, regB, ptr, offset=0
+ stp \regA, \regB, [\ptr, \offset]
.endm
- .macro ldp1 ptr, regB, regC, val
- ldp \ptr, \regB, [\regC], \val
+ .macro ldrb1_reg reg, ptr, offset
+ ldrb1 \reg, \ptr, \offset
.endm
- .macro stp1 ptr, regB, regC, val
- stp \ptr, \regB, [\regC], \val
+ .macro strb1_reg reg, ptr, offset
+ strb1 \reg, \ptr, \offset
+ .endm
+
+ .macro ldp1_pre regA, regB, ptr, offset
+ ldp \regA, \regB, [\ptr, \offset]!
+ .endm
+
+ .macro stp1_pre regA, regB, ptr, offset
+ stp \regA, \regB, [\ptr, \offset]!
+ .endm
+
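+	/* memcpy cannot fault, so the template exit is a plain return. */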
+ .macro copy_exit
+ ret
.endm
.weak memcpy
ENTRY(__memcpy)
ENTRY(memcpy)
#include "copy_template.S"
- ret
ENDPIPROC(memcpy)
EXPORT_SYMBOL(memcpy)
ENDPROC(__memcpy)