--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1746,7 +1746,7 @@ ENDPROC(aesni_gcm_enc)
.align 4
_key_expansion_128:
-_key_expansion_256a:
+ENTRY_LOCAL(_key_expansion_256a)
pshufd $0b11111111, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
@@ -1759,8 +1759,7 @@ _key_expansion_256a:
ENDPROC(_key_expansion_128)
ENDPROC(_key_expansion_256a)
-.align 4
-_key_expansion_192a:
+ENTRY_LOCAL(_key_expansion_192a)
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
@@ -1784,8 +1783,7 @@ _key_expansion_192a:
ret
ENDPROC(_key_expansion_192a)
-.align 4
-_key_expansion_192b:
+ENTRY_LOCAL(_key_expansion_192b)
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
@@ -1804,8 +1802,7 @@ _key_expansion_192b:
ret
ENDPROC(_key_expansion_192b)
-.align 4
-_key_expansion_256b:
+ENTRY_LOCAL(_key_expansion_256b)
pshufd $0b10101010, %xmm1, %xmm1
shufps $0b00010000, %xmm2, %xmm4
pxor %xmm4, %xmm2
@@ -1968,8 +1965,7 @@ ENDPROC(aesni_enc)
* KEY
* TKEYP (T1)
*/
-.align 4
-_aesni_enc1:
+ENTRY_LOCAL(_aesni_enc1)
movaps (KEYP), KEY # key
mov KEYP, TKEYP
pxor KEY, STATE # round 0
@@ -2032,8 +2028,7 @@ ENDPROC(_aesni_enc1)
* KEY
* TKEYP (T1)
*/
-.align 4
-_aesni_enc4:
+ENTRY_LOCAL(_aesni_enc4)
movaps (KEYP), KEY # key
mov KEYP, TKEYP
pxor KEY, STATE1 # round 0
@@ -2160,8 +2155,7 @@ ENDPROC(aesni_dec)
* KEY
* TKEYP (T1)
*/
-.align 4
-_aesni_dec1:
+ENTRY_LOCAL(_aesni_dec1)
movaps (KEYP), KEY # key
mov KEYP, TKEYP
pxor KEY, STATE # round 0
@@ -2224,8 +2218,7 @@ ENDPROC(_aesni_dec1)
* KEY
* TKEYP (T1)
*/
-.align 4
-_aesni_dec4:
+ENTRY_LOCAL(_aesni_dec4)
movaps (KEYP), KEY # key
mov KEYP, TKEYP
pxor KEY, STATE1 # round 0
@@ -2591,8 +2584,7 @@ ENDPROC(aesni_cbc_dec)
* INC: == 1, in little endian
* BSWAP_MASK == endian swapping mask
*/
-.align 4
-_aesni_inc_init:
+ENTRY_LOCAL(_aesni_inc_init)
movaps .Lbswap_mask, BSWAP_MASK
movaps IV, CTR
PSHUFB_XMM BSWAP_MASK CTR
@@ -2617,8 +2609,7 @@ ENDPROC(_aesni_inc_init)
* CTR: == output IV, in little endian
* TCTR_LOW: == lower qword of CTR
*/
-.align 4
-_aesni_inc:
+ENTRY_LOCAL(_aesni_inc)
paddq INC, CTR
add $1, TCTR_LOW
jnc .Linc_low
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -188,7 +188,7 @@
* larger and would only be 0.5% faster (on sandy-bridge).
*/
.align 8
-roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
+ENTRY_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
%rcx, (%r9));
@@ -196,7 +196,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
-roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
+ENTRY_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
%xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
%rax, (%r9));
@@ -721,7 +721,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
.align 8
-__camellia_enc_blk16:
+ENTRY_LOCAL(__camellia_enc_blk16)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 256 bytes
@@ -808,7 +808,7 @@ __camellia_enc_blk16:
ENDPROC(__camellia_enc_blk16)
.align 8
-__camellia_dec_blk16:
+ENTRY_LOCAL(__camellia_dec_blk16)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 256 bytes
@@ -1119,7 +1119,7 @@ ENDPROC(camellia_ctr_16way)
vpxor tmp, iv, iv;
.align 8
-camellia_xts_crypt_16way:
+ENTRY_LOCAL(camellia_xts_crypt_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -227,7 +227,7 @@
* larger and would only marginally faster.
*/
.align 8
-roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
+ENTRY_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
%rcx, (%r9));
@@ -235,7 +235,7 @@ roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
-roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
+ENTRY_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
%ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
%rax, (%r9));
@@ -764,7 +764,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
.align 8
-__camellia_enc_blk32:
+ENTRY_LOCAL(__camellia_enc_blk32)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 512 bytes
@@ -851,7 +851,7 @@ __camellia_enc_blk32:
ENDPROC(__camellia_enc_blk32)
.align 8
-__camellia_dec_blk32:
+ENTRY_LOCAL(__camellia_dec_blk32)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 512 bytes
@@ -1226,7 +1226,7 @@ ENDPROC(camellia_ctr_32way)
vpxor tmp1, iv, iv;
.align 8
-camellia_xts_crypt_32way:
+ENTRY_LOCAL(camellia_xts_crypt_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -224,7 +224,7 @@
.text
.align 16
-__cast5_enc_blk16:
+ENTRY_LOCAL(__cast5_enc_blk16)
/* input:
* %rdi: ctx, CTX
* RL1: blocks 1 and 2
@@ -296,7 +296,7 @@ __cast5_enc_blk16:
ENDPROC(__cast5_enc_blk16)
.align 16
-__cast5_dec_blk16:
+ENTRY_LOCAL(__cast5_dec_blk16)
/* input:
* %rdi: ctx, CTX
* RL1: encrypted blocks 1 and 2
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -262,7 +262,7 @@
.text
.align 8
-__cast6_enc_blk8:
+ENTRY_LOCAL(__cast6_enc_blk8)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -308,7 +308,7 @@ __cast6_enc_blk8:
ENDPROC(__cast6_enc_blk8)
.align 8
-__cast6_dec_blk8:
+ENTRY_LOCAL(__cast6_dec_blk8)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -47,7 +47,7 @@
* T2
* T3
*/
-__clmul_gf128mul_ble:
+ENTRY_LOCAL(__clmul_gf128mul_ble)
movaps DATA, T1
pshufd $0b01001110, DATA, T2
pshufd $0b01001110, SHASH, T3
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -570,7 +570,7 @@
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
-__serpent_enc_blk8_avx:
+ENTRY_LOCAL(__serpent_enc_blk8_avx)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -624,7 +624,7 @@ __serpent_enc_blk8_avx:
ENDPROC(__serpent_enc_blk8_avx)
.align 8
-__serpent_dec_blk8_avx:
+ENTRY_LOCAL(__serpent_dec_blk8_avx)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -566,7 +566,7 @@
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
-__serpent_enc_blk16:
+ENTRY_LOCAL(__serpent_enc_blk16)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext
@@ -620,7 +620,7 @@ __serpent_enc_blk16:
ENDPROC(__serpent_enc_blk16)
.align 8
-__serpent_dec_blk16:
+ENTRY_LOCAL(__serpent_dec_blk16)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -249,7 +249,7 @@
vpxor x3, wkey, x3;
.align 8
-__twofish_enc_blk8:
+ENTRY_LOCAL(__twofish_enc_blk8)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -291,7 +291,7 @@ __twofish_enc_blk8:
ENDPROC(__twofish_enc_blk8)
.align 8
-__twofish_dec_blk8:
+ENTRY_LOCAL(__twofish_dec_blk8)
/* input:
* %rdi: ctx, CTX
* RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
Use the newly added ENTRY_LOCAL to annotate the starts of all functions
which do not have a ".globl" annotation but whose ends are annotated by
ENDPROC. This is needed to balance ENDPROC for tools that will generate
debuginfo.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <x86@kernel.org>
Cc: <linux-crypto@vger.kernel.org>
---
 arch/x86/crypto/aesni-intel_asm.S            | 29 ++++++++++-------------------
 arch/x86/crypto/camellia-aesni-avx-asm_64.S  | 10 +++++-----
 arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 10 +++++-----
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S    |  4 ++--
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S    |  4 ++--
 arch/x86/crypto/ghash-clmulni-intel_asm.S    |  2 +-
 arch/x86/crypto/serpent-avx-x86_64-asm_64.S  |  4 ++--
 arch/x86/crypto/serpent-avx2-asm_64.S        |  4 ++--
 arch/x86/crypto/twofish-avx-x86_64-asm_64.S  |  4 ++--
 9 files changed, 31 insertions(+), 40 deletions(-)
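A note on ENTRY_LOCAL itself (not part of this patch): the macro comes from
the prerequisite linkage.h change, and is assumed here to mirror the
existing ENTRY macro minus the ".globl" directive, roughly:

	/*
	 * Sketch only -- the real definition lives in the linkage.h patch
	 * this series depends on.  Same shape as ENTRY, but the symbol
	 * stays local to the object file instead of being exported.
	 */
	#ifndef ENTRY_LOCAL
	#define ENTRY_LOCAL(name) \
		ALIGN ASM_NL \
		name:
	#endif

With a definition along those lines, a conversion such as
"_aesni_enc1:" -> "ENTRY_LOCAL(_aesni_enc1)" keeps the symbol local while
giving debuginfo tools a start marker to pair with the existing
"ENDPROC(_aesni_enc1)".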