--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -15,7 +15,7 @@
* void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
* u32 *macp, u8 const rk[], u32 rounds);
*/
-SYM_FUNC_START(ce_aes_ccm_auth_data)
+SYM_FUNC_START_SECTION(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */
ld1 {v0.16b}, [x0] /* load mac */
cbz w8, 1f
@@ -81,13 +81,13 @@ SYM_FUNC_START(ce_aes_ccm_auth_data)
st1 {v0.16b}, [x0]
10: str w8, [x3]
ret
-SYM_FUNC_END(ce_aes_ccm_auth_data)
+SYM_FUNC_END_SECTION(ce_aes_ccm_auth_data)
/*
* void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
* u32 rounds);
*/
-SYM_FUNC_START(ce_aes_ccm_final)
+SYM_FUNC_START_SECTION(ce_aes_ccm_final)
ld1 {v3.4s}, [x2], #16 /* load first round key */
ld1 {v0.16b}, [x0] /* load mac */
cmp w3, #12 /* which key size? */
@@ -121,7 +121,7 @@ SYM_FUNC_START(ce_aes_ccm_final)
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
st1 {v0.16b}, [x0] /* store result */
ret
-SYM_FUNC_END(ce_aes_ccm_final)
+SYM_FUNC_END_SECTION(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
ldr x8, [x6, #8] /* load lower ctr */
@@ -212,10 +212,10 @@ CPU_LE( rev x8, x8 )
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[]);
*/
-SYM_FUNC_START(ce_aes_ccm_encrypt)
+SYM_FUNC_START_SECTION(ce_aes_ccm_encrypt)
aes_ccm_do_crypt 1
-SYM_FUNC_END(ce_aes_ccm_encrypt)
+SYM_FUNC_END_SECTION(ce_aes_ccm_encrypt)
-SYM_FUNC_START(ce_aes_ccm_decrypt)
+SYM_FUNC_START_SECTION(ce_aes_ccm_decrypt)
aes_ccm_do_crypt 0
-SYM_FUNC_END(ce_aes_ccm_decrypt)
+SYM_FUNC_END_SECTION(ce_aes_ccm_decrypt)
--- a/arch/arm64/crypto/aes-ce-core.S
+++ b/arch/arm64/crypto/aes-ce-core.S
@@ -8,7 +8,7 @@
.arch armv8-a+crypto
-SYM_FUNC_START(__aes_ce_encrypt)
+SYM_FUNC_START_SECTION(__aes_ce_encrypt)
sub w3, w3, #2
ld1 {v0.16b}, [x2]
ld1 {v1.4s}, [x0], #16
@@ -34,9 +34,9 @@ SYM_FUNC_START(__aes_ce_encrypt)
eor v0.16b, v0.16b, v3.16b
st1 {v0.16b}, [x1]
ret
-SYM_FUNC_END(__aes_ce_encrypt)
+SYM_FUNC_END_SECTION(__aes_ce_encrypt)
-SYM_FUNC_START(__aes_ce_decrypt)
+SYM_FUNC_START_SECTION(__aes_ce_decrypt)
sub w3, w3, #2
ld1 {v0.16b}, [x2]
ld1 {v1.4s}, [x0], #16
@@ -62,23 +62,23 @@ SYM_FUNC_START(__aes_ce_decrypt)
eor v0.16b, v0.16b, v3.16b
st1 {v0.16b}, [x1]
ret
-SYM_FUNC_END(__aes_ce_decrypt)
+SYM_FUNC_END_SECTION(__aes_ce_decrypt)
/*
* __aes_ce_sub() - use the aese instruction to perform the AES sbox
* substitution on each byte in 'input'
*/
-SYM_FUNC_START(__aes_ce_sub)
+SYM_FUNC_START_SECTION(__aes_ce_sub)
dup v1.4s, w0
movi v0.16b, #0
aese v0.16b, v1.16b
umov w0, v0.s[0]
ret
-SYM_FUNC_END(__aes_ce_sub)
+SYM_FUNC_END_SECTION(__aes_ce_sub)
-SYM_FUNC_START(__aes_ce_invert)
+SYM_FUNC_START_SECTION(__aes_ce_invert)
ld1 {v0.4s}, [x1]
aesimc v1.16b, v0.16b
st1 {v1.4s}, [x0]
ret
-SYM_FUNC_END(__aes_ce_invert)
+SYM_FUNC_END_SECTION(__aes_ce_invert)
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -9,8 +9,8 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
-#define AES_FUNC_START(func) SYM_FUNC_START(ce_ ## func)
-#define AES_FUNC_END(func) SYM_FUNC_END(ce_ ## func)
+#define AES_FUNC_START(func) SYM_FUNC_START_SECTION(ce_ ## func)
+#define AES_FUNC_END(func) SYM_FUNC_END_SECTION(ce_ ## func)
.arch armv8-a+crypto
--- a/arch/arm64/crypto/aes-cipher-core.S
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -122,11 +122,11 @@ CPU_BE( rev w7, w7 )
ret
.endm
-SYM_FUNC_START(__aes_arm64_encrypt)
+SYM_FUNC_START_SECTION(__aes_arm64_encrypt)
do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2
-SYM_FUNC_END(__aes_arm64_encrypt)
+SYM_FUNC_END_SECTION(__aes_arm64_encrypt)
+SYM_FUNC_START_SECTION(__aes_arm64_decrypt)
.align 5
-SYM_FUNC_START(__aes_arm64_decrypt)
do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0
-SYM_FUNC_END(__aes_arm64_decrypt)
+SYM_FUNC_END_SECTION(__aes_arm64_decrypt)
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -22,26 +22,26 @@
#define ST5(x...) x
#endif
-SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
+SYM_FUNC_START_LOCAL_SECTION(aes_encrypt_block4x)
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
-SYM_FUNC_END(aes_encrypt_block4x)
+SYM_FUNC_END_SECTION(aes_encrypt_block4x)
-SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
+SYM_FUNC_START_LOCAL_SECTION(aes_decrypt_block4x)
decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
-SYM_FUNC_END(aes_decrypt_block4x)
+SYM_FUNC_END_SECTION(aes_decrypt_block4x)
#if MAX_STRIDE == 5
-SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
+SYM_FUNC_START_LOCAL_SECTION(aes_encrypt_block5x)
encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
ret
-SYM_FUNC_END(aes_encrypt_block5x)
+SYM_FUNC_END_SECTION(aes_encrypt_block5x)
-SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
+SYM_FUNC_START_LOCAL_SECTION(aes_decrypt_block5x)
decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
ret
-SYM_FUNC_END(aes_decrypt_block5x)
+SYM_FUNC_END_SECTION(aes_decrypt_block5x)
#endif
/*
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -8,8 +8,8 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
-#define AES_FUNC_START(func) SYM_FUNC_START(neon_ ## func)
-#define AES_FUNC_END(func) SYM_FUNC_END(neon_ ## func)
+#define AES_FUNC_START(func) SYM_FUNC_START_SECTION(neon_ ## func)
+#define AES_FUNC_END(func) SYM_FUNC_END_SECTION(neon_ ## func)
xtsmask .req v7
cbciv .req v7
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -380,7 +380,7 @@ ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f
/*
* void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
*/
-SYM_FUNC_START(aesbs_convert_key)
+SYM_FUNC_START_SECTION(aesbs_convert_key)
ld1 {v7.4s}, [x1], #16 // load round 0 key
ld1 {v17.4s}, [x1], #16 // load round 1 key
@@ -425,10 +425,10 @@ SYM_FUNC_START(aesbs_convert_key)
eor v17.16b, v17.16b, v7.16b
str q17, [x0]
ret
-SYM_FUNC_END(aesbs_convert_key)
+SYM_FUNC_END_SECTION(aesbs_convert_key)
+SYM_FUNC_START_LOCAL_SECTION(aesbs_encrypt8)
.align 4
-SYM_FUNC_START_LOCAL(aesbs_encrypt8)
ldr q9, [bskey], #16 // round 0 key
ldr q8, M0SR
ldr q24, SR
@@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8)
eor v2.16b, v2.16b, v12.16b
eor v5.16b, v5.16b, v12.16b
ret
-SYM_FUNC_END(aesbs_encrypt8)
+SYM_FUNC_END_SECTION(aesbs_encrypt8)
+SYM_FUNC_START_LOCAL_SECTION(aesbs_decrypt8)
.align 4
-SYM_FUNC_START_LOCAL(aesbs_decrypt8)
lsl x9, rounds, #7
add bskey, bskey, x9
@@ -553,7 +553,7 @@ SYM_FUNC_START_LOCAL(aesbs_decrypt8)
eor v3.16b, v3.16b, v12.16b
eor v5.16b, v5.16b, v12.16b
ret
-SYM_FUNC_END(aesbs_decrypt8)
+SYM_FUNC_END_SECTION(aesbs_decrypt8)
/*
* aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
@@ -619,13 +619,13 @@ SYM_FUNC_END(aesbs_decrypt8)
ret
.endm
+SYM_FUNC_START_SECTION(aesbs_ecb_encrypt)
.align 4
-SYM_FUNC_START(aesbs_ecb_encrypt)
__ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-SYM_FUNC_END(aesbs_ecb_encrypt)
+SYM_FUNC_END_SECTION(aesbs_ecb_encrypt)
+SYM_FUNC_START_SECTION(aesbs_ecb_decrypt)
.align 4
-SYM_FUNC_START(aesbs_ecb_decrypt)
__ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-SYM_FUNC_END(aesbs_ecb_decrypt)
+SYM_FUNC_END_SECTION(aesbs_ecb_decrypt)
@@ -633,8 +633,8 @@ SYM_FUNC_END(aesbs_ecb_decrypt)
* aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
*/
+SYM_FUNC_START_SECTION(aesbs_cbc_decrypt)
.align 4
-SYM_FUNC_START(aesbs_cbc_decrypt)
frame_push 6
mov x19, x0
@@ -718,7 +718,7 @@ SYM_FUNC_START(aesbs_cbc_decrypt)
2: frame_pop
ret
-SYM_FUNC_END(aesbs_cbc_decrypt)
+SYM_FUNC_END_SECTION(aesbs_cbc_decrypt)
.macro next_tweak, out, in, const, tmp
sshr \tmp\().2d, \in\().2d, #63
@@ -734,7 +734,7 @@ SYM_FUNC_END(aesbs_cbc_decrypt)
* aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
*/
-SYM_FUNC_START_LOCAL(__xts_crypt8)
+SYM_FUNC_START_LOCAL_SECTION(__xts_crypt8)
mov x6, #1
lsl x6, x6, x23
subs w23, w23, #8
@@ -787,7 +787,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8)
0: mov bskey, x21
mov rounds, x22
br x16
-SYM_FUNC_END(__xts_crypt8)
+SYM_FUNC_END_SECTION(__xts_crypt8)
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
frame_push 6, 64
@@ -851,13 +851,13 @@ SYM_FUNC_END(__xts_crypt8)
ret
.endm
-SYM_FUNC_START(aesbs_xts_encrypt)
+SYM_FUNC_START_SECTION(aesbs_xts_encrypt)
__xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-SYM_FUNC_END(aesbs_xts_encrypt)
+SYM_FUNC_END_SECTION(aesbs_xts_encrypt)
-SYM_FUNC_START(aesbs_xts_decrypt)
+SYM_FUNC_START_SECTION(aesbs_xts_decrypt)
__xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-SYM_FUNC_END(aesbs_xts_decrypt)
+SYM_FUNC_END_SECTION(aesbs_xts_decrypt)
.macro next_ctr, v
mov \v\().d[1], x8
@@ -871,7 +871,7 @@ SYM_FUNC_END(aesbs_xts_decrypt)
* aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
* int rounds, int blocks, u8 iv[], u8 final[])
*/
-SYM_FUNC_START(aesbs_ctr_encrypt)
+SYM_FUNC_START_SECTION(aesbs_ctr_encrypt)
frame_push 8
mov x19, x0
@@ -998,4 +998,4 @@ CPU_LE( rev x8, x8 )
7: cbz x25, 8b
st1 {v5.16b}, [x25]
b 8b
-SYM_FUNC_END(aesbs_ctr_encrypt)
+SYM_FUNC_END_SECTION(aesbs_ctr_encrypt)
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -23,7 +23,6 @@
#include <asm/cache.h>
.text
- .align 6
/*
* chacha_permute - permute one block
@@ -36,7 +35,8 @@
*
* Clobbers: w3, x10, v4, v12
*/
-SYM_FUNC_START_LOCAL(chacha_permute)
+SYM_FUNC_START_LOCAL_SECTION(chacha_permute)
+ .align 6
adr_l x10, ROT8
ld1 {v12.4s}, [x10]
@@ -104,9 +104,9 @@ SYM_FUNC_START_LOCAL(chacha_permute)
b.ne .Ldoubleround
ret
-SYM_FUNC_END(chacha_permute)
+SYM_FUNC_END_SECTION(chacha_permute)
-SYM_FUNC_START(chacha_block_xor_neon)
+SYM_FUNC_START_SECTION(chacha_block_xor_neon)
// x0: Input state matrix, s
// x1: 1 data block output, o
// x2: 1 data block input, i
@@ -143,9 +143,9 @@ SYM_FUNC_START(chacha_block_xor_neon)
ldp x29, x30, [sp], #16
ret
-SYM_FUNC_END(chacha_block_xor_neon)
+SYM_FUNC_END_SECTION(chacha_block_xor_neon)
-SYM_FUNC_START(hchacha_block_neon)
+SYM_FUNC_START_SECTION(hchacha_block_neon)
// x0: Input state matrix, s
// x1: output (8 32-bit words)
// w2: nrounds
@@ -163,7 +163,7 @@ SYM_FUNC_START(hchacha_block_neon)
ldp x29, x30, [sp], #16
ret
-SYM_FUNC_END(hchacha_block_neon)
+SYM_FUNC_END_SECTION(hchacha_block_neon)
a0 .req w12
a1 .req w13
@@ -182,8 +182,8 @@ SYM_FUNC_END(hchacha_block_neon)
a14 .req w27
a15 .req w28
+SYM_FUNC_START_SECTION(chacha_4block_xor_neon)
.align 6
-SYM_FUNC_START(chacha_4block_xor_neon)
frame_push 10
// x0: Input state matrix, s
@@ -790,7 +790,7 @@ CPU_BE( rev a15, a15 )
st1 {v28.16b-v31.16b}, [x7] // overlapping stores
3: st1 {v24.16b-v27.16b}, [x1]
b .Lout
-SYM_FUNC_END(chacha_4block_xor_neon)
+SYM_FUNC_END_SECTION(chacha_4block_xor_neon)
.section ".rodata", "a", %progbits
.align L1_CACHE_SHIFT
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -131,7 +131,7 @@
tbl bd4.16b, {\bd\().16b}, perm4.16b
.endm
-SYM_FUNC_START_LOCAL(__pmull_p8_core)
+SYM_FUNC_START_LOCAL_SECTION(__pmull_p8_core)
.L__pmull_p8_core:
ext t4.8b, ad.8b, ad.8b, #1 // A1
ext t5.8b, ad.8b, ad.8b, #2 // A2
@@ -194,7 +194,7 @@ SYM_FUNC_START_LOCAL(__pmull_p8_core)
eor t4.16b, t4.16b, t5.16b
eor t6.16b, t6.16b, t3.16b
ret
-SYM_FUNC_END(__pmull_p8_core)
+SYM_FUNC_END_SECTION(__pmull_p8_core)
.macro __pmull_p8, rq, ad, bd, i
.ifnc \bd, fold_consts
@@ -465,21 +465,21 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
//
// Assumes len >= 16.
//
-SYM_FUNC_START(crc_t10dif_pmull_p8)
+SYM_FUNC_START_SECTION(crc_t10dif_pmull_p8)
stp x29, x30, [sp, #-16]!
mov x29, sp
crc_t10dif_pmull p8
-SYM_FUNC_END(crc_t10dif_pmull_p8)
+SYM_FUNC_END_SECTION(crc_t10dif_pmull_p8)
- .align 5
//
// u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
//
// Assumes len >= 16.
//
-SYM_FUNC_START(crc_t10dif_pmull_p64)
+SYM_FUNC_START_SECTION(crc_t10dif_pmull_p64)
+ .align 5
crc_t10dif_pmull p64
-SYM_FUNC_END(crc_t10dif_pmull_p64)
+SYM_FUNC_END_SECTION(crc_t10dif_pmull_p64)
.section ".rodata", "a"
.align 4
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -350,13 +350,13 @@ CPU_LE( rev64 T1.16b, T1.16b )
* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
* struct ghash_key const *k, const char *head)
*/
-SYM_FUNC_START(pmull_ghash_update_p64)
+SYM_FUNC_START_SECTION(pmull_ghash_update_p64)
__pmull_ghash p64
-SYM_FUNC_END(pmull_ghash_update_p64)
+SYM_FUNC_END_SECTION(pmull_ghash_update_p64)
-SYM_FUNC_START(pmull_ghash_update_p8)
+SYM_FUNC_START_SECTION(pmull_ghash_update_p8)
__pmull_ghash p8
-SYM_FUNC_END(pmull_ghash_update_p8)
+SYM_FUNC_END_SECTION(pmull_ghash_update_p8)
KS0 .req v8
KS1 .req v9
@@ -602,20 +602,20 @@ CPU_LE( rev w8, w8 )
* struct ghash_key const *k, u64 dg[], u8 ctr[],
* int rounds, u8 tag)
*/
-SYM_FUNC_START(pmull_gcm_encrypt)
+SYM_FUNC_START_SECTION(pmull_gcm_encrypt)
pmull_gcm_do_crypt 1
-SYM_FUNC_END(pmull_gcm_encrypt)
+SYM_FUNC_END_SECTION(pmull_gcm_encrypt)
/*
* void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[],
* struct ghash_key const *k, u64 dg[], u8 ctr[],
* int rounds, u8 tag)
*/
-SYM_FUNC_START(pmull_gcm_decrypt)
+SYM_FUNC_START_SECTION(pmull_gcm_decrypt)
pmull_gcm_do_crypt 0
-SYM_FUNC_END(pmull_gcm_decrypt)
+SYM_FUNC_END_SECTION(pmull_gcm_decrypt)
-SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
+SYM_FUNC_START_LOCAL_SECTION(pmull_gcm_ghash_4x)
movi MASK.16b, #0xe1
shl MASK.2d, MASK.2d, #57
@@ -696,9 +696,9 @@ SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
eor XL.16b, XL.16b, T2.16b
ret
-SYM_FUNC_END(pmull_gcm_ghash_4x)
+SYM_FUNC_END_SECTION(pmull_gcm_ghash_4x)
-SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
+SYM_FUNC_START_LOCAL_SECTION(pmull_gcm_enc_4x)
ld1 {KS0.16b}, [x5] // load upper counter
sub w10, w8, #4
sub w11, w8, #3
@@ -761,7 +761,7 @@ SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
eor INP3.16b, INP3.16b, KS3.16b
ret
-SYM_FUNC_END(pmull_gcm_enc_4x)
+SYM_FUNC_END_SECTION(pmull_gcm_enc_4x)
.section ".rodata", "a"
.align 6
--- a/arch/arm64/crypto/nh-neon-core.S
+++ b/arch/arm64/crypto/nh-neon-core.S
@@ -62,7 +62,7 @@
*
* It's guaranteed that message_len % 16 == 0.
*/
-SYM_FUNC_START(nh_neon)
+SYM_FUNC_START_SECTION(nh_neon)
ld1 {K0.4s,K1.4s}, [KEY], #32
movi PASS0_SUMS.2d, #0
@@ -100,4 +100,4 @@ SYM_FUNC_START(nh_neon)
addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
st1 {T0.16b,T1.16b}, [HASH]
ret
-SYM_FUNC_END(nh_neon)
+SYM_FUNC_END_SECTION(nh_neon)
--- a/arch/arm64/crypto/poly1305-armv8.pl
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -48,8 +48,12 @@ my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
$code.=<<___;
#ifndef __KERNEL__
+# define SYM_TEXT_SECTION()
+# define SYM_TEXT_END_SECTION
# include "arm_arch.h"
.extern OPENSSL_armcap_P
+#else
+# include <linux/linkage.h>
#endif
.text
@@ -58,6 +62,7 @@ $code.=<<___;
.globl poly1305_blocks
.globl poly1305_emit
+SYM_TEXT_SECTION(poly1305_init)
.globl poly1305_init
.type poly1305_init,%function
.align 5
@@ -107,7 +112,9 @@ poly1305_init:
.Lno_key:
ret
.size poly1305_init,.-poly1305_init
+SYM_TEXT_END_SECTION
+SYM_TEXT_SECTION(poly1305_blocks)
.type poly1305_blocks,%function
.align 5
poly1305_blocks:
@@ -198,7 +205,9 @@ poly1305_blocks:
.Lno_data:
ret
.size poly1305_blocks,.-poly1305_blocks
+SYM_TEXT_END_SECTION
+SYM_TEXT_SECTION(poly1305_emit)
.type poly1305_emit,%function
.align 5
poly1305_emit:
@@ -258,6 +267,7 @@ poly1305_emit:
ret
.size poly1305_emit,.-poly1305_emit
+SYM_TEXT_END_SECTION
___
my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
@@ -270,6 +280,7 @@ my ($in2,$zeros)=("x16","x17");
my $is_base2_26 = $zeros; # borrow
$code.=<<___;
+SYM_TEXT_SECTION(poly1305_mult)
.type poly1305_mult,%function
.align 5
poly1305_mult:
@@ -306,7 +317,9 @@ poly1305_mult:
ret
.size poly1305_mult,.-poly1305_mult
+SYM_TEXT_END_SECTION
+SYM_TEXT_SECTION(poly1305_splat)
.type poly1305_splat,%function
.align 4
poly1305_splat:
@@ -333,7 +346,9 @@ poly1305_splat:
ret
.size poly1305_splat,.-poly1305_splat
+SYM_TEXT_END_SECTION
+SYM_TEXT_SECTION(poly1305_blocks_neon)
#ifdef __KERNEL__
.globl poly1305_blocks_neon
#endif
@@ -888,6 +903,8 @@ poly1305_blocks_neon:
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
+SYM_TEXT_END_SECTION
+
.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
.align 2
#if !defined(__KERNEL__) && !defined(_WIN64)
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -65,7 +65,7 @@
* int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
* int blocks)
*/
-SYM_FUNC_START(sha1_ce_transform)
+SYM_FUNC_START_SECTION(sha1_ce_transform)
/* load round constants */
loadrc k0.4s, 0x5a827999, w6
loadrc k1.4s, 0x6ed9eba1, w6
@@ -147,4 +147,4 @@ CPU_LE( rev32 v11.16b, v11.16b )
str dgb, [x0, #16]
mov w0, w2
ret
-SYM_FUNC_END(sha1_ce_transform)
+SYM_FUNC_END_SECTION(sha1_ce_transform)
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -75,7 +75,7 @@
* int blocks)
*/
.text
-SYM_FUNC_START(sha2_ce_transform)
+SYM_FUNC_START_SECTION(sha2_ce_transform)
/* load round constants */
adr_l x8, .Lsha2_rcon
ld1 { v0.4s- v3.4s}, [x8], #64
@@ -154,4 +154,4 @@ CPU_LE( rev32 v19.16b, v19.16b )
3: st1 {dgav.4s, dgbv.4s}, [x0]
mov w0, w2
ret
-SYM_FUNC_END(sha2_ce_transform)
+SYM_FUNC_END_SECTION(sha2_ce_transform)
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -40,7 +40,7 @@
* int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
*/
.text
-SYM_FUNC_START(sha3_ce_transform)
+SYM_FUNC_START_SECTION(sha3_ce_transform)
/* load state */
add x8, x0, #32
ld1 { v0.1d- v3.1d}, [x0]
@@ -197,7 +197,7 @@ SYM_FUNC_START(sha3_ce_transform)
st1 {v24.1d}, [x0]
mov w0, w2
ret
-SYM_FUNC_END(sha3_ce_transform)
+SYM_FUNC_END_SECTION(sha3_ce_transform)
.section ".rodata", "a"
.align 8
--- a/arch/arm64/crypto/sha512-armv8.pl
+++ b/arch/arm64/crypto/sha512-armv8.pl
@@ -195,11 +195,16 @@ ___
$code.=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"
+# define SYM_TEXT_SECTION()
+# define SYM_TEXT_END_SECTION
+#else
+# include <linux/linkage.h>
#endif
.text
.extern OPENSSL_armcap_P
+SYM_TEXT_SECTION($func)
.globl $func
.type $func,%function
.align 6
@@ -285,7 +290,9 @@ $code.=<<___;
ldp x29,x30,[sp],#128
ret
.size $func,.-$func
+SYM_TEXT_END_SECTION
+SYM_TEXT_SECTION(K$BITS)
.align 6
.type .LK$BITS,%object
.LK$BITS:
@@ -354,6 +361,8 @@ $code.=<<___ if ($SZ==4);
___
$code.=<<___;
.size .LK$BITS,.-.LK$BITS
+SYM_TEXT_END_SECTION
+
#ifndef __KERNEL__
.align 3
.LOPENSSL_armcap_P:
@@ -637,6 +646,7 @@ sub body_00_15 () {
}
$code.=<<___;
+SYM_TEXT_SECTION(sha256_block_neon)
#ifdef __KERNEL__
.globl sha256_block_neon
#endif
@@ -736,6 +746,7 @@ $code.=<<___;
add sp,sp,#16*4+16
ret
.size sha256_block_neon,.-sha256_block_neon
+SYM_TEXT_END_SECTION
___
}
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -106,7 +106,7 @@
* int blocks)
*/
.text
-SYM_FUNC_START(sha512_ce_transform)
+SYM_FUNC_START_SECTION(sha512_ce_transform)
/* load state */
ld1 {v8.2d-v11.2d}, [x0]
@@ -203,4 +203,4 @@ CPU_LE( rev64 v19.16b, v19.16b )
3: st1 {v8.2d-v11.2d}, [x0]
mov w0, w2
ret
-SYM_FUNC_END(sha512_ce_transform)
+SYM_FUNC_END_SECTION(sha512_ce_transform)
--- a/arch/arm64/crypto/sm3-ce-core.S
+++ b/arch/arm64/crypto/sm3-ce-core.S
@@ -73,7 +73,7 @@
* int blocks)
*/
.text
-SYM_FUNC_START(sm3_ce_transform)
+SYM_FUNC_START_SECTION(sm3_ce_transform)
/* load state */
ld1 {v8.4s-v9.4s}, [x0]
rev64 v8.4s, v8.4s
@@ -131,7 +131,7 @@ CPU_LE( rev32 v3.16b, v3.16b )
ext v9.16b, v9.16b, v9.16b, #8
st1 {v8.4s-v9.4s}, [x0]
ret
-SYM_FUNC_END(sm3_ce_transform)
+SYM_FUNC_END_SECTION(sm3_ce_transform)
.section ".rodata", "a"
.align 3
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -15,7 +15,7 @@
* void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
*/
.text
-SYM_FUNC_START(sm4_ce_do_crypt)
+SYM_FUNC_START_SECTION(sm4_ce_do_crypt)
ld1 {v8.4s}, [x2]
ld1 {v0.4s-v3.4s}, [x0], #64
CPU_LE( rev32 v8.16b, v8.16b )
@@ -33,4 +33,4 @@ CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v8.16b, v8.16b )
st1 {v8.4s}, [x1]
ret
-SYM_FUNC_END(sm4_ce_do_crypt)
+SYM_FUNC_END_SECTION(sm4_ce_do_crypt)
The resulting LD script generated by the FG-KASLR script contains a size assertion for the input .text section: if it is not empty, the build will stop to plug a potential layout leakage. As FG-KASLR for modules is arch-independent, we need to take care of the modular ASM code of every architecture so the build does not break.

This is the ARM64 part.

Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
---
 arch/arm64/crypto/aes-ce-ccm-core.S   | 16 +++++------
 arch/arm64/crypto/aes-ce-core.S       | 16 +++++------
 arch/arm64/crypto/aes-ce.S            |  4 +--
 arch/arm64/crypto/aes-cipher-core.S   |  8 +++---
 arch/arm64/crypto/aes-modes.S         | 16 +++++------
 arch/arm64/crypto/aes-neon.S          |  4 +--
 arch/arm64/crypto/aes-neonbs-core.S   | 38 +++++++++++++--------------
 arch/arm64/crypto/chacha-neon-core.S  | 18 ++++++-------
 arch/arm64/crypto/crct10dif-ce-core.S | 14 +++++-----
 arch/arm64/crypto/ghash-ce-core.S     | 24 ++++++++---------
 arch/arm64/crypto/nh-neon-core.S      |  4 +--
 arch/arm64/crypto/poly1305-armv8.pl   | 17 ++++++++++++
 arch/arm64/crypto/sha1-ce-core.S      |  4 +--
 arch/arm64/crypto/sha2-ce-core.S      |  4 +--
 arch/arm64/crypto/sha3-ce-core.S      |  4 +--
 arch/arm64/crypto/sha512-armv8.pl     | 11 ++++++++
 arch/arm64/crypto/sha512-ce-core.S    |  4 +--
 arch/arm64/crypto/sm3-ce-core.S       |  4 +--
 arch/arm64/crypto/sm4-ce-core.S       |  4 +--
 19 files changed, 121 insertions(+), 93 deletions(-)
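For reference, the SYM_FUNC_START_SECTION()/SYM_FUNC_END_SECTION() and SYM_TEXT_SECTION()/SYM_TEXT_END_SECTION helpers used above are introduced by the <linux/linkage.h> changes earlier in this series, not by this patch. A minimal sketch of what such wrappers could expand to, assuming per-function ".text.<name>" input sections and made-up SYM_PUSH_SECTION()/SYM_POP_SECTION() helper names (the actual definitions in the series may differ):

/*
 * Hypothetical sketch only: the real macros live in the linkage.h patch
 * of this series. SYM_PUSH_SECTION()/SYM_POP_SECTION() are invented
 * helper names; ASM_NL is the existing linkage.h statement separator,
 * and SYM_FUNC_START/SYM_FUNC_START_LOCAL/SYM_FUNC_END are the stock
 * assembler annotation macros.
 */
#define SYM_PUSH_SECTION(name)			\
	.pushsection .text.name, "ax"

#define SYM_POP_SECTION()			\
	.popsection

/* Open a dedicated input section, then emit the usual function label. */
#define SYM_FUNC_START_SECTION(name)		\
	SYM_PUSH_SECTION(name) ASM_NL		\
	SYM_FUNC_START(name)

#define SYM_FUNC_START_LOCAL_SECTION(name)	\
	SYM_PUSH_SECTION(name) ASM_NL		\
	SYM_FUNC_START_LOCAL(name)

/* Close the function, then return to the previously active section. */
#define SYM_FUNC_END_SECTION(name)		\
	SYM_FUNC_END(name) ASM_NL		\
	SYM_POP_SECTION()

/*
 * The perlasm files emit their own .type/.size directives, so they only
 * need the bare section push/pop around each symbol.
 */
#define SYM_TEXT_SECTION(name)	SYM_PUSH_SECTION(name)
#define SYM_TEXT_END_SECTION	SYM_POP_SECTION()

With every function placed in its own input section, the plain .text section of each object file stays empty, so a size assertion along the lines of ASSERT(SIZEOF(.text) == 0, ...) in the generated LD script holds, and the FG-KASLR linker remains free to shuffle the per-function sections.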