From patchwork Thu Dec 2 22:32:10 2021
X-Patchwork-Submitter: Alexander Lobakin
X-Patchwork-Id: 12653695
From: Alexander Lobakin
To: linux-hardening@vger.kernel.org, x86@kernel.org
Cc: Alexander Lobakin, Jesse Brandeburg, Kristen Carlson Accardi, Kees Cook,
    Miklos Szeredi, Ard Biesheuvel, Tony Luck, Bruce Schlobohm, Jessica Yu,
    kernel test robot, Miroslav Benes, Evgenii Shatokhin, Jonathan Corbet,
    Masahiro Yamada, Michal Marek, Nick Desaulniers, Herbert Xu,
    "David S. Miller", Thomas Gleixner, Will Deacon, Ingo Molnar,
    Borislav Petkov, Dave Hansen, "H. Peter Anvin", Andy Lutomirski,
    Peter Zijlstra, Arnd Bergmann, Josh Poimboeuf, Nathan Chancellor,
    Masami Hiramatsu, Marios Pomonis, Sami Tolvanen,
    linux-kernel@vger.kernel.org, linux-kbuild@vger.kernel.org,
    linux-arch@vger.kernel.org, live-patching@vger.kernel.org,
    llvm@lists.linux.dev
Subject: [PATCH v8 10/14] arm64/crypto: conditionally place ASM functions into separate sections
Date: Thu, 2 Dec 2021 23:32:10 +0100
Message-Id: <20211202223214.72888-11-alexandr.lobakin@intel.com>
In-Reply-To: <20211202223214.72888-1-alexandr.lobakin@intel.com>
References: <20211202223214.72888-1-alexandr.lobakin@intel.com>
X-Mailing-List: linux-hardening@vger.kernel.org

The LD script generated by the FG-KASLR script contains a size assertion
for the input .text section. If it is not empty, the build stops in order
to plug a potential layout leakage.

As FG-KASLR for modules is meant to be arch-independent, the modular ASM
code of every architecture has to be adapted so that the build does not
break. This is the ARM64 part.
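For readers not following the whole series: the SYM_FUNC_START_SECTION /
SYM_FUNC_END_SECTION pairs used below come from the linkage.h changes in an
earlier patch of this set. As a rough sketch only, assuming a CONFIG_FG_KASLR
gate and hypothetical SYM_PUSH_SECTION/SYM_POP_SECTION helpers (the exact
definitions are not reproduced here), the conditional placement could be wired
up along these lines:

#ifdef CONFIG_FG_KASLR
/* Place every annotated function into its own .text.<name> section. */
# define SYM_PUSH_SECTION(name)	.pushsection .text.name, "ax"
# define SYM_POP_SECTION()	.popsection
#else
/* FG-KASLR disabled: everything stays in plain .text. */
# define SYM_PUSH_SECTION(name)
# define SYM_POP_SECTION()
#endif

#define SYM_FUNC_START_SECTION(name)		\
	SYM_PUSH_SECTION(name) ASM_NL		\
	SYM_FUNC_START(name)

#define SYM_FUNC_END_SECTION(name)		\
	SYM_FUNC_END(name) ASM_NL		\
	SYM_POP_SECTION()

With that shape, builds without FG-KASLR expand to the plain SYM_FUNC_START /
SYM_FUNC_END they use today, so the conversion below is a no-op there.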
Signed-off-by: Alexander Lobakin --- arch/arm64/crypto/aes-ce-ccm-core.S | 16 +++++------ arch/arm64/crypto/aes-ce-core.S | 16 +++++------ arch/arm64/crypto/aes-ce.S | 4 +-- arch/arm64/crypto/aes-cipher-core.S | 8 +++--- arch/arm64/crypto/aes-modes.S | 16 +++++------ arch/arm64/crypto/aes-neon.S | 4 +-- arch/arm64/crypto/aes-neonbs-core.S | 38 +++++++++++++-------------- arch/arm64/crypto/chacha-neon-core.S | 18 ++++++------- arch/arm64/crypto/crct10dif-ce-core.S | 14 +++++----- arch/arm64/crypto/ghash-ce-core.S | 24 ++++++++--------- arch/arm64/crypto/nh-neon-core.S | 4 +-- arch/arm64/crypto/poly1305-armv8.pl | 17 ++++++++++++ arch/arm64/crypto/sha1-ce-core.S | 4 +-- arch/arm64/crypto/sha2-ce-core.S | 4 +-- arch/arm64/crypto/sha3-ce-core.S | 4 +-- arch/arm64/crypto/sha512-armv8.pl | 11 ++++++++ arch/arm64/crypto/sha512-ce-core.S | 4 +-- arch/arm64/crypto/sm3-ce-core.S | 4 +-- arch/arm64/crypto/sm4-ce-core.S | 4 +-- 19 files changed, 121 insertions(+), 93 deletions(-) diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index b03f7f71f893..03c8606f45b4 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -15,7 +15,7 @@ * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, * u32 macp, u8 const rk[], u32 rounds); */ -SYM_FUNC_START(ce_aes_ccm_auth_data) +SYM_FUNC_START_SECTION(ce_aes_ccm_auth_data) ld1 {v0.16b}, [x0] /* load mac */ cbz w3, 1f sub w3, w3, #16 @@ -80,13 +80,13 @@ SYM_FUNC_START(ce_aes_ccm_auth_data) st1 {v0.16b}, [x0] 10: mov w0, w3 ret -SYM_FUNC_END(ce_aes_ccm_auth_data) +SYM_FUNC_END_SECTION(ce_aes_ccm_auth_data) /* * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], * u32 rounds); */ -SYM_FUNC_START(ce_aes_ccm_final) +SYM_FUNC_START_SECTION(ce_aes_ccm_final) ld1 {v3.4s}, [x2], #16 /* load first round key */ ld1 {v0.16b}, [x0] /* load mac */ cmp w3, #12 /* which key size? 
*/ @@ -120,7 +120,7 @@ SYM_FUNC_START(ce_aes_ccm_final) eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ st1 {v0.16b}, [x0] /* store result */ ret -SYM_FUNC_END(ce_aes_ccm_final) +SYM_FUNC_END_SECTION(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc cbz x2, 5f @@ -212,10 +212,10 @@ CPU_LE( rev x8, x8 ) * u8 const rk[], u32 rounds, u8 mac[], * u8 ctr[]); */ -SYM_FUNC_START(ce_aes_ccm_encrypt) +SYM_FUNC_START_SECTION(ce_aes_ccm_encrypt) aes_ccm_do_crypt 1 -SYM_FUNC_END(ce_aes_ccm_encrypt) +SYM_FUNC_END_SECTION(ce_aes_ccm_encrypt) -SYM_FUNC_START(ce_aes_ccm_decrypt) +SYM_FUNC_START_SECTION(ce_aes_ccm_decrypt) aes_ccm_do_crypt 0 -SYM_FUNC_END(ce_aes_ccm_decrypt) +SYM_FUNC_END_SECTION(ce_aes_ccm_decrypt) diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S index e52e13eb8fdb..abe6ee0501bf 100644 --- a/arch/arm64/crypto/aes-ce-core.S +++ b/arch/arm64/crypto/aes-ce-core.S @@ -8,7 +8,7 @@ .arch armv8-a+crypto -SYM_FUNC_START(__aes_ce_encrypt) +SYM_FUNC_START_SECTION(__aes_ce_encrypt) sub w3, w3, #2 ld1 {v0.16b}, [x2] ld1 {v1.4s}, [x0], #16 @@ -34,9 +34,9 @@ SYM_FUNC_START(__aes_ce_encrypt) eor v0.16b, v0.16b, v3.16b st1 {v0.16b}, [x1] ret -SYM_FUNC_END(__aes_ce_encrypt) +SYM_FUNC_END_SECTION(__aes_ce_encrypt) -SYM_FUNC_START(__aes_ce_decrypt) +SYM_FUNC_START_SECTION(__aes_ce_decrypt) sub w3, w3, #2 ld1 {v0.16b}, [x2] ld1 {v1.4s}, [x0], #16 @@ -62,23 +62,23 @@ SYM_FUNC_START(__aes_ce_decrypt) eor v0.16b, v0.16b, v3.16b st1 {v0.16b}, [x1] ret -SYM_FUNC_END(__aes_ce_decrypt) +SYM_FUNC_END_SECTION(__aes_ce_decrypt) /* * __aes_ce_sub() - use the aese instruction to perform the AES sbox * substitution on each byte in 'input' */ -SYM_FUNC_START(__aes_ce_sub) +SYM_FUNC_START_SECTION(__aes_ce_sub) dup v1.4s, w0 movi v0.16b, #0 aese v0.16b, v1.16b umov w0, v0.s[0] ret -SYM_FUNC_END(__aes_ce_sub) +SYM_FUNC_END_SECTION(__aes_ce_sub) -SYM_FUNC_START(__aes_ce_invert) +SYM_FUNC_START_SECTION(__aes_ce_invert) ld1 {v0.4s}, [x1] aesimc v1.16b, v0.16b st1 {v1.4s}, [x0] ret -SYM_FUNC_END(__aes_ce_invert) +SYM_FUNC_END_SECTION(__aes_ce_invert) diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 1dc5bbbfeed2..909d2dcf0907 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -9,8 +9,8 @@ #include #include -#define AES_FUNC_START(func) SYM_FUNC_START(ce_ ## func) -#define AES_FUNC_END(func) SYM_FUNC_END(ce_ ## func) +#define AES_FUNC_START(func) SYM_FUNC_START_SECTION(ce_ ## func) +#define AES_FUNC_END(func) SYM_FUNC_END_SECTION(ce_ ## func) .arch armv8-a+crypto diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S index c9d6955f8404..e47c0aef7a7d 100644 --- a/arch/arm64/crypto/aes-cipher-core.S +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -122,11 +122,11 @@ CPU_BE( rev w7, w7 ) ret .endm -SYM_FUNC_START(__aes_arm64_encrypt) +SYM_FUNC_START_SECTION(__aes_arm64_encrypt) do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 -SYM_FUNC_END(__aes_arm64_encrypt) +SYM_FUNC_END_SECTION(__aes_arm64_encrypt) +SYM_FUNC_START_SECTION(__aes_arm64_decrypt) .align 5 -SYM_FUNC_START(__aes_arm64_decrypt) do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 -SYM_FUNC_END(__aes_arm64_decrypt) +SYM_FUNC_END_SECTION(__aes_arm64_decrypt) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index b495de22bb38..5f7a43fa8438 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -22,26 +22,26 @@ #define ST5(x...) 
x #endif -SYM_FUNC_START_LOCAL(aes_encrypt_block4x) +SYM_FUNC_START_LOCAL_SECTION(aes_encrypt_block4x) encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 ret -SYM_FUNC_END(aes_encrypt_block4x) +SYM_FUNC_END_SECTION(aes_encrypt_block4x) -SYM_FUNC_START_LOCAL(aes_decrypt_block4x) +SYM_FUNC_START_LOCAL_SECTION(aes_decrypt_block4x) decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 ret -SYM_FUNC_END(aes_decrypt_block4x) +SYM_FUNC_END_SECTION(aes_decrypt_block4x) #if MAX_STRIDE == 5 -SYM_FUNC_START_LOCAL(aes_encrypt_block5x) +SYM_FUNC_START_LOCAL_SECTION(aes_encrypt_block5x) encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 ret -SYM_FUNC_END(aes_encrypt_block5x) +SYM_FUNC_END_SECTION(aes_encrypt_block5x) -SYM_FUNC_START_LOCAL(aes_decrypt_block5x) +SYM_FUNC_START_LOCAL_SECTION(aes_decrypt_block5x) decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 ret -SYM_FUNC_END(aes_decrypt_block5x) +SYM_FUNC_END_SECTION(aes_decrypt_block5x) #endif /* diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index e47d3ec2cfb4..9c8d6cccd2cd 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -8,8 +8,8 @@ #include #include -#define AES_FUNC_START(func) SYM_FUNC_START(neon_ ## func) -#define AES_FUNC_END(func) SYM_FUNC_END(neon_ ## func) +#define AES_FUNC_START(func) SYM_FUNC_START_SECTION(neon_ ## func) +#define AES_FUNC_END(func) SYM_FUNC_END_SECTION(neon_ ## func) xtsmask .req v7 cbciv .req v7 diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index a3405b8c344b..582343f18ad0 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -380,7 +380,7 @@ ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f /* * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) */ -SYM_FUNC_START(aesbs_convert_key) +SYM_FUNC_START_SECTION(aesbs_convert_key) ld1 {v7.4s}, [x1], #16 // load round 0 key ld1 {v17.4s}, [x1], #16 // load round 1 key @@ -425,10 +425,10 @@ SYM_FUNC_START(aesbs_convert_key) eor v17.16b, v17.16b, v7.16b str q17, [x0] ret -SYM_FUNC_END(aesbs_convert_key) +SYM_FUNC_END_SECTION(aesbs_convert_key) +SYM_FUNC_START_LOCAL_SECTION(aesbs_encrypt8) .align 4 -SYM_FUNC_START_LOCAL(aesbs_encrypt8) ldr q9, [bskey], #16 // round 0 key ldr q8, M0SR ldr q24, SR @@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8) eor v2.16b, v2.16b, v12.16b eor v5.16b, v5.16b, v12.16b ret -SYM_FUNC_END(aesbs_encrypt8) +SYM_FUNC_END_SECTION(aesbs_encrypt8) +SYM_FUNC_START_LOCAL_SECTION(aesbs_decrypt8) .align 4 -SYM_FUNC_START_LOCAL(aesbs_decrypt8) lsl x9, rounds, #7 add bskey, bskey, x9 @@ -553,7 +553,7 @@ SYM_FUNC_START_LOCAL(aesbs_decrypt8) eor v3.16b, v3.16b, v12.16b eor v5.16b, v5.16b, v12.16b ret -SYM_FUNC_END(aesbs_decrypt8) +SYM_FUNC_END_SECTION(aesbs_decrypt8) /* * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, @@ -619,13 +619,13 @@ SYM_FUNC_END(aesbs_decrypt8) ret .endm +SYM_FUNC_START_SECTION(aesbs_ecb_encrypt) .align 4 -SYM_FUNC_START(aesbs_ecb_encrypt) __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 -SYM_FUNC_END(aesbs_ecb_encrypt) +SYM_FUNC_END_SECTION(aesbs_ecb_encrypt) +SYM_FUNC_START_SECTION(aesbs_ecb_decrypt) .align 4 -SYM_FUNC_START(aesbs_ecb_decrypt) __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 SYM_FUNC_END(aesbs_ecb_decrypt) @@ -633,8 +633,8 @@ SYM_FUNC_END(aesbs_ecb_decrypt) * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) */ +SYM_FUNC_START_SECTION(aesbs_cbc_decrypt) .align 4 -SYM_FUNC_START(aesbs_cbc_decrypt) 
frame_push 6 mov x19, x0 @@ -718,7 +718,7 @@ SYM_FUNC_START(aesbs_cbc_decrypt) 2: frame_pop ret -SYM_FUNC_END(aesbs_cbc_decrypt) +SYM_FUNC_END_SECTION(aesbs_cbc_decrypt) .macro next_tweak, out, in, const, tmp sshr \tmp\().2d, \in\().2d, #63 @@ -734,7 +734,7 @@ SYM_FUNC_END(aesbs_cbc_decrypt) * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) */ -SYM_FUNC_START_LOCAL(__xts_crypt8) +SYM_FUNC_START_LOCAL_SECTION(__xts_crypt8) mov x6, #1 lsl x6, x6, x23 subs w23, w23, #8 @@ -787,7 +787,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8) 0: mov bskey, x21 mov rounds, x22 br x16 -SYM_FUNC_END(__xts_crypt8) +SYM_FUNC_END_SECTION(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 frame_push 6, 64 @@ -851,13 +851,13 @@ SYM_FUNC_END(__xts_crypt8) ret .endm -SYM_FUNC_START(aesbs_xts_encrypt) +SYM_FUNC_START_SECTION(aesbs_xts_encrypt) __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 -SYM_FUNC_END(aesbs_xts_encrypt) +SYM_FUNC_END_SECTION(aesbs_xts_encrypt) -SYM_FUNC_START(aesbs_xts_decrypt) +SYM_FUNC_START_SECTION(aesbs_xts_decrypt) __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 -SYM_FUNC_END(aesbs_xts_decrypt) +SYM_FUNC_END_SECTION(aesbs_xts_decrypt) .macro next_ctr, v mov \v\().d[1], x8 @@ -871,7 +871,7 @@ SYM_FUNC_END(aesbs_xts_decrypt) * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], * int rounds, int blocks, u8 iv[], u8 final[]) */ -SYM_FUNC_START(aesbs_ctr_encrypt) +SYM_FUNC_START_SECTION(aesbs_ctr_encrypt) frame_push 8 mov x19, x0 @@ -998,4 +998,4 @@ CPU_LE( rev x8, x8 ) 7: cbz x25, 8b st1 {v5.16b}, [x25] b 8b -SYM_FUNC_END(aesbs_ctr_encrypt) +SYM_FUNC_END_SECTION(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S index b70ac76f2610..34a3087055f8 100644 --- a/arch/arm64/crypto/chacha-neon-core.S +++ b/arch/arm64/crypto/chacha-neon-core.S @@ -23,7 +23,6 @@ #include .text - .align 6 /* * chacha_permute - permute one block @@ -36,7 +35,8 @@ * * Clobbers: w3, x10, v4, v12 */ -SYM_FUNC_START_LOCAL(chacha_permute) +SYM_FUNC_START_LOCAL_SECTION(chacha_permute) + .align 6 adr_l x10, ROT8 ld1 {v12.4s}, [x10] @@ -104,9 +104,9 @@ SYM_FUNC_START_LOCAL(chacha_permute) b.ne .Ldoubleround ret -SYM_FUNC_END(chacha_permute) +SYM_FUNC_END_SECTION(chacha_permute) -SYM_FUNC_START(chacha_block_xor_neon) +SYM_FUNC_START_SECTION(chacha_block_xor_neon) // x0: Input state matrix, s // x1: 1 data block output, o // x2: 1 data block input, i @@ -143,9 +143,9 @@ SYM_FUNC_START(chacha_block_xor_neon) ldp x29, x30, [sp], #16 ret -SYM_FUNC_END(chacha_block_xor_neon) +SYM_FUNC_END_SECTION(chacha_block_xor_neon) -SYM_FUNC_START(hchacha_block_neon) +SYM_FUNC_START_SECTION(hchacha_block_neon) // x0: Input state matrix, s // x1: output (8 32-bit words) // w2: nrounds @@ -163,7 +163,7 @@ SYM_FUNC_START(hchacha_block_neon) ldp x29, x30, [sp], #16 ret -SYM_FUNC_END(hchacha_block_neon) +SYM_FUNC_END_SECTION(hchacha_block_neon) a0 .req w12 a1 .req w13 @@ -182,8 +182,8 @@ SYM_FUNC_END(hchacha_block_neon) a14 .req w27 a15 .req w28 +SYM_FUNC_START_SECTION(chacha_4block_xor_neon) .align 6 -SYM_FUNC_START(chacha_4block_xor_neon) frame_push 10 // x0: Input state matrix, s @@ -790,7 +790,7 @@ CPU_BE( rev a15, a15 ) st1 {v28.16b-v31.16b}, [x7] // overlapping stores 3: st1 {v24.16b-v27.16b}, [x1] b .Lout -SYM_FUNC_END(chacha_4block_xor_neon) +SYM_FUNC_END_SECTION(chacha_4block_xor_neon) .section ".rodata", "a", %progbits .align L1_CACHE_SHIFT diff --git a/arch/arm64/crypto/crct10dif-ce-core.S 
b/arch/arm64/crypto/crct10dif-ce-core.S index dce6dcebfca1..54e121a56895 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -131,7 +131,7 @@ tbl bd4.16b, {\bd\().16b}, perm4.16b .endm -SYM_FUNC_START_LOCAL(__pmull_p8_core) +SYM_FUNC_START_LOCAL_SECTION(__pmull_p8_core) .L__pmull_p8_core: ext t4.8b, ad.8b, ad.8b, #1 // A1 ext t5.8b, ad.8b, ad.8b, #2 // A2 @@ -194,7 +194,7 @@ SYM_FUNC_START_LOCAL(__pmull_p8_core) eor t4.16b, t4.16b, t5.16b eor t6.16b, t6.16b, t3.16b ret -SYM_FUNC_END(__pmull_p8_core) +SYM_FUNC_END_SECTION(__pmull_p8_core) .macro __pmull_p8, rq, ad, bd, i .ifnc \bd, fold_consts @@ -465,21 +465,21 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) // // Assumes len >= 16. // -SYM_FUNC_START(crc_t10dif_pmull_p8) +SYM_FUNC_START_SECTION(crc_t10dif_pmull_p8) stp x29, x30, [sp, #-16]! mov x29, sp crc_t10dif_pmull p8 -SYM_FUNC_END(crc_t10dif_pmull_p8) +SYM_FUNC_END_SECTION(crc_t10dif_pmull_p8) - .align 5 // // u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); // // Assumes len >= 16. // -SYM_FUNC_START(crc_t10dif_pmull_p64) +SYM_FUNC_START_SECTION(crc_t10dif_pmull_p64) + .align 5 crc_t10dif_pmull p64 -SYM_FUNC_END(crc_t10dif_pmull_p64) +SYM_FUNC_END_SECTION(crc_t10dif_pmull_p64) .section ".rodata", "a" .align 4 diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 7868330dd54e..a69c1d4479db 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -350,13 +350,13 @@ CPU_LE( rev64 T1.16b, T1.16b ) * void pmull_ghash_update(int blocks, u64 dg[], const char *src, * struct ghash_key const *k, const char *head) */ -SYM_FUNC_START(pmull_ghash_update_p64) +SYM_FUNC_START_SECTION(pmull_ghash_update_p64) __pmull_ghash p64 -SYM_FUNC_END(pmull_ghash_update_p64) +SYM_FUNC_END_SECTION(pmull_ghash_update_p64) -SYM_FUNC_START(pmull_ghash_update_p8) +SYM_FUNC_START_SECTION(pmull_ghash_update_p8) __pmull_ghash p8 -SYM_FUNC_END(pmull_ghash_update_p8) +SYM_FUNC_END_SECTION(pmull_ghash_update_p8) KS0 .req v8 KS1 .req v9 @@ -602,20 +602,20 @@ CPU_LE( rev w8, w8 ) * struct ghash_key const *k, u64 dg[], u8 ctr[], * int rounds, u8 tag) */ -SYM_FUNC_START(pmull_gcm_encrypt) +SYM_FUNC_START_SECTION(pmull_gcm_encrypt) pmull_gcm_do_crypt 1 -SYM_FUNC_END(pmull_gcm_encrypt) +SYM_FUNC_END_SECTION(pmull_gcm_encrypt) /* * void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[], * struct ghash_key const *k, u64 dg[], u8 ctr[], * int rounds, u8 tag) */ -SYM_FUNC_START(pmull_gcm_decrypt) +SYM_FUNC_START_SECTION(pmull_gcm_decrypt) pmull_gcm_do_crypt 0 -SYM_FUNC_END(pmull_gcm_decrypt) +SYM_FUNC_END_SECTION(pmull_gcm_decrypt) -SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x) +SYM_FUNC_START_LOCAL_SECTION(pmull_gcm_ghash_4x) movi MASK.16b, #0xe1 shl MASK.2d, MASK.2d, #57 @@ -696,9 +696,9 @@ SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x) eor XL.16b, XL.16b, T2.16b ret -SYM_FUNC_END(pmull_gcm_ghash_4x) +SYM_FUNC_END_SECTION(pmull_gcm_ghash_4x) -SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x) +SYM_FUNC_START_LOCAL_SECTION(pmull_gcm_enc_4x) ld1 {KS0.16b}, [x5] // load upper counter sub w10, w8, #4 sub w11, w8, #3 @@ -761,7 +761,7 @@ SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x) eor INP3.16b, INP3.16b, KS3.16b ret -SYM_FUNC_END(pmull_gcm_enc_4x) +SYM_FUNC_END_SECTION(pmull_gcm_enc_4x) .section ".rodata", "a" .align 6 diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S index 51c0a534ef87..cb354d3f7e7b 100644 --- a/arch/arm64/crypto/nh-neon-core.S +++ b/arch/arm64/crypto/nh-neon-core.S @@ -62,7 +62,7 @@ 
* * It's guaranteed that message_len % 16 == 0. */ -SYM_FUNC_START(nh_neon) +SYM_FUNC_START_SECTION(nh_neon) ld1 {K0.4s,K1.4s}, [KEY], #32 movi PASS0_SUMS.2d, #0 @@ -100,4 +100,4 @@ SYM_FUNC_START(nh_neon) addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d st1 {T0.16b,T1.16b}, [HASH] ret -SYM_FUNC_END(nh_neon) +SYM_FUNC_END_SECTION(nh_neon) diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl index cbc980fb02e3..039e6a9ce68c 100644 --- a/arch/arm64/crypto/poly1305-armv8.pl +++ b/arch/arm64/crypto/poly1305-armv8.pl @@ -48,8 +48,12 @@ my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14)); $code.=<<___; #ifndef __KERNEL__ +# define SYM_TEXT_SECTION() +# define SYM_TEXT_END_SECTION # include "arm_arch.h" .extern OPENSSL_armcap_P +#else +# include #endif .text @@ -58,6 +62,7 @@ $code.=<<___; .globl poly1305_blocks .globl poly1305_emit +SYM_TEXT_SECTION(poly1305_init) .globl poly1305_init .type poly1305_init,%function .align 5 @@ -107,7 +112,9 @@ poly1305_init: .Lno_key: ret .size poly1305_init,.-poly1305_init +SYM_TEXT_END_SECTION +SYM_TEXT_SECTION(poly1305_blocks) .type poly1305_blocks,%function .align 5 poly1305_blocks: @@ -198,7 +205,9 @@ poly1305_blocks: .Lno_data: ret .size poly1305_blocks,.-poly1305_blocks +SYM_TEXT_END_SECTION +SYM_TEXT_SECTION(poly1305_emit) .type poly1305_emit,%function .align 5 poly1305_emit: @@ -258,6 +267,7 @@ poly1305_emit: ret .size poly1305_emit,.-poly1305_emit +SYM_TEXT_END_SECTION ___ my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8)); my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13)); @@ -270,6 +280,7 @@ my ($in2,$zeros)=("x16","x17"); my $is_base2_26 = $zeros; # borrow $code.=<<___; +SYM_TEXT_SECTION(poly1305_mult) .type poly1305_mult,%function .align 5 poly1305_mult: @@ -306,7 +317,9 @@ poly1305_mult: ret .size poly1305_mult,.-poly1305_mult +SYM_TEXT_END_SECTION +SYM_TEXT_SECTION(poly1305_splat) .type poly1305_splat,%function .align 4 poly1305_splat: @@ -333,7 +346,9 @@ poly1305_splat: ret .size poly1305_splat,.-poly1305_splat +SYM_TEXT_END_SECTION +SYM_TEXT_SECTION(poly1305_blocks_neon) #ifdef __KERNEL__ .globl poly1305_blocks_neon #endif @@ -888,6 +903,8 @@ poly1305_blocks_neon: .align 5 .Lzeros: .long 0,0,0,0,0,0,0,0 +SYM_TEXT_END_SECTION + .asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm" .align 2 #if !defined(__KERNEL__) && !defined(_WIN64) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 889ca0f8972b..2ba5f8ea39fc 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -65,7 +65,7 @@ * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, * int blocks) */ -SYM_FUNC_START(sha1_ce_transform) +SYM_FUNC_START_SECTION(sha1_ce_transform) /* load round constants */ loadrc k0.4s, 0x5a827999, w6 loadrc k1.4s, 0x6ed9eba1, w6 @@ -147,4 +147,4 @@ CPU_LE( rev32 v11.16b, v11.16b ) str dgb, [x0, #16] mov w0, w2 ret -SYM_FUNC_END(sha1_ce_transform) +SYM_FUNC_END_SECTION(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 491179922f49..6c1a4a128355 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -75,7 +75,7 @@ * int blocks) */ .text -SYM_FUNC_START(sha2_ce_transform) +SYM_FUNC_START_SECTION(sha2_ce_transform) /* load round constants */ adr_l x8, .Lsha2_rcon ld1 { v0.4s- v3.4s}, [x8], #64 @@ -154,4 +154,4 @@ CPU_LE( rev32 v19.16b, v19.16b ) 3: st1 {dgav.4s, dgbv.4s}, [x0] mov w0, w2 ret -SYM_FUNC_END(sha2_ce_transform) 
+SYM_FUNC_END_SECTION(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S index 9c77313f5a60..6105cc815c9a 100644 --- a/arch/arm64/crypto/sha3-ce-core.S +++ b/arch/arm64/crypto/sha3-ce-core.S @@ -40,7 +40,7 @@ * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) */ .text -SYM_FUNC_START(sha3_ce_transform) +SYM_FUNC_START_SECTION(sha3_ce_transform) /* load state */ add x8, x0, #32 ld1 { v0.1d- v3.1d}, [x0] @@ -197,7 +197,7 @@ SYM_FUNC_START(sha3_ce_transform) st1 {v24.1d}, [x0] mov w0, w2 ret -SYM_FUNC_END(sha3_ce_transform) +SYM_FUNC_END_SECTION(sha3_ce_transform) .section ".rodata", "a" .align 8 diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl index 2d8655d5b1af..7952696d3c88 100644 --- a/arch/arm64/crypto/sha512-armv8.pl +++ b/arch/arm64/crypto/sha512-armv8.pl @@ -195,11 +195,16 @@ ___ $code.=<<___; #ifndef __KERNEL__ # include "arm_arch.h" +# define SYM_TEXT_SECTION() +# define SYM_TEXT_END_SECTION +#else +# include #endif .text .extern OPENSSL_armcap_P +SYM_TEXT_SECTION($func) .globl $func .type $func,%function .align 6 @@ -285,7 +290,9 @@ $code.=<<___; ldp x29,x30,[sp],#128 ret .size $func,.-$func +SYM_TEXT_END_SECTION +SYM_TEXT_SECTION(K$BITS) .align 6 .type .LK$BITS,%object .LK$BITS: @@ -354,6 +361,8 @@ $code.=<<___ if ($SZ==4); ___ $code.=<<___; .size .LK$BITS,.-.LK$BITS +SYM_TEXT_END_SECTION + #ifndef __KERNEL__ .align 3 .LOPENSSL_armcap_P: @@ -637,6 +646,7 @@ sub body_00_15 () { } $code.=<<___; +SYM_TEXT_SECTION(sha256_block_neon) #ifdef __KERNEL__ .globl sha256_block_neon #endif @@ -736,6 +746,7 @@ $code.=<<___; add sp,sp,#16*4+16 ret .size sha256_block_neon,.-sha256_block_neon +SYM_TEXT_END_SECTION ___ } diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S index b6a3a36e15f5..7d34aabb3daa 100644 --- a/arch/arm64/crypto/sha512-ce-core.S +++ b/arch/arm64/crypto/sha512-ce-core.S @@ -106,7 +106,7 @@ * int blocks) */ .text -SYM_FUNC_START(sha512_ce_transform) +SYM_FUNC_START_SECTION(sha512_ce_transform) /* load state */ ld1 {v8.2d-v11.2d}, [x0] @@ -203,4 +203,4 @@ CPU_LE( rev64 v19.16b, v19.16b ) 3: st1 {v8.2d-v11.2d}, [x0] mov w0, w2 ret -SYM_FUNC_END(sha512_ce_transform) +SYM_FUNC_END_SECTION(sha512_ce_transform) diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S index ef97d3187cb7..7be60c41e36d 100644 --- a/arch/arm64/crypto/sm3-ce-core.S +++ b/arch/arm64/crypto/sm3-ce-core.S @@ -73,7 +73,7 @@ * int blocks) */ .text -SYM_FUNC_START(sm3_ce_transform) +SYM_FUNC_START_SECTION(sm3_ce_transform) /* load state */ ld1 {v8.4s-v9.4s}, [x0] rev64 v8.4s, v8.4s @@ -131,7 +131,7 @@ CPU_LE( rev32 v3.16b, v3.16b ) ext v9.16b, v9.16b, v9.16b, #8 st1 {v8.4s-v9.4s}, [x0] ret -SYM_FUNC_END(sm3_ce_transform) +SYM_FUNC_END_SECTION(sm3_ce_transform) .section ".rodata", "a" .align 3 diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S index 4ac6cfbc5797..5f64ed209a26 100644 --- a/arch/arm64/crypto/sm4-ce-core.S +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -15,7 +15,7 @@ * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in); */ .text -SYM_FUNC_START(sm4_ce_do_crypt) +SYM_FUNC_START_SECTION(sm4_ce_do_crypt) ld1 {v8.4s}, [x2] ld1 {v0.4s-v3.4s}, [x0], #64 CPU_LE( rev32 v8.16b, v8.16b ) @@ -33,4 +33,4 @@ CPU_LE( rev32 v8.16b, v8.16b ) CPU_LE( rev32 v8.16b, v8.16b ) st1 {v8.4s}, [x1] ret -SYM_FUNC_END(sm4_ce_do_crypt) +SYM_FUNC_END_SECTION(sm4_ce_do_crypt)