[RFT] crypto: arm64/ghash - add NEON accelerated fallback for 64-bit PMULL

Message ID: 20170703102919.21714-1-ard.biesheuvel@linaro.org (mailing list archive)
State: New, archived

Commit Message

Ard Biesheuvel July 3, 2017, 10:29 a.m. UTC
Implement a NEON fallback for systems that support NEON but lack the
optional 64x64->128 polynomial multiplication instruction that is part
of the ARMv8 Crypto Extensions. It is based on the paper "Fast Software
Polynomial Multiplication on ARM Processors Using the NEON Engine" by
Danilo Camara, Conrado Gouvea, Julio Lopez and Ricardo Dahab
(https://hal.inria.fr/hal-01506572).
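
To make the underlying idea concrete: a 64x64->128 carry-less multiply
can be assembled from the 8x8->16 carry-less multiplies that the baseline
NEON PMULL instruction does provide, by XOR-accumulating the byte products
at their shift offsets. A minimal scalar sketch in C of what the fallback
computes (not how it schedules it); clmul8() and clmul64_by_bytes() are
hypothetical names used for illustration only:

	#include <stdint.h>

	/* 8x8 -> 16 bit carry-less multiply: the per-lane operation of
	 * the 8-bit NEON PMULL that all ARMv8 NEON units implement */
	static uint16_t clmul8(uint8_t a, uint8_t b)
	{
		uint16_t r = 0;

		for (int i = 0; i < 8; i++)
			if ((b >> i) & 1)
				r ^= (uint16_t)a << i;
		return r;
	}

	/* 64x64 -> 128 bit carry-less multiply built from the 64 byte
	 * products; res[0] holds the low half, res[1] the high half */
	static void clmul64_by_bytes(uint64_t a, uint64_t b, uint64_t res[2])
	{
		res[0] = res[1] = 0;

		for (int i = 0; i < 8; i++) {
			for (int j = 0; j < 8; j++) {
				uint64_t p = clmul8(a >> (8 * i), b >> (8 * j));
				int sh = 8 * (i + j);

				if (sh < 64) {
					res[0] ^= p << sh;
					if (sh > 48)	/* straddles the halves */
						res[1] ^= p >> (64 - sh);
				} else {
					res[1] ^= p << (sh - 64);
				}
			}
		}
	}

Roughly speaking, the trick from the paper is to compute those byte
products eight lanes at a time from byte-rotated copies of the operands
(the A1..A3/B1..B4 values in the asm below) and to mask off lanes that
would otherwise alias, which is what the k00_16/k32_48 constants and the
uzp/zip/ext shuffles take care of.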

On a low-end core such as the Cortex-A53 found in the Raspberry Pi 3, the
NEON-based implementation is ~2.8x faster than the table-based one, and it
also runs in constant time, making it less vulnerable to timing attacks.
When combined with the bit-sliced NEON implementation of AES-CTR, AES-GCM
throughput increases by ~75% (from 58 down to 33 cycles per byte).
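
To spell out the arithmetic behind the ~75% figure:

	58 / 33 ≈ 1.76x the previous throughput, i.e. ~75% more bytes
	processed per unit time (a ~43% drop in per-byte cost).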

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
Note that this is the arm64 counterpart of the patch
"crypto: arm/ghash - add NEON accelerated fallback for vmull.p64"

Raw numbers for a 1.2 GHz Cortex-A53 (Raspberry Pi 3) after the patch.

This patch applies on top of the patch "crypto: arm64/gcm - implement
native driver using v8 Crypto Extensions", which can be found here:
http://www.mail-archive.com/linux-crypto@vger.kernel.org/msg26385.html

 arch/arm64/crypto/ghash-ce-core.S | 161 +++++++++++++++++---
 arch/arm64/crypto/ghash-ce-glue.c |  36 ++++-
 2 files changed, 170 insertions(+), 27 deletions(-)

Patch

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index cb22459eba85..8a789f6154fc 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@ 
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -11,24 +11,119 @@ 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-	SHASH	.req	v0
-	SHASH2	.req	v1
-	T1	.req	v2
-	T2	.req	v3
-	MASK	.req	v4
-	XL	.req	v5
-	XM	.req	v6
-	XH	.req	v7
-	IN1	.req	v7
+	SHASH		.req	v0
+	SHASH2		.req	v1
+	T1		.req	v2
+	T2		.req	v3
+	MASK		.req	v4
+	XL		.req	v5
+	XM		.req	v6
+	XH		.req	v7
+	IN1		.req	v7
+
+	k00_16		.req	v8
+	k32_48		.req	v9
+
+	t3		.req	v10
+	t4		.req	v11
+	t5		.req	v12
+	t6		.req	v13
+	t7		.req	v14
+	t8		.req	v15
+	t9		.req	v16
+
+	perm1		.req	v17
+	perm2		.req	v18
+	perm3		.req	v19
+	perm4		.req	v20
 
 	.text
 	.arch		armv8-a+crypto
 
-	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
-	 */
-ENTRY(pmull_ghash_update)
+	.macro		__pmull_p64, rd, rn, rm, i
+	.ifb		\i
+	pmull		\rd\().1q, \rn\().1d, \rm\().1d
+	.else
+	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
+	.endif
+	.endm
+
+	.macro		__pmull_p8, rq, ad, bd, i
+	.ifb		\i
+	ext		t4.8b, \ad\().8b, \ad\().8b, #1		// A1
+	ext		t8.8b, \bd\().8b, \bd\().8b, #1		// B1
+	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
+	ext		t7.8b, \bd\().8b, \bd\().8b, #2		// B2
+	ext		t6.8b, \ad\().8b, \ad\().8b, #3		// A3
+	ext		t9.8b, \bd\().8b, \bd\().8b, #3		// B3
+	ext		t3.8b, \bd\().8b, \bd\().8b, #4		// B4
+
+	pmull		t4.8h, t4.8b, \bd\().8b			// F = A1*B
+	pmull		t8.8h, \ad\().8b, t8.8b			// E = A*B1
+	pmull		t5.8h, t5.8b, \bd\().8b			// H = A2*B
+	pmull		t7.8h, \ad\().8b, t7.8b			// G = A*B2
+	pmull		t6.8h, t6.8b, \bd\().8b			// J = A3*B
+	pmull		t9.8h, \ad\().8b, t9.8b			// I = A*B3
+	pmull		t3.8h, \ad\().8b, t3.8b			// K = A*B4
+	pmull		\rq\().8h, \ad\().8b, \bd\().8b		// D = A*B
+	.else
+	tbl		t4.16b, {\ad\().16b}, perm1.16b		// A1
+	tbl		t8.16b, {\bd\().16b}, perm1.16b		// B1
+	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
+	tbl		t7.16b, {\bd\().16b}, perm2.16b		// B2
+	tbl		t6.16b, {\ad\().16b}, perm3.16b		// A3
+	tbl		t9.16b, {\bd\().16b}, perm3.16b		// B3
+	tbl		t3.16b, {\bd\().16b}, perm4.16b		// B4
+
+	pmull2		t4.8h, t4.16b, \bd\().16b		// F = A1*B
+	pmull2		t8.8h, \ad\().16b, t8.16b		// E = A*B1
+	pmull2		t5.8h, t5.16b, \bd\().16b		// H = A2*B
+	pmull2		t7.8h, \ad\().16b, t7.16b		// G = A*B2
+	pmull2		t6.8h, t6.16b, \bd\().16b		// J = A3*B
+	pmull2		t9.8h, \ad\().16b, t9.16b		// I = A*B3
+	pmull2		t3.8h, \ad\().16b, t3.16b		// K = A*B4
+	pmull2		\rq\().8h, \ad\().16b, \bd\().16b	// D = A*B
+	.endif
+
+	eor		t4.16b, t4.16b, t8.16b			// L = E + F
+	eor		t5.16b, t5.16b, t7.16b			// M = G + H
+	eor		t6.16b, t6.16b, t9.16b			// N = I + J
+
+	uzp1		t8.2d, t4.2d, t5.2d
+	uzp2		t4.2d, t4.2d, t5.2d
+	uzp1		t7.2d, t6.2d, t3.2d
+	uzp2		t6.2d, t6.2d, t3.2d
+
+	// t4 = (L) (P0 + P1) << 8
+	// t5 = (M) (P2 + P3) << 16
+	eor		t8.16b, t8.16b, t4.16b
+	and		t4.16b, t4.16b, k32_48.16b
+
+	// t6 = (N) (P4 + P5) << 24
+	// t7 = (K) (P6 + P7) << 32
+	eor		t7.16b, t7.16b, t6.16b
+	and		t6.16b, t6.16b, k00_16.16b
+
+	eor		t8.16b, t8.16b, t4.16b
+	eor		t7.16b, t7.16b, t6.16b
+
+	zip2		t5.2d, t8.2d, t4.2d
+	zip1		t4.2d, t8.2d, t4.2d
+	zip2		t3.2d, t7.2d, t6.2d
+	zip1		t6.2d, t7.2d, t6.2d
+
+	ext		t4.16b, t4.16b, t4.16b, #15
+	ext		t5.16b, t5.16b, t5.16b, #14
+	ext		t6.16b, t6.16b, t6.16b, #13
+	ext		t3.16b, t3.16b, t3.16b, #12
+
+	eor		t4.16b, t4.16b, t5.16b
+	eor		t6.16b, t6.16b, t3.16b
+	eor		\rq\().16b, \rq\().16b, t4.16b
+	eor		\rq\().16b, \rq\().16b, t6.16b
+	.endm
+
+	.macro		__pmull_ghash, pm
 	ld1		{SHASH.2d}, [x3]
 	ld1		{XL.2d}, [x1]
 	movi		MASK.16b, #0xe1
@@ -52,23 +147,23 @@  CPU_LE(	rev64		T1.16b, T1.16b	)
 	eor		T1.16b, T1.16b, T2.16b
 	eor		XL.16b, XL.16b, IN1.16b
 
-	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
+	\pm		XH, SHASH, XL, 2		// a1 * b1
 	eor		T1.16b, T1.16b, XL.16b
-	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
-	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
+	\pm 		XL, SHASH, XL			// a0 * b0
+	\pm 		XM, SHASH2, T1			// (a1 + a0)(b1 + b0)
 
 	ext		T1.16b, XL.16b, XH.16b, #8
 	eor		T2.16b, XL.16b, XH.16b
 	eor		XM.16b, XM.16b, T1.16b
 	eor		XM.16b, XM.16b, T2.16b
-	pmull		T2.1q, XL.1d, MASK.1d
+	\pm		T2, XL, MASK
 
 	mov		XH.d[0], XM.d[1]
 	mov		XM.d[1], XL.d[0]
 
 	eor		XL.16b, XM.16b, T2.16b
 	ext		T2.16b, XL.16b, XL.16b, #8
-	pmull		XL.1q, XL.1d, MASK.1d
+	\pm		XL, XL, MASK
 	eor		T2.16b, T2.16b, XH.16b
 	eor		XL.16b, XL.16b, T2.16b
 
@@ -76,7 +171,31 @@  CPU_LE(	rev64		T1.16b, T1.16b	)
 
 	st1		{XL.2d}, [x1]
 	ret
-ENDPROC(pmull_ghash_update)
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	__pmull_ghash	__pmull_p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	// k00_16 := 0x0000000000000000_000000000000ffff
+	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
+	movi		k32_48.2d, #0xffffffff
+	mov		k32_48.h[2], k32_48.h[0]
+	ushr		k00_16.2d, k32_48.2d, #32
+
+	mov_q		x5, 0x080f0e0d0c0b0a09
+	dup		perm1.2d, x5
+	ext		perm2.16b, perm1.16b, perm1.16b, #1
+	ext		perm3.16b, perm1.16b, perm1.16b, #2
+	ext		perm4.16b, perm1.16b, perm1.16b, #3
+
+	__pmull_ghash	__pmull_p8
+ENDPROC(pmull_ghash_update_p8)
 
 	KS		.req	v8
 	CTR		.req	v9
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 524dd5a5aca1..6bf08e4d84fe 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -26,6 +26,7 @@ 
 MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
@@ -48,8 +49,17 @@  struct gcm_aes_ctx {
 	struct ghash_key	ghash_key;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
 
 asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
@@ -554,9 +564,18 @@  static int __init ghash_ce_mod_init(void)
 {
 	int ret;
 
-	ret = crypto_register_aead(&gcm_aes_alg);
-	if (ret)
-		return ret;
+	if (!(elf_hwcap & HWCAP_ASIMD))
+		return -ENODEV;
+
+	if (elf_hwcap & HWCAP_PMULL) {
+		pmull_ghash_update = pmull_ghash_update_p64;
+
+		ret = crypto_register_aead(&gcm_aes_alg);
+		if (ret)
+			return ret;
+	} else {
+		pmull_ghash_update = pmull_ghash_update_p8;
+	}
 
 	ret = crypto_register_shash(&ghash_alg);
 	if (ret)
@@ -570,5 +589,10 @@  static void __exit ghash_ce_mod_exit(void)
 	crypto_unregister_aead(&gcm_aes_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+static const struct cpu_feature ghash_cpu_feature[] = {
+	{ cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
+
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);
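
For checking the asm against a specification: the movi MASK.16b, #0xe1
constant and the final folding steps implement the GHASH reduction modulo
x^128 + x^7 + x^2 + x + 1 in its bit-reflected form. Below is a bitwise,
illustration-only C reference of the GF(2^128) multiply, following the
NIST SP 800-38D definition; ghash_mul() is a hypothetical helper name,
not part of this patch:

	#include <stdint.h>
	#include <string.h>

	/* Z = X * H in GF(2^128) as GHASH defines it: blocks are
	 * big-endian and "bit 0" is the MSB of byte 0 */
	static void ghash_mul(uint8_t z[16], const uint8_t x[16],
			      const uint8_t h[16])
	{
		uint8_t v[16], r[16] = { 0 };

		memcpy(v, h, 16);
		for (int i = 0; i < 128; i++) {
			/* r ^= v whenever bit i of x is set */
			if (x[i / 8] & (0x80 >> (i % 8)))
				for (int j = 0; j < 16; j++)
					r[j] ^= v[j];

			/* v *= x: one right shift in GHASH bit order,
			 * folding the dropped bit back in with 0xe1 -
			 * the same constant loaded into MASK above */
			int carry = v[15] & 1;

			for (int j = 15; j > 0; j--)
				v[j] = (v[j] >> 1) | (v[j - 1] << 7);
			v[0] >>= 1;
			if (carry)
				v[0] ^= 0xe1;
		}
		memcpy(z, r, 16);
	}

The Karatsuba-style split in the shared macro (a1 * b1, a0 * b0,
(a1 + a0)(b1 + b0)) plus the two multiplies against MASK compute the same
function in constant time, one block at a time.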