From patchwork Thu Dec 31 17:23:17 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994273 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2027BC433DB for ; Thu, 31 Dec 2020 17:24:49 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D1827223E8 for ; Thu, 31 Dec 2020 17:24:48 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726632AbgLaRYe (ORCPT ); Thu, 31 Dec 2020 12:24:34 -0500 Received: from mail.kernel.org ([198.145.29.99]:54704 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726540AbgLaRYe (ORCPT ); Thu, 31 Dec 2020 12:24:34 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id C08BA223DB; Thu, 31 Dec 2020 17:23:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435433; bh=qHONsnWp7MgBci62RPGEelvXfoc8G3PN6tAVCK/49E4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=E9WxOUXcF9Xg3+t/XILTh3VfAPa0jdUJEJpkr8whgWS2riwRZjxZfjX2eQEzlLKYo 0C+IOZdqROLzznmDu5LJ3v2l1Hi/DmciTnvlO7kfQltek+Q2CXoU/0nOhHst0pnL+i QMTvSrVQWPPx/SWNNZU0jCaJMlGJuM2aGjeCXUKxkccmouk4ht8HI2PhYqGTvTMzNc I0bvpuGZ6rLOHps/7LeXj8bihLV85uR9iWL8BT2yv9+f2tLCBgDb81SU6psgpdm846 xUWY5DWRl2V7PugtNeWRVSlXDDdVHe+hL5UH+oMULSo0tKk+ZW48kI383u/++kvayJ fe/SIKnR2CsqQ== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 01/21] crypto: x86/camellia - switch to XTS template Date: Thu, 31 Dec 2020 18:23:17 +0100 Message-Id: <20201231172337.23073-2-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Now that the XTS template can wrap accelerated ECB modes, it can be used to implement Camellia in XTS mode as well, which turns out to be at least as fast, and sometimes even faster. 
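As an illustrative sketch (not part of the patch itself): kernel users that allocate "xts(camellia)" by name are unaffected by this conversion, because the crypto core now instantiates the generic XTS template around the accelerated ECB implementation instead of the dedicated camellia_xts_* glue removed below. The example assumes a synchronous caller; error paths are trimmed and the key, sector and tweak buffers are placeholders.

	#include <crypto/skcipher.h>
	#include <linux/scatterlist.h>

	static int xts_camellia_demo(const u8 *key, unsigned int keylen,
				     u8 *sector, unsigned int len, u8 iv[16])
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		DECLARE_CRYPTO_WAIT(wait);
		int err;

		/* With this series, the name resolves to the XTS template
		 * wrapping an accelerated ECB driver where one is available. */
		tfm = crypto_alloc_skcipher("xts(camellia)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		/* XTS takes two cipher keys concatenated (data key + tweak key). */
		err = crypto_skcipher_setkey(tfm, key, keylen);
		if (err)
			goto out_free_tfm;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out_free_tfm;
		}

		sg_init_one(&sg, sector, len);
		skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
					      crypto_req_done, &wait);
		/* The IV carries the per-sector tweak (e.g. the sector number). */
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
		skcipher_request_free(req);
	out_free_tfm:
		crypto_free_skcipher(tfm);
		return err;
	}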
Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 181 ----------------- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 207 -------------------- arch/x86/crypto/camellia_aesni_avx2_glue.c | 70 ------- arch/x86/crypto/camellia_aesni_avx_glue.c | 101 +--------- arch/x86/include/asm/crypto/camellia.h | 18 -- crypto/Kconfig | 2 +- 6 files changed, 2 insertions(+), 577 deletions(-) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index ecc0a9a905c4..471c34e6cac2 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -17,7 +17,6 @@ #include #include -#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -593,10 +592,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -/* For XTS mode IV generation */ -.Lxts_gf128mul_and_shl1_mask: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 - /* * pre-SubByte transform * @@ -1111,179 +1106,3 @@ SYM_FUNC_START(camellia_ctr_16way) FRAME_END ret; SYM_FUNC_END(camellia_ctr_16way) - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -.align 8 -SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - * %r8: index for input whitening key - * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16 - */ - FRAME_BEGIN - - subq $(16 * 16), %rsp; - movq %rsp, %rax; - - vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14; - - /* load IV */ - vmovdqu (%rcx), %xmm0; - vpxor 0 * 16(%rdx), %xmm0, %xmm15; - vmovdqu %xmm15, 15 * 16(%rax); - vmovdqu %xmm0, 0 * 16(%rsi); - - /* construct IVs */ - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 1 * 16(%rdx), %xmm0, %xmm15; - vmovdqu %xmm15, 14 * 16(%rax); - vmovdqu %xmm0, 1 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 2 * 16(%rdx), %xmm0, %xmm13; - vmovdqu %xmm0, 2 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 3 * 16(%rdx), %xmm0, %xmm12; - vmovdqu %xmm0, 3 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 4 * 16(%rdx), %xmm0, %xmm11; - vmovdqu %xmm0, 4 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 5 * 16(%rdx), %xmm0, %xmm10; - vmovdqu %xmm0, 5 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 6 * 16(%rdx), %xmm0, %xmm9; - vmovdqu %xmm0, 6 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 7 * 16(%rdx), %xmm0, %xmm8; - vmovdqu %xmm0, 7 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 8 * 16(%rdx), %xmm0, %xmm7; - vmovdqu %xmm0, 8 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 9 * 16(%rdx), %xmm0, %xmm6; - vmovdqu %xmm0, 9 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 10 * 16(%rdx), %xmm0, %xmm5; - vmovdqu %xmm0, 10 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 11 * 16(%rdx), %xmm0, %xmm4; - vmovdqu %xmm0, 11 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 12 * 16(%rdx), %xmm0, %xmm3; - vmovdqu %xmm0, 12 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 13 * 16(%rdx), %xmm0, %xmm2; - vmovdqu %xmm0, 13 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 14 * 16(%rdx), %xmm0, %xmm1; - vmovdqu %xmm0, 14 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - 
vpxor 15 * 16(%rdx), %xmm0, %xmm15; - vmovdqu %xmm15, 0 * 16(%rax); - vmovdqu %xmm0, 15 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vmovdqu %xmm0, (%rcx); - - /* inpack16_pre: */ - vmovq (key_table)(CTX, %r8, 8), %xmm15; - vpshufb .Lpack_bswap, %xmm15, %xmm15; - vpxor 0 * 16(%rax), %xmm15, %xmm0; - vpxor %xmm1, %xmm15, %xmm1; - vpxor %xmm2, %xmm15, %xmm2; - vpxor %xmm3, %xmm15, %xmm3; - vpxor %xmm4, %xmm15, %xmm4; - vpxor %xmm5, %xmm15, %xmm5; - vpxor %xmm6, %xmm15, %xmm6; - vpxor %xmm7, %xmm15, %xmm7; - vpxor %xmm8, %xmm15, %xmm8; - vpxor %xmm9, %xmm15, %xmm9; - vpxor %xmm10, %xmm15, %xmm10; - vpxor %xmm11, %xmm15, %xmm11; - vpxor %xmm12, %xmm15, %xmm12; - vpxor %xmm13, %xmm15, %xmm13; - vpxor 14 * 16(%rax), %xmm15, %xmm14; - vpxor 15 * 16(%rax), %xmm15, %xmm15; - - CALL_NOSPEC r9; - - addq $(16 * 16), %rsp; - - vpxor 0 * 16(%rsi), %xmm7, %xmm7; - vpxor 1 * 16(%rsi), %xmm6, %xmm6; - vpxor 2 * 16(%rsi), %xmm5, %xmm5; - vpxor 3 * 16(%rsi), %xmm4, %xmm4; - vpxor 4 * 16(%rsi), %xmm3, %xmm3; - vpxor 5 * 16(%rsi), %xmm2, %xmm2; - vpxor 6 * 16(%rsi), %xmm1, %xmm1; - vpxor 7 * 16(%rsi), %xmm0, %xmm0; - vpxor 8 * 16(%rsi), %xmm15, %xmm15; - vpxor 9 * 16(%rsi), %xmm14, %xmm14; - vpxor 10 * 16(%rsi), %xmm13, %xmm13; - vpxor 11 * 16(%rsi), %xmm12, %xmm12; - vpxor 12 * 16(%rsi), %xmm11, %xmm11; - vpxor 13 * 16(%rsi), %xmm10, %xmm10; - vpxor 14 * 16(%rsi), %xmm9, %xmm9; - vpxor 15 * 16(%rsi), %xmm8, %xmm8; - write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, - %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, - %xmm8, %rsi); - - FRAME_END - ret; -SYM_FUNC_END(camellia_xts_crypt_16way) - -SYM_FUNC_START(camellia_xts_enc_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - xorl %r8d, %r8d; /* input whitening key, 0 for enc */ - - leaq __camellia_enc_blk16, %r9; - - jmp camellia_xts_crypt_16way; -SYM_FUNC_END(camellia_xts_enc_16way) - -SYM_FUNC_START(camellia_xts_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - - cmpl $16, key_length(CTX); - movl $32, %r8d; - movl $24, %eax; - cmovel %eax, %r8d; /* input whitening key, last for dec */ - - leaq __camellia_dec_blk16, %r9; - - jmp camellia_xts_crypt_16way; -SYM_FUNC_END(camellia_xts_dec_16way) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 0907243c501c..9561dee52de0 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -7,7 +7,6 @@ #include #include -#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -629,12 +628,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -/* For XTS mode */ -.Lxts_gf128mul_and_shl1_mask_0: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 -.Lxts_gf128mul_and_shl1_mask_1: - .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 - /* * pre-SubByte transform * @@ -1201,203 +1194,3 @@ SYM_FUNC_START(camellia_ctr_32way) FRAME_END ret; SYM_FUNC_END(camellia_ctr_32way) - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \ - vpsrad $31, iv, tmp0; \ - vpaddq iv, iv, tmp1; \ - vpsllq $2, iv, iv; \ - vpshufd $0x13, tmp0, tmp0; \ - vpsrad $31, tmp1, 
tmp1; \ - vpand mask2, tmp0, tmp0; \ - vpshufd $0x13, tmp1, tmp1; \ - vpxor tmp0, iv, iv; \ - vpand mask1, tmp1, tmp1; \ - vpxor tmp1, iv, iv; - -.align 8 -SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - * %r8: index for input whitening key - * %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32 - */ - FRAME_BEGIN - - vzeroupper; - - subq $(16 * 32), %rsp; - movq %rsp, %rax; - - vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0, %ymm12; - - /* load IV and construct second IV */ - vmovdqu (%rcx), %xmm0; - vmovdqa %xmm0, %xmm15; - gf128mul_x_ble(%xmm0, %xmm12, %xmm13); - vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1, %ymm13; - vinserti128 $1, %xmm0, %ymm15, %ymm0; - vpxor 0 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 15 * 32(%rax); - vmovdqu %ymm0, 0 * 32(%rsi); - - /* construct IVs */ - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 1 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 14 * 32(%rax); - vmovdqu %ymm0, 1 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 2 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 13 * 32(%rax); - vmovdqu %ymm0, 2 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 3 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 12 * 32(%rax); - vmovdqu %ymm0, 3 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 4 * 32(%rdx), %ymm0, %ymm11; - vmovdqu %ymm0, 4 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 5 * 32(%rdx), %ymm0, %ymm10; - vmovdqu %ymm0, 5 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 6 * 32(%rdx), %ymm0, %ymm9; - vmovdqu %ymm0, 6 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 7 * 32(%rdx), %ymm0, %ymm8; - vmovdqu %ymm0, 7 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 8 * 32(%rdx), %ymm0, %ymm7; - vmovdqu %ymm0, 8 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 9 * 32(%rdx), %ymm0, %ymm6; - vmovdqu %ymm0, 9 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 10 * 32(%rdx), %ymm0, %ymm5; - vmovdqu %ymm0, 10 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 11 * 32(%rdx), %ymm0, %ymm4; - vmovdqu %ymm0, 11 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 12 * 32(%rdx), %ymm0, %ymm3; - vmovdqu %ymm0, 12 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 13 * 32(%rdx), %ymm0, %ymm2; - vmovdqu %ymm0, 13 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 14 * 32(%rdx), %ymm0, %ymm1; - vmovdqu %ymm0, 14 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 15 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 0 * 32(%rax); - vmovdqu %ymm0, 15 * 32(%rsi); - - vextracti128 $1, %ymm0, %xmm0; - gf128mul_x_ble(%xmm0, %xmm12, %xmm15); - vmovdqu %xmm0, (%rcx); - - /* inpack32_pre: */ - vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15; - vpshufb .Lpack_bswap, %ymm15, %ymm15; - vpxor 0 * 32(%rax), %ymm15, %ymm0; - vpxor %ymm1, %ymm15, %ymm1; - vpxor %ymm2, %ymm15, %ymm2; - vpxor %ymm3, %ymm15, %ymm3; - vpxor %ymm4, %ymm15, %ymm4; - vpxor %ymm5, %ymm15, %ymm5; - vpxor %ymm6, %ymm15, %ymm6; - vpxor %ymm7, %ymm15, %ymm7; - vpxor %ymm8, %ymm15, %ymm8; - vpxor %ymm9, %ymm15, %ymm9; - vpxor %ymm10, %ymm15, %ymm10; - vpxor %ymm11, %ymm15, %ymm11; 
- vpxor 12 * 32(%rax), %ymm15, %ymm12; - vpxor 13 * 32(%rax), %ymm15, %ymm13; - vpxor 14 * 32(%rax), %ymm15, %ymm14; - vpxor 15 * 32(%rax), %ymm15, %ymm15; - - CALL_NOSPEC r9; - - addq $(16 * 32), %rsp; - - vpxor 0 * 32(%rsi), %ymm7, %ymm7; - vpxor 1 * 32(%rsi), %ymm6, %ymm6; - vpxor 2 * 32(%rsi), %ymm5, %ymm5; - vpxor 3 * 32(%rsi), %ymm4, %ymm4; - vpxor 4 * 32(%rsi), %ymm3, %ymm3; - vpxor 5 * 32(%rsi), %ymm2, %ymm2; - vpxor 6 * 32(%rsi), %ymm1, %ymm1; - vpxor 7 * 32(%rsi), %ymm0, %ymm0; - vpxor 8 * 32(%rsi), %ymm15, %ymm15; - vpxor 9 * 32(%rsi), %ymm14, %ymm14; - vpxor 10 * 32(%rsi), %ymm13, %ymm13; - vpxor 11 * 32(%rsi), %ymm12, %ymm12; - vpxor 12 * 32(%rsi), %ymm11, %ymm11; - vpxor 13 * 32(%rsi), %ymm10, %ymm10; - vpxor 14 * 32(%rsi), %ymm9, %ymm9; - vpxor 15 * 32(%rsi), %ymm8, %ymm8; - write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, - %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, - %ymm8, %rsi); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(camellia_xts_crypt_32way) - -SYM_FUNC_START(camellia_xts_enc_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - - xorl %r8d, %r8d; /* input whitening key, 0 for enc */ - - leaq __camellia_enc_blk32, %r9; - - jmp camellia_xts_crypt_32way; -SYM_FUNC_END(camellia_xts_enc_32way) - -SYM_FUNC_START(camellia_xts_dec_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - - cmpl $16, key_length(CTX); - movl $32, %r8d; - movl $24, %eax; - cmovel %eax, %r8d; /* input whitening key, last for dec */ - - leaq __camellia_dec_blk32, %r9; - - jmp camellia_xts_crypt_32way; -SYM_FUNC_END(camellia_xts_dec_32way) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index ccda647422d6..d956d0473668 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -26,11 +25,6 @@ asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static const struct common_glue_ctx camellia_enc = { .num_funcs = 4, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -69,22 +63,6 @@ static const struct common_glue_ctx camellia_ctr = { } } }; -static const struct common_glue_ctx camellia_enc_xts = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_enc_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_enc_16way } - }, { - .num_blocks = 1, - .fn_u = { .xts = camellia_xts_enc } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 4, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -123,22 +101,6 @@ static const struct common_glue_ctx camellia_dec_cbc = { } } }; -static const struct common_glue_ctx camellia_dec_xts = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_dec_32way } - }, { - .num_blocks = 
CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_dec_16way } - }, { - .num_blocks = 1, - .fn_u = { .xts = camellia_xts_dec } - } } -}; - static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -170,24 +132,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&camellia_ctr, req); } -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, true); -} - static struct skcipher_alg camellia_algs[] = { { .base.cra_name = "__ecb(camellia)", @@ -231,20 +175,6 @@ static struct skcipher_alg camellia_algs[] = { .setkey = camellia_setkey, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(camellia)", - .base.cra_driver_name = "__xts-camellia-aesni-avx2", - .base.cra_priority = 500, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, - .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 4e5de6ef206e..44614f8a452c 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -31,26 +30,6 @@ asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); EXPORT_SYMBOL_GPL(camellia_ctr_16way); -asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -EXPORT_SYMBOL_GPL(camellia_xts_enc_16way); - -asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -EXPORT_SYMBOL_GPL(camellia_xts_dec_16way); - -void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk); -} -EXPORT_SYMBOL_GPL(camellia_xts_enc); - -void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk); -} -EXPORT_SYMBOL_GPL(camellia_xts_dec); - static const struct common_glue_ctx camellia_enc = { .num_funcs = 3, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -83,19 +62,6 @@ static const struct common_glue_ctx camellia_ctr = { } } }; -static const struct common_glue_ctx camellia_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_enc_16way } - }, { - .num_blocks = 1, - .fn_u = { .xts = camellia_xts_enc } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 3, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -128,19 +94,6 @@ static const struct common_glue_ctx camellia_dec_cbc = { } } }; -static const 
struct common_glue_ctx camellia_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_dec_16way } - }, { - .num_blocks = 1, - .fn_u = { .xts = camellia_xts_dec } - } } -}; - static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -172,44 +125,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&camellia_ctr, req); } -int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} -EXPORT_SYMBOL_GPL(xts_camellia_setkey); - -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, true); -} - static struct skcipher_alg camellia_algs[] = { { .base.cra_name = "__ecb(camellia)", @@ -253,21 +168,7 @@ static struct skcipher_alg camellia_algs[] = { .setkey = camellia_setkey, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(camellia)", - .base.cra_driver_name = "__xts-camellia-aesni", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, - .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, + } }; static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index f6d91861cb14..0e5f82adbaf9 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -19,18 +19,10 @@ struct camellia_ctx { u32 key_length; }; -struct camellia_xts_ctx { - struct camellia_ctx tweak_ctx; - struct camellia_ctx crypt_ctx; -}; - extern int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len); -extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen); - /* regular block cipher functions */ asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, bool xor); @@ -49,11 +41,6 @@ asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, - le128 
*iv); - static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk(ctx, dst, src, false); @@ -83,9 +70,4 @@ extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - #endif /* ASM_X86_CAMELLIA_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index c48ca26e2169..b9ea4e262ebe 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1305,7 +1305,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 select CRYPTO_CAMELLIA_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD - select CRYPTO_XTS + imply CRYPTO_XTS help Camellia cipher algorithm module (x86_64/AES-NI/AVX). From patchwork Thu Dec 31 17:23:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994277 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 43FF1C433E6 for ; Thu, 31 Dec 2020 17:24:49 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 0D195223E4 for ; Thu, 31 Dec 2020 17:24:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726661AbgLaRYg (ORCPT ); Thu, 31 Dec 2020 12:24:36 -0500 Received: from mail.kernel.org ([198.145.29.99]:54722 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726540AbgLaRYg (ORCPT ); Thu, 31 Dec 2020 12:24:36 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id F2B60223E4; Thu, 31 Dec 2020 17:23:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435435; bh=UUW+iE+Q72l+r3IpvgOdswPBKdJpAeXzpkMms7XMhY4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=dKzjqP0kcy3Y3q2OwWB2YxSxUHYF7Hp77Q10A/3v3EKnM703DC6xjxUBm6Pxc487R VS7T1Gg0MjFQaNg5XfokhKnuZHFu7kWlQam1m8ravgL8+wAi/uXhBXJxRd021Ts9hM 09HNoVS3j32p0BB81fk5LEADwSTRKYNi3tZqlyHEIsoXkJ0LWYqlWqFF6YC4xV78c+ Zerf6M5F+MruZbW9qRkYg5kbGxEqJ/aoExs1ZHdEb2IPRhrJ7OQU0vt9gFQ7Pea2IE WlN1LomTEXj8DiOlxCGtobU0qNoxQ56XZft/6htpvZlUngBmz043cpl8UWl6S7QOK+ ePINAzBF3G9Tg== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 02/21] crypto: x86/cast6 - switch to XTS template Date: Thu, 31 Dec 2020 18:23:18 +0100 Message-Id: <20201231172337.23073-3-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Now that the XTS template can wrap accelerated ECB modes, it can be used to implement CAST6 in XTS mode as well, which turns out to be at least as fast, and sometimes even faster Acked-by: Eric Biggers 
Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 56 ----------- arch/x86/crypto/cast6_avx_glue.c | 98 -------------------- crypto/Kconfig | 2 +- 3 files changed, 1 insertion(+), 155 deletions(-) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 932a3ce32a88..0c1ea836215a 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -212,8 +212,6 @@ .section .rodata.cst16, "aM", @progbits, 16 .align 16 -.Lxts_gf128mul_and_shl1_mask: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 .Lbswap_mask: .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 .Lbswap128_mask: @@ -440,57 +438,3 @@ SYM_FUNC_START(cast6_ctr_8way) FRAME_END ret; SYM_FUNC_END(cast6_ctr_8way) - -SYM_FUNC_START(cast6_xts_enc_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - pushq %r15; - - movq %rdi, CTX - movq %rsi, %r11; - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask); - - call __cast6_enc_blk8; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - popq %r15; - FRAME_END - ret; -SYM_FUNC_END(cast6_xts_enc_8way) - -SYM_FUNC_START(cast6_xts_dec_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - pushq %r15; - - movq %rdi, CTX - movq %rsi, %r11; - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask); - - call __cast6_dec_blk8; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - popq %r15; - FRAME_END - ret; -SYM_FUNC_END(cast6_xts_dec_8way) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 48e0f37796fa..5a21d3e9041c 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #define CAST6_PARALLEL_BLOCKS 8 @@ -27,27 +26,12 @@ asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return cast6_setkey(&tfm->base, key, keylen); } -static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt); -} - -static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt); -} - static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; @@ -87,19 +71,6 @@ static const struct common_glue_ctx cast6_ctr = { } } }; -static const struct common_glue_ctx cast6_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .xts = cast6_xts_enc_8way } - }, { - .num_blocks = 1, - .fn_u = { .xts = cast6_xts_enc } - } } -}; - static const struct 
common_glue_ctx cast6_dec = { .num_funcs = 2, .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, @@ -126,19 +97,6 @@ static const struct common_glue_ctx cast6_dec_cbc = { } } }; -static const struct common_glue_ctx cast6_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .xts = cast6_xts_dec_8way } - }, { - .num_blocks = 1, - .fn_u = { .xts = cast6_xts_dec } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { return glue_ecb_req_128bit(&cast6_enc, req); @@ -164,48 +122,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&cast6_ctr, req); } -struct cast6_xts_ctx { - struct cast6_ctx tweak_ctx; - struct cast6_ctx crypt_ctx; -}; - -static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} - -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt, - &ctx->tweak_ctx, &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt, - &ctx->tweak_ctx, &ctx->crypt_ctx, true); -} - static struct skcipher_alg cast6_algs[] = { { .base.cra_name = "__ecb(cast6)", @@ -249,20 +165,6 @@ static struct skcipher_alg cast6_algs[] = { .setkey = cast6_setkey_skcipher, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(cast6)", - .base.cra_driver_name = "__xts-cast6-avx", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = CAST6_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cast6_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * CAST6_MIN_KEY_SIZE, - .max_keysize = 2 * CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = xts_cast6_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index b9ea4e262ebe..03e8468e57df 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1394,7 +1394,7 @@ config CRYPTO_CAST6_AVX_X86_64 select CRYPTO_CAST_COMMON select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD - select CRYPTO_XTS + imply CRYPTO_XTS help The CAST6 encryption algorithm (synonymous with CAST-256) is described in RFC2612. 
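Both per-driver setkey routines removed so far (xts_camellia_setkey and xts_cast6_setkey above) repeated the same key-splitting pattern, which the XTS template now performs in one place. It is restated below as a generic sketch; struct example_ctx, struct example_xts_ctx and __example_setkey() are placeholders standing in for the per-cipher equivalents, not kernel symbols.

	#include <crypto/xts.h>
	#include <crypto/internal/skcipher.h>

	struct example_ctx { u32 subkeys[64]; };	/* stand-in for the cipher's key schedule */

	struct example_xts_ctx {
		struct example_ctx tweak_ctx;		/* keyed with the second half */
		struct example_ctx crypt_ctx;		/* keyed with the first half */
	};

	static int __example_setkey(struct example_ctx *ctx, const u8 *key,
				    unsigned int keylen);	/* per-cipher key expansion */

	static int xts_example_setkey(struct crypto_skcipher *tfm, const u8 *key,
				      unsigned int keylen)
	{
		struct example_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
		int err;

		/* Rejects key lengths that cannot be split evenly (and, in
		 * FIPS mode, keys whose two halves are identical). */
		err = xts_verify_key(tfm, key, keylen);
		if (err)
			return err;

		/* first half of xts-key is for crypt */
		err = __example_setkey(&ctx->crypt_ctx, key, keylen / 2);
		if (err)
			return err;

		/* second half of xts-key is for tweak */
		return __example_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
	}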
From patchwork Thu Dec 31 17:23:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994279 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5F0EFC43381 for ; Thu, 31 Dec 2020 17:24:49 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 259E3223E8 for ; Thu, 31 Dec 2020 17:24:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726690AbgLaRYi (ORCPT ); Thu, 31 Dec 2020 12:24:38 -0500 Received: from mail.kernel.org ([198.145.29.99]:54734 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726540AbgLaRYi (ORCPT ); Thu, 31 Dec 2020 12:24:38 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 0E486223E8; Thu, 31 Dec 2020 17:23:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435437; bh=jnpRkMCxKXk4JHRKJPJ8KaTGW9pSuv2eZARbURmztK0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=rBVPHRf8NNMm2W1EaG7pp4K8rFwiX9JKpZnAj23wxe1PYtgqcwSqHqBof3YTKtfNB VOnmkjIdHbsoKm4Wwc4Q0p2qVgb7HHtaWgzSnVLoV621N+Qi0hr5YbeRf5JabEWevP i+LGC1HiS7JY3OOBzEVnO4YdHqd30KBnNhJEsn7cPlBOahtaRvHpWSiP/XwqhmXVtr bHcl6ARVNCmTI2MwV44M8zJ4yT84OBrN0QMwalh/LONTFPI9zwt2MTmnALSBTVjy4B xe1+6LY+SmbTETFRH/FPRNOFVULVcOHy2mOmiajQkAWJjUVi7WOfOkC8rGO694mNNE W4wolquFpAzUQ== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 03/21] crypto: x86/serpent- switch to XTS template Date: Thu, 31 Dec 2020 18:23:19 +0100 Message-Id: <20201231172337.23073-4-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Now that the XTS template can wrap accelerated ECB modes, it can be used to implement Serpent in XTS mode as well, which turns out to be at least as fast, and sometimes even faster Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 48 ---------- arch/x86/crypto/serpent-avx2-asm_64.S | 62 ------------ arch/x86/crypto/serpent_avx2_glue.c | 72 -------------- arch/x86/crypto/serpent_avx_glue.c | 101 -------------------- arch/x86/include/asm/crypto/serpent-avx.h | 21 ---- crypto/Kconfig | 2 +- 6 files changed, 1 insertion(+), 305 deletions(-) diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index ba9e4c1e7f5c..6b41f46bcc76 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -18,10 +18,6 @@ .align 16 .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 -.align 16 -.Lxts_gf128mul_and_shl1_mask: - .byte 0x87, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 .text @@ -735,47 +731,3 @@ SYM_FUNC_START(serpent_ctr_8way_avx) FRAME_END ret; SYM_FUNC_END(serpent_ctr_8way_avx) - -SYM_FUNC_START(serpent_xts_enc_8way_avx) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask); - - call __serpent_enc_blk8_avx; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - FRAME_END - ret; -SYM_FUNC_END(serpent_xts_enc_8way_avx) - -SYM_FUNC_START(serpent_xts_dec_8way_avx) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask); - - call __serpent_dec_blk8_avx; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); - - FRAME_END - ret; -SYM_FUNC_END(serpent_xts_dec_8way_avx) diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index c9648aeae705..a510a949f02f 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -20,16 +20,6 @@ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16 -.align 16 -.Lxts_gf128mul_and_shl1_mask_0: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 - -.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16 -.align 16 -.Lxts_gf128mul_and_shl1_mask_1: - .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 - .text #define CTX %rdi @@ -759,55 +749,3 @@ SYM_FUNC_START(serpent_ctr_16way) FRAME_END ret; SYM_FUNC_END(serpent_ctr_16way) - -SYM_FUNC_START(serpent_xts_enc_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - vzeroupper; - - load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, - .Lxts_gf128mul_and_shl1_mask_0, - .Lxts_gf128mul_and_shl1_mask_1); - - call __serpent_enc_blk16; - - store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(serpent_xts_enc_16way) - -SYM_FUNC_START(serpent_xts_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - vzeroupper; - - load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, - .Lxts_gf128mul_and_shl1_mask_0, - .Lxts_gf128mul_and_shl1_mask_1); - - call __serpent_dec_blk16; - - store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(serpent_xts_dec_16way) diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index f973ace44ad3..9cdf2c078e21 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -25,11 +24,6 @@ asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void serpent_ctr_16way(const void *ctx, 
u8 *dst, const u8 *src, le128 *iv); -asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -68,22 +62,6 @@ static const struct common_glue_ctx serpent_ctr = { } } }; -static const struct common_glue_ctx serpent_enc_xts = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .xts = serpent_xts_enc_16way } - }, { - .num_blocks = 8, - .fn_u = { .xts = serpent_xts_enc_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .xts = serpent_xts_enc } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 3, .fpu_blocks_limit = 8, @@ -116,22 +94,6 @@ static const struct common_glue_ctx serpent_dec_cbc = { } } }; -static const struct common_glue_ctx serpent_dec_xts = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .xts = serpent_xts_dec_16way } - }, { - .num_blocks = 8, - .fn_u = { .xts = serpent_xts_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .xts = serpent_xts_dec } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { return glue_ecb_req_128bit(&serpent_enc, req); @@ -157,26 +119,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&serpent_ctr, req); } -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&serpent_enc_xts, req, - __serpent_encrypt, &ctx->tweak_ctx, - &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&serpent_dec_xts, req, - __serpent_encrypt, &ctx->tweak_ctx, - &ctx->crypt_ctx, true); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -220,20 +162,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(serpent)", - .base.cra_driver_name = "__xts-serpent-avx2", - .base.cra_priority = 600, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = SERPENT_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, - .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 7806d1cbe854..b17a08b57a91 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -36,14 +35,6 @@ asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx); -asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst, - const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx); - -asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst, - const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx); - void __serpent_crypt_ctr(const void *ctx, u8 *d, 
const u8 *s, le128 *iv) { be128 ctrblk; @@ -58,44 +49,12 @@ void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) } EXPORT_SYMBOL_GPL(__serpent_crypt_ctr); -void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt); -} -EXPORT_SYMBOL_GPL(serpent_xts_enc); - -void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt); -} -EXPORT_SYMBOL_GPL(serpent_xts_dec); - static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} -EXPORT_SYMBOL_GPL(xts_serpent_setkey); - static const struct common_glue_ctx serpent_enc = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -122,19 +81,6 @@ static const struct common_glue_ctx serpent_ctr = { } } }; -static const struct common_glue_ctx serpent_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .xts = serpent_xts_enc_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .xts = serpent_xts_enc } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -161,19 +107,6 @@ static const struct common_glue_ctx serpent_dec_cbc = { } } }; -static const struct common_glue_ctx serpent_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .xts = serpent_xts_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .xts = serpent_xts_dec } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { return glue_ecb_req_128bit(&serpent_enc, req); @@ -199,26 +132,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&serpent_ctr, req); } -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&serpent_enc_xts, req, - __serpent_encrypt, &ctx->tweak_ctx, - &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&serpent_dec_xts, req, - __serpent_encrypt, &ctx->tweak_ctx, - &ctx->crypt_ctx, true); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -262,20 +175,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(serpent)", - .base.cra_driver_name = "__xts-serpent-avx", - .base.cra_priority = 500, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = SERPENT_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), - 
.base.cra_module = THIS_MODULE, - .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, - .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index 251c2c89d7cf..23f3361a0e72 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -10,11 +10,6 @@ struct crypto_skcipher; #define SERPENT_PARALLEL_BLOCKS 8 -struct serpent_xts_ctx { - struct serpent_ctx tweak_ctx; - struct serpent_ctx crypt_ctx; -}; - asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src); asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, @@ -22,21 +17,5 @@ asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - -asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst, - const u8 *src, le128 *iv); - -extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - -extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv); - -extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen); #endif diff --git a/crypto/Kconfig b/crypto/Kconfig index 03e8468e57df..ce69a5ae26b5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1576,7 +1576,7 @@ config CRYPTO_SERPENT_AVX_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD - select CRYPTO_XTS + imply CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. 
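For reference, the .Lxts_gf128mul_and_shl1_mask constants and gf128mul_x_ble macros deleted throughout this series implement the XTS tweak update: multiplying the 128-bit tweak by alpha in GF(2^128). The same step, which the generic XTS template performs via the kernel's gf128mul_x_ble() helper, can be written in plain C roughly as below (tweak held as two native u64 halves for clarity).

	#include <linux/types.h>

	/*
	 * Multiply a 128-bit XTS tweak by x (alpha) in GF(2^128), little-endian
	 * block convention: shift the value left by one bit and, if a bit fell
	 * off the top, fold it back in as 0x87 (x^128 = x^7 + x^2 + x + 1).
	 */
	static void xts_tweak_mul_x(u64 t[2])	/* t[0] = low 64 bits, t[1] = high 64 bits */
	{
		u64 carry = t[1] >> 63;			/* bit 127, about to be shifted out */

		t[1] = (t[1] << 1) | (t[0] >> 63);	/* 128-bit left shift by one */
		t[0] = (t[0] << 1) ^ (carry * 0x87);	/* reduce by the XTS polynomial */
	}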
From patchwork Thu Dec 31 17:23:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994275 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5A36EC433E9 for ; Thu, 31 Dec 2020 17:24:49 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3E9B42242A for ; Thu, 31 Dec 2020 17:24:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726743AbgLaRYl (ORCPT ); Thu, 31 Dec 2020 12:24:41 -0500 Received: from mail.kernel.org ([198.145.29.99]:54758 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726540AbgLaRYk (ORCPT ); Thu, 31 Dec 2020 12:24:40 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 234312242A; Thu, 31 Dec 2020 17:23:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435439; bh=RVUOgzblhVxXipF+TH0F1dBrD2if8f5ZJNxV4J4xNV4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=k37Cem8O6+IRr0iM01P410y3YlQwpSR4lwRNSXFR2OkoWZkDHsW7MdbmG3qiz2hMQ d3wECijLyiVWldSkxQo5r8wkj19sybYipqyZHgOZKP0w3/qjV65KPzXubVzF1lREly rtWGsnlBIMY/ghxxSgg0x1c2ur3Cwi7/ZnWGyWRhI3GMm7LuA0HgMYHBNEt/Aa9J7W /m41EQ5Y0kRBZowDg7JoE+6hbR7Gf2iu1jA6AhycV2xsUcJJM6w6L+rd3rYEY7MKaA 3er/fs0iS9iJqvJuVpIrBhN5OpADfj6qsIRmxVQsHx/ZNLyIZSm5fbRwQ2pOk8MTQQ cPWxLqrqPlK3Q== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 04/21] crypto: x86/twofish - switch to XTS template Date: Thu, 31 Dec 2020 18:23:20 +0100 Message-Id: <20201231172337.23073-5-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Now that the XTS template can wrap accelerated ECB modes, it can be used to implement Twofish in XTS mode as well, which turns out to be at least as fast, and sometimes even faster Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 53 ----------- arch/x86/crypto/twofish_avx_glue.c | 98 -------------------- crypto/Kconfig | 1 + 3 files changed, 1 insertion(+), 151 deletions(-) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index a5151393bb2f..84e61ef03638 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -19,11 +19,6 @@ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 -.align 16 -.Lxts_gf128mul_and_shl1_mask: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 - .text /* structure of crypto context */ @@ -406,51 +401,3 @@ SYM_FUNC_START(twofish_ctr_8way) FRAME_END ret; SYM_FUNC_END(twofish_ctr_8way) - 
-SYM_FUNC_START(twofish_xts_enc_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - movq %rsi, %r11; - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask); - - call __twofish_enc_blk8; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); - - FRAME_END - ret; -SYM_FUNC_END(twofish_xts_enc_8way) - -SYM_FUNC_START(twofish_xts_dec_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - movq %rsi, %r11; - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2, - RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask); - - call __twofish_dec_blk8; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - FRAME_END - ret; -SYM_FUNC_END(twofish_xts_dec_8way) diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 2dbc8ce3730e..7b539bbb108f 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -29,11 +28,6 @@ asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -45,40 +39,6 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) __twofish_enc_blk_3way(ctx, dst, src, false); } -static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk); -} - -static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk); -} - -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - -static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} - static const struct common_glue_ctx twofish_enc = { .num_funcs = 3, .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, @@ -111,19 +71,6 @@ static const struct common_glue_ctx twofish_ctr = { } } }; -static const struct common_glue_ctx twofish_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .xts = twofish_xts_enc_8way } - }, { - .num_blocks = 1, - .fn_u = { .xts = twofish_xts_enc } - } } -}; - static const struct common_glue_ctx twofish_dec = { .num_funcs = 3, .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, @@ -156,19 +103,6 @@ static const struct common_glue_ctx twofish_dec_cbc = { } 
} }; -static const struct common_glue_ctx twofish_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .xts = twofish_xts_dec_8way } - }, { - .num_blocks = 1, - .fn_u = { .xts = twofish_xts_dec } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { return glue_ecb_req_128bit(&twofish_enc, req); @@ -194,24 +128,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&twofish_ctr, req); } -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, true); -} - static struct skcipher_alg twofish_algs[] = { { .base.cra_name = "__ecb(twofish)", @@ -255,20 +171,6 @@ static struct skcipher_alg twofish_algs[] = { .setkey = twofish_setkey_skcipher, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(twofish)", - .base.cra_driver_name = "__xts-twofish-avx", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = TF_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct twofish_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * TF_MIN_KEY_SIZE, - .max_keysize = 2 * TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index ce69a5ae26b5..7ad9bf84f4a0 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1731,6 +1731,7 @@ config CRYPTO_TWOFISH_AVX_X86_64 select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_TWOFISH_X86_64_3WAY + imply CRYPTO_XTS help Twofish cipher algorithm (x86_64/AVX). 
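
[Editorial note, not part of the patch] In-kernel users are unaffected by the switch: requesting "xts(twofish)" through the skcipher API now causes the generic XTS template to be instantiated around the accelerated ECB code. The sketch below shows what such a request looks like; the function name and buffer handling are illustrative only and do not come from this series.

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

/* Illustrative one-shot in-place encryption of @len bytes (>= 16) with a
 * 64-byte XTS key (two concatenated 32-byte Twofish keys) and a 16-byte
 * tweak in @iv. */
static int xts_twofish_encrypt_buf(u8 *buf, unsigned int len,
				   const u8 key[64], u8 iv[16])
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("xts(twofish)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, 64);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}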
From patchwork Thu Dec 31 17:23:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994281 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 41073C433E6 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 1B163223E8 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726821AbgLaRZO (ORCPT ); Thu, 31 Dec 2020 12:25:14 -0500 Received: from mail.kernel.org ([198.145.29.99]:55026 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726540AbgLaRZO (ORCPT ); Thu, 31 Dec 2020 12:25:14 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 3921E2246B; Thu, 31 Dec 2020 17:24:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435441; bh=61mvMHq0AsNIhYbOTt+l55NM/1lXzwpdo+RutssceJQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=YDNZwqEdIu/dr6XDbXqbVlRtS6f/Lg3suFTFb7IBAJW3j4RnuPdSCDgWxdx94Ndxa UqM3KTjgKHCClbL1OkIs+g0G+EK9CQPviySqSr4KuT6Q6gPowUXi+Ojt38ErKpC3GR P8j4LwXNkTKr8K4WK1W9wgpYyee8kcsPQnMLkvF1bo1sHyGwarK6cwt44iSGPwcNch JvShnwsCuU6RxoVe548XBKtFv+MorOjXnp6OxhISnvKtK2yjrFtpVcuZg7tvhMMbLL 75Ah9fM1EcFbxutHoF5qDZB6gHVQiIyRM8/lTc9kBG8uOuta6vZsbDCIdg2Huyt8fA NxmBrAgR34Kng== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 05/21] crypto: x86/glue-helper - drop XTS helper routines Date: Thu, 31 Dec 2020 18:23:21 +0100 Message-Id: <20201231172337.23073-6-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org The glue helper's XTS routines are no longer used, so drop them. 
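
[Editorial note, not part of the patch] For reference, the arithmetic these helpers (and the per-driver gf128mul_x_ble assembler macros) implement is multiplication of the 128-bit XTS tweak by x in GF(2^128), using the little-endian block convention. A standalone sketch of that operation follows; it is written for this note and is not taken from the kernel sources. It assumes the 16-byte tweak has been loaded little-endian into two 64-bit words on a little-endian host.

#include <linux/types.h>

/* Double the XTS tweak: treat the 16-byte tweak as a little-endian
 * 128-bit value, shift it left by one bit, and on carry-out reduce by
 * the field polynomial x^128 + x^7 + x^2 + x + 1 (XOR 0x87 into the
 * low byte).  t[0] holds tweak bytes 0..7, t[1] holds bytes 8..15. */
static void xts_tweak_double(u64 t[2])
{
	u64 carry = t[1] >> 63;		/* bit 127 of the tweak */

	t[1] = (t[1] << 1) | (t[0] >> 63);
	t[0] <<= 1;
	t[0] ^= carry * 0x87;		/* conditional reduction */
}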
Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/glue_helper-asm-avx.S | 59 -------- arch/x86/crypto/glue_helper-asm-avx2.S | 78 ---------- arch/x86/crypto/glue_helper.c | 154 -------------------- arch/x86/include/asm/crypto/glue_helper.h | 12 -- 4 files changed, 303 deletions(-) diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S index d08fc575ef7f..a94511432803 100644 --- a/arch/x86/crypto/glue_helper-asm-avx.S +++ b/arch/x86/crypto/glue_helper-asm-avx.S @@ -79,62 +79,3 @@ vpxor (6*16)(src), x6, x6; \ vpxor (7*16)(src), x7, x7; \ store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \ - t1, xts_gf128mul_and_shl1_mask) \ - vmovdqa xts_gf128mul_and_shl1_mask, t0; \ - \ - /* load IV */ \ - vmovdqu (iv), tiv; \ - vpxor (0*16)(src), tiv, x0; \ - vmovdqu tiv, (0*16)(dst); \ - \ - /* construct and store IVs, also xor with source */ \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (1*16)(src), tiv, x1; \ - vmovdqu tiv, (1*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (2*16)(src), tiv, x2; \ - vmovdqu tiv, (2*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (3*16)(src), tiv, x3; \ - vmovdqu tiv, (3*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (4*16)(src), tiv, x4; \ - vmovdqu tiv, (4*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (5*16)(src), tiv, x5; \ - vmovdqu tiv, (5*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (6*16)(src), tiv, x6; \ - vmovdqu tiv, (6*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (7*16)(src), tiv, x7; \ - vmovdqu tiv, (7*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vmovdqu tiv, (iv); - -#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ - vpxor (0*16)(dst), x0, x0; \ - vpxor (1*16)(dst), x1, x1; \ - vpxor (2*16)(dst), x2, x2; \ - vpxor (3*16)(dst), x3, x3; \ - vpxor (4*16)(dst), x4, x4; \ - vpxor (5*16)(dst), x5, x5; \ - vpxor (6*16)(dst), x6, x6; \ - vpxor (7*16)(dst), x7, x7; \ - store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S index d84508c85c13..456bface1e5d 100644 --- a/arch/x86/crypto/glue_helper-asm-avx2.S +++ b/arch/x86/crypto/glue_helper-asm-avx2.S @@ -95,81 +95,3 @@ vpxor (6*32)(src), x6, x6; \ vpxor (7*32)(src), x7, x7; \ store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \ - vpsrad $31, iv, tmp0; \ - vpaddq iv, iv, tmp1; \ - vpsllq $2, iv, iv; \ - vpshufd $0x13, tmp0, tmp0; \ - vpsrad $31, tmp1, tmp1; \ - vpand mask2, tmp0, tmp0; \ - vpshufd $0x13, tmp1, tmp1; \ - vpxor tmp0, iv, iv; \ - vpand mask1, tmp1, tmp1; \ - vpxor tmp1, iv, iv; - -#define load_xts_16way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, \ - tivx, t0, t0x, t1, t1x, t2, t2x, t3, \ - xts_gf128mul_and_shl1_mask_0, \ - xts_gf128mul_and_shl1_mask_1) \ - vbroadcasti128 xts_gf128mul_and_shl1_mask_0, t1; \ - \ - /* load IV and construct second IV */ \ - vmovdqu (iv), tivx; \ - vmovdqa tivx, t0x; \ - gf128mul_x_ble(tivx, t1x, t2x); \ - vbroadcasti128 xts_gf128mul_and_shl1_mask_1, t2; \ - vinserti128 $1, tivx, t0, 
tiv; \ - vpxor (0*32)(src), tiv, x0; \ - vmovdqu tiv, (0*32)(dst); \ - \ - /* construct and store IVs, also xor with source */ \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (1*32)(src), tiv, x1; \ - vmovdqu tiv, (1*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (2*32)(src), tiv, x2; \ - vmovdqu tiv, (2*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (3*32)(src), tiv, x3; \ - vmovdqu tiv, (3*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (4*32)(src), tiv, x4; \ - vmovdqu tiv, (4*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (5*32)(src), tiv, x5; \ - vmovdqu tiv, (5*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (6*32)(src), tiv, x6; \ - vmovdqu tiv, (6*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (7*32)(src), tiv, x7; \ - vmovdqu tiv, (7*32)(dst); \ - \ - vextracti128 $1, tiv, tivx; \ - gf128mul_x_ble(tivx, t1x, t2x); \ - vmovdqu tivx, (iv); - -#define store_xts_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ - vpxor (0*32)(dst), x0, x0; \ - vpxor (1*32)(dst), x1, x1; \ - vpxor (2*32)(dst), x2, x2; \ - vpxor (3*32)(dst), x3, x3; \ - vpxor (4*32)(dst), x4, x4; \ - vpxor (5*32)(dst), x5, x5; \ - vpxor (6*32)(dst), x6, x6; \ - vpxor (7*32)(dst), x7, x7; \ - store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index d3d91a0abf88..786ffda1caf4 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -12,10 +12,8 @@ #include #include -#include #include #include -#include #include int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, @@ -226,156 +224,4 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_ctr_req_128bit); -static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx, - void *ctx, - struct skcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = walk->src.virt.addr; - u128 *dst = walk->dst.virt.addr; - unsigned int num_blocks, func_bytes; - unsigned int i; - - /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst, - (const u8 *)src, - walk->iv); - - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - goto done; - } - } - -done: - return nbytes; -} - -int glue_xts_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx, bool decrypt) -{ - const bool cts = (req->cryptlen % XTS_BLOCK_SIZE); - const unsigned int bsize = 128 / 8; - struct skcipher_request subreq; - struct skcipher_walk walk; - bool fpu_enabled = false; - unsigned int nbytes, tail; - int err; - - if (req->cryptlen < XTS_BLOCK_SIZE) - return -EINVAL; - - if (unlikely(cts)) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - - tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE; - - skcipher_request_set_tfm(&subreq, tfm); - skcipher_request_set_callback(&subreq, - crypto_skcipher_get_flags(tfm), - NULL, NULL); - skcipher_request_set_crypt(&subreq, req->src, req->dst, - req->cryptlen - tail, req->iv); - req = &subreq; - } - - err = skcipher_walk_virt(&walk, req, false); - nbytes = walk.nbytes; - if (err) - return err; - - /* set minimum length to bsize, 
for tweak_fn */ - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, - nbytes < bsize ? bsize : nbytes); - - /* calculate first value of T */ - tweak_fn(tweak_ctx, walk.iv, walk.iv); - - while (nbytes) { - nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk); - - err = skcipher_walk_done(&walk, nbytes); - nbytes = walk.nbytes; - } - - if (unlikely(cts)) { - u8 *next_tweak, *final_tweak = req->iv; - struct scatterlist *src, *dst; - struct scatterlist s[2], d[2]; - le128 b[2]; - - dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen); - if (req->dst != req->src) - dst = scatterwalk_ffwd(d, req->dst, req->cryptlen); - - if (decrypt) { - next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE); - gf128mul_x_ble(b, b); - } else { - next_tweak = req->iv; - } - - skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE, - next_tweak); - - err = skcipher_walk_virt(&walk, req, false) ?: - skcipher_walk_done(&walk, - __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); - if (err) - goto out; - - scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0); - memcpy(b + 1, b, tail - XTS_BLOCK_SIZE); - scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE, - tail - XTS_BLOCK_SIZE, 0); - scatterwalk_map_and_copy(b, dst, 0, tail, 1); - - skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE, - final_tweak); - - err = skcipher_walk_virt(&walk, req, false) ?: - skcipher_walk_done(&walk, - __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); - } - -out: - glue_fpu_end(fpu_enabled); - - return err; -} -EXPORT_SYMBOL_GPL(glue_xts_req_128bit); - -void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src, - le128 *iv, common_glue_func_t fn) -{ - le128 ivblk = *iv; - - /* generate next IV */ - gf128mul_x_ble(iv, &ivblk); - - /* CC <- T xor C */ - u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk); - - /* PP <- D(Key2,CC) */ - fn(ctx, dst, dst); - - /* P <- T xor PP */ - u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk); -} -EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one); - MODULE_LICENSE("GPL"); diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 777c0f63418c..62680775d189 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -15,8 +15,6 @@ typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src); typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src); typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); struct common_glue_func_entry { unsigned int num_blocks; /* number of blocks that @fn will process */ @@ -24,7 +22,6 @@ struct common_glue_func_entry { common_glue_func_t ecb; common_glue_cbc_func_t cbc; common_glue_ctr_func_t ctr; - common_glue_xts_func_t xts; } fn_u; }; @@ -106,13 +103,4 @@ extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); -extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx, bool decrypt); - -extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, - const u8 *src, le128 *iv, - common_glue_func_t fn); - #endif /* _CRYPTO_GLUE_HELPER_H */ From patchwork Thu Dec 31 17:23:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994283 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 38F3CC433E0 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id F176C223E4 for ; Thu, 31 Dec 2020 17:25:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726848AbgLaRZO (ORCPT ); Thu, 31 Dec 2020 12:25:14 -0500 Received: from mail.kernel.org ([198.145.29.99]:55028 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726821AbgLaRZO (ORCPT ); Thu, 31 Dec 2020 12:25:14 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 4925F22472; Thu, 31 Dec 2020 17:24:02 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435443; bh=Bncq3p07y4vzXMssM3usmQCyxP0h3Sn+wWGRLBMj1aA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=GenncMWXmDfK15ggwJdz5ydkQtyPRo7p3wDpEob6rst9XG1ZtxhTOwwqhQw9hLlCR OhMNpdVx66IJDjeL8TTN1k6pStdbN9pUmDEih7YXqg1jpKr3ZjxN4zzpLem2R1szD4 nIAcY2lJRkOq2Z43w++dI8bmoEHuD6m7CPYLEfhZtKH5qlJheX2sCyhOosEOD9Pin5 tBOAGtN5bV12WXd3U5PkRrAnJXiTD6GnVYYaxRc9sGpY7WiWFTz4t8rumm49TI9lTJ qinH6i//8FHCrnTVR8VARzWdjQYnzRItm0+DuL8clO3LlibaiyhFQGGLDQG/9qIhom EordeDltVvVcA== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 06/21] crypto: x86/camellia - drop CTR mode implementation Date: Thu, 31 Dec 2020 18:23:22 +0100 Message-Id: <20201231172337.23073-7-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Camellia in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. 
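
[Editorial note, not part of the patch] What callers get from the generic template is the standard CTR construction: encrypt a big-endian counter block with the bare cipher, XOR the keystream into the data, increment the counter. A rough, self-contained sketch of one such step is shown below; the function name and signature are illustrative, not the template's actual code.

#include <linux/types.h>

/* One CTR step over up to 16 bytes; @enc_one is a single-block
 * encryption primitive such as the bare camellia cipher.  @len may be
 * shorter than the block size for the final, partial block. */
static void ctr_step(const void *key_ctx, u8 ctrblk[16],
		     u8 *dst, const u8 *src, unsigned int len,
		     void (*enc_one)(const void *ctx, u8 *out, const u8 *in))
{
	u8 keystream[16];
	unsigned int i;

	enc_one(key_ctx, keystream, ctrblk);
	for (i = 0; i < len; i++)
		dst[i] = src[i] ^ keystream[i];

	/* 128-bit big-endian counter increment, as crypto_inc() does */
	for (i = 16; i > 0; i--)
		if (++ctrblk[i - 1] != 0)
			break;
}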
Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 117 ---------------- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 144 -------------------- arch/x86/crypto/camellia_aesni_avx2_glue.c | 41 ------ arch/x86/crypto/camellia_aesni_avx_glue.c | 40 ------ arch/x86/crypto/camellia_glue.c | 68 --------- arch/x86/include/asm/crypto/camellia.h | 6 - crypto/Kconfig | 1 + 7 files changed, 1 insertion(+), 416 deletions(-) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 471c34e6cac2..e2a0e0f4bf9d 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -588,10 +588,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .long 0x80808080 .long 0x80808080 -/* For CTR-mode IV byteswap */ -.Lbswap128_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 - /* * pre-SubByte transform * @@ -993,116 +989,3 @@ SYM_FUNC_START(camellia_cbc_dec_16way) FRAME_END ret; SYM_FUNC_END(camellia_cbc_dec_16way) - -#define inc_le128(x, minus_one, tmp) \ - vpcmpeqq minus_one, x, tmp; \ - vpsubq minus_one, x, x; \ - vpslldq $8, tmp, tmp; \ - vpsubq tmp, x, x; - -SYM_FUNC_START(camellia_ctr_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - subq $(16 * 16), %rsp; - movq %rsp, %rax; - - vmovdqa .Lbswap128_mask, %xmm14; - - /* load IV and byteswap */ - vmovdqu (%rcx), %xmm0; - vpshufb %xmm14, %xmm0, %xmm15; - vmovdqu %xmm15, 15 * 16(%rax); - - vpcmpeqd %xmm15, %xmm15, %xmm15; - vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */ - - /* construct IVs */ - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm13; - vmovdqu %xmm13, 14 * 16(%rax); - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm13; - vmovdqu %xmm13, 13 * 16(%rax); - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm12; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm11; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm10; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm9; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm8; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm7; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm6; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm5; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm4; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm3; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm2; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm1; - inc_le128(%xmm0, %xmm15, %xmm13); - vmovdqa %xmm0, %xmm13; - vpshufb %xmm14, %xmm0, %xmm0; - inc_le128(%xmm13, %xmm15, %xmm14); - vmovdqu %xmm13, (%rcx); - - /* inpack16_pre: */ - vmovq (key_table)(CTX), %xmm15; - vpshufb .Lpack_bswap, %xmm15, %xmm15; - vpxor %xmm0, %xmm15, %xmm0; - vpxor %xmm1, %xmm15, %xmm1; - vpxor %xmm2, %xmm15, %xmm2; - vpxor %xmm3, %xmm15, %xmm3; - vpxor %xmm4, %xmm15, %xmm4; - vpxor %xmm5, %xmm15, %xmm5; - vpxor %xmm6, %xmm15, %xmm6; - vpxor %xmm7, %xmm15, %xmm7; - vpxor %xmm8, %xmm15, %xmm8; - vpxor %xmm9, %xmm15, %xmm9; - vpxor %xmm10, %xmm15, %xmm10; - vpxor %xmm11, %xmm15, %xmm11; - vpxor %xmm12, %xmm15, %xmm12; - vpxor 13 * 16(%rax), %xmm15, %xmm13; - vpxor 14 * 16(%rax), %xmm15, %xmm14; - vpxor 15 * 16(%rax), %xmm15, %xmm15; - - call 
__camellia_enc_blk16; - - addq $(16 * 16), %rsp; - - vpxor 0 * 16(%rdx), %xmm7, %xmm7; - vpxor 1 * 16(%rdx), %xmm6, %xmm6; - vpxor 2 * 16(%rdx), %xmm5, %xmm5; - vpxor 3 * 16(%rdx), %xmm4, %xmm4; - vpxor 4 * 16(%rdx), %xmm3, %xmm3; - vpxor 5 * 16(%rdx), %xmm2, %xmm2; - vpxor 6 * 16(%rdx), %xmm1, %xmm1; - vpxor 7 * 16(%rdx), %xmm0, %xmm0; - vpxor 8 * 16(%rdx), %xmm15, %xmm15; - vpxor 9 * 16(%rdx), %xmm14, %xmm14; - vpxor 10 * 16(%rdx), %xmm13, %xmm13; - vpxor 11 * 16(%rdx), %xmm12, %xmm12; - vpxor 12 * 16(%rdx), %xmm11, %xmm11; - vpxor 13 * 16(%rdx), %xmm10, %xmm10; - vpxor 14 * 16(%rdx), %xmm9, %xmm9; - vpxor 15 * 16(%rdx), %xmm8, %xmm8; - write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, - %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, - %xmm8, %rsi); - - FRAME_END - ret; -SYM_FUNC_END(camellia_ctr_16way) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 9561dee52de0..782e9712a1ec 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -624,10 +624,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .section .rodata.cst16, "aM", @progbits, 16 .align 16 -/* For CTR-mode IV byteswap */ -.Lbswap128_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 - /* * pre-SubByte transform * @@ -1054,143 +1050,3 @@ SYM_FUNC_START(camellia_cbc_dec_32way) FRAME_END ret; SYM_FUNC_END(camellia_cbc_dec_32way) - -#define inc_le128(x, minus_one, tmp) \ - vpcmpeqq minus_one, x, tmp; \ - vpsubq minus_one, x, x; \ - vpslldq $8, tmp, tmp; \ - vpsubq tmp, x, x; - -#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \ - vpcmpeqq minus_one, x, tmp1; \ - vpcmpeqq minus_two, x, tmp2; \ - vpsubq minus_two, x, x; \ - vpor tmp2, tmp1, tmp1; \ - vpslldq $8, tmp1, tmp1; \ - vpsubq tmp1, x, x; - -SYM_FUNC_START(camellia_ctr_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - vzeroupper; - - movq %rsp, %r10; - cmpq %rsi, %rdx; - je .Lctr_use_stack; - - /* dst can be used as temporary storage, src is not overwritten. 
*/ - movq %rsi, %rax; - jmp .Lctr_continue; - -.Lctr_use_stack: - subq $(16 * 32), %rsp; - movq %rsp, %rax; - -.Lctr_continue: - vpcmpeqd %ymm15, %ymm15, %ymm15; - vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */ - vpaddq %ymm15, %ymm15, %ymm12; /* ab: -2:0 ; cd: -2:0 */ - - /* load IV and byteswap */ - vmovdqu (%rcx), %xmm0; - vmovdqa %xmm0, %xmm1; - inc_le128(%xmm0, %xmm15, %xmm14); - vbroadcasti128 .Lbswap128_mask, %ymm14; - vinserti128 $1, %xmm0, %ymm1, %ymm0; - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 15 * 32(%rax); - - /* construct IVs */ - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); /* ab:le2 ; cd:le3 */ - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 14 * 32(%rax); - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 13 * 32(%rax); - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 12 * 32(%rax); - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 11 * 32(%rax); - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm10; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm9; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm8; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm7; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm6; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm5; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm4; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm3; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm2; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm1; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vextracti128 $1, %ymm0, %xmm13; - vpshufb %ymm14, %ymm0, %ymm0; - inc_le128(%xmm13, %xmm15, %xmm14); - vmovdqu %xmm13, (%rcx); - - /* inpack32_pre: */ - vpbroadcastq (key_table)(CTX), %ymm15; - vpshufb .Lpack_bswap, %ymm15, %ymm15; - vpxor %ymm0, %ymm15, %ymm0; - vpxor %ymm1, %ymm15, %ymm1; - vpxor %ymm2, %ymm15, %ymm2; - vpxor %ymm3, %ymm15, %ymm3; - vpxor %ymm4, %ymm15, %ymm4; - vpxor %ymm5, %ymm15, %ymm5; - vpxor %ymm6, %ymm15, %ymm6; - vpxor %ymm7, %ymm15, %ymm7; - vpxor %ymm8, %ymm15, %ymm8; - vpxor %ymm9, %ymm15, %ymm9; - vpxor %ymm10, %ymm15, %ymm10; - vpxor 11 * 32(%rax), %ymm15, %ymm11; - vpxor 12 * 32(%rax), %ymm15, %ymm12; - vpxor 13 * 32(%rax), %ymm15, %ymm13; - vpxor 14 * 32(%rax), %ymm15, %ymm14; - vpxor 15 * 32(%rax), %ymm15, %ymm15; - - call __camellia_enc_blk32; - - movq %r10, %rsp; - - vpxor 0 * 32(%rdx), %ymm7, %ymm7; - vpxor 1 * 32(%rdx), %ymm6, %ymm6; - vpxor 2 * 32(%rdx), %ymm5, %ymm5; - vpxor 3 * 32(%rdx), %ymm4, %ymm4; - vpxor 4 * 32(%rdx), %ymm3, %ymm3; - vpxor 5 * 32(%rdx), %ymm2, %ymm2; - vpxor 6 * 32(%rdx), %ymm1, %ymm1; - vpxor 7 * 32(%rdx), %ymm0, %ymm0; - vpxor 8 * 32(%rdx), %ymm15, %ymm15; - vpxor 9 * 32(%rdx), %ymm14, %ymm14; - vpxor 10 * 32(%rdx), %ymm13, %ymm13; - vpxor 11 * 32(%rdx), %ymm12, %ymm12; - vpxor 12 * 32(%rdx), %ymm11, %ymm11; - vpxor 13 * 32(%rdx), %ymm10, %ymm10; - vpxor 14 * 32(%rdx), %ymm9, %ymm9; - vpxor 15 * 32(%rdx), %ymm8, %ymm8; - write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, - %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, - %ymm8, %rsi); - - vzeroupper; - - FRAME_END - 
ret; -SYM_FUNC_END(camellia_ctr_32way) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index d956d0473668..8f25a2a6222e 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -22,8 +22,6 @@ asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static const struct common_glue_ctx camellia_enc = { .num_funcs = 4, @@ -44,25 +42,6 @@ static const struct common_glue_ctx camellia_enc = { } } }; -static const struct common_glue_ctx camellia_ctr = { - .num_funcs = 4, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ctr = camellia_ctr_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ctr = camellia_ctr_16way } - }, { - .num_blocks = 2, - .fn_u = { .ctr = camellia_crypt_ctr_2way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = camellia_crypt_ctr } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 4, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -127,11 +106,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&camellia_ctr, req); -} - static struct skcipher_alg camellia_algs[] = { { .base.cra_name = "__ecb(camellia)", @@ -160,21 +134,6 @@ static struct skcipher_alg camellia_algs[] = { .setkey = camellia_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(camellia)", - .base.cra_driver_name = "__ctr-camellia-aesni-avx2", - .base.cra_priority = 500, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct camellia_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .chunksize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 44614f8a452c..22a89cdfedfb 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -26,10 +26,6 @@ EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way); asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way); -asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -EXPORT_SYMBOL_GPL(camellia_ctr_16way); - static const struct common_glue_ctx camellia_enc = { .num_funcs = 3, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -46,22 +42,6 @@ static const struct common_glue_ctx camellia_enc = { } } }; -static const struct common_glue_ctx camellia_ctr = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ctr = camellia_ctr_16way } - }, { - .num_blocks = 2, - .fn_u = { .ctr = camellia_crypt_ctr_2way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = camellia_crypt_ctr } - } } -}; - static const struct common_glue_ctx camellia_dec = { 
.num_funcs = 3, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -120,11 +100,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&camellia_ctr, req); -} - static struct skcipher_alg camellia_algs[] = { { .base.cra_name = "__ecb(camellia)", @@ -153,21 +128,6 @@ static struct skcipher_alg camellia_algs[] = { .setkey = camellia_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(camellia)", - .base.cra_driver_name = "__ctr-camellia-aesni", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct camellia_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .chunksize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, } }; diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 242c056e5fa8..fefeedf2b33d 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -1274,42 +1274,6 @@ void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s) } EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); -void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - if (dst != src) - *dst = *src; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); -} -EXPORT_SYMBOL_GPL(camellia_crypt_ctr); - -void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblks[2]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - } - - le128_to_be128(&ctrblks[0], iv); - le128_inc(iv); - le128_to_be128(&ctrblks[1], iv); - le128_inc(iv); - - camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); -} -EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way); - static const struct common_glue_ctx camellia_enc = { .num_funcs = 2, .fpu_blocks_limit = -1, @@ -1323,19 +1287,6 @@ static const struct common_glue_ctx camellia_enc = { } } }; -static const struct common_glue_ctx camellia_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 2, - .fn_u = { .ctr = camellia_crypt_ctr_2way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = camellia_crypt_ctr } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 2, .fpu_blocks_limit = -1, @@ -1382,11 +1333,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&camellia_ctr, req); -} - static struct crypto_alg camellia_cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-asm", @@ -1433,20 +1379,6 @@ static struct skcipher_alg camellia_skcipher_algs[] = { .setkey = camellia_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "ctr(camellia)", - .base.cra_driver_name = "ctr-camellia-asm", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct camellia_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = 
CAMELLIA_BLOCK_SIZE, - .chunksize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, } }; diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 0e5f82adbaf9..1dcea79e8f8e 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -38,8 +38,6 @@ asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src) { @@ -65,9 +63,5 @@ static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst, /* glue helpers */ extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src); -extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); #endif /* ASM_X86_CAMELLIA_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 7ad9bf84f4a0..ea788cab8c7d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1286,6 +1286,7 @@ config CRYPTO_CAMELLIA_X86_64 depends on CRYPTO select CRYPTO_SKCIPHER select CRYPTO_GLUE_HELPER_X86 + imply CRYPTO_CTR help Camellia cipher algorithm module (x86_64). From patchwork Thu Dec 31 17:23:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994297 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7BB66C43381 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 4E4F7223E8 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726883AbgLaRZQ (ORCPT ); Thu, 31 Dec 2020 12:25:16 -0500 Received: from mail.kernel.org ([198.145.29.99]:55050 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726540AbgLaRZQ (ORCPT ); Thu, 31 Dec 2020 12:25:16 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 5ACF22247F; Thu, 31 Dec 2020 17:24:04 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435446; bh=XAVqDW0bAWsrKd+67L2IKk7dIn9+Ngq/EVd/jpxzSaI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=YQxMc3oVIS4ku8/vLRYWoMW8Dan7vlUdvNG+V0HN7WQgV3kBRw+NJgIRPo7IweGeL fHnY2uI1lpIDccUNnN3lM8oNcHLMXCrU+93A+7ydQrGW8OYWynQt6uj0lAwos2uLKH kqc/M1ooUACegdIze2NET0JifCHOIcOxp5k+Q5GC60Rwuv6IOGdotuWE1yBk3a7LmH ZzN3pmq6vR8KT/Xi1+9GiDWRhKqHLaos9Bkqv5xCigE9POpftVGxoU2DWQZXk7Ym9w InzDpfRNjh47RAGYUCDTMMBzez1K+BALKNp57/3P8/0ImDZLqzcAnYAf169Jio5FIQ dFrbXmJvwknOA== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha 
Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 07/21] crypto: x86/serpent - drop CTR mode implementation Date: Thu, 31 Dec 2020 18:23:23 +0100 Message-Id: <20201231172337.23073-8-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Serpent in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 20 ------ arch/x86/crypto/serpent-avx2-asm_64.S | 25 -------- arch/x86/crypto/serpent_avx2_glue.c | 38 ----------- arch/x86/crypto/serpent_avx_glue.c | 51 --------------- arch/x86/crypto/serpent_sse2_glue.c | 67 -------------------- crypto/Kconfig | 3 + 6 files changed, 3 insertions(+), 201 deletions(-) diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 6b41f46bcc76..b7ee24df7fba 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -711,23 +711,3 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx) FRAME_END ret; SYM_FUNC_END(serpent_cbc_dec_8way_avx) - -SYM_FUNC_START(serpent_ctr_8way_avx) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RK0, RK1, RK2); - - call __serpent_enc_blk8_avx; - - store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - FRAME_END - ret; -SYM_FUNC_END(serpent_ctr_8way_avx) diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index a510a949f02f..9161b6e441f3 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -724,28 +724,3 @@ SYM_FUNC_START(serpent_cbc_dec_16way) FRAME_END ret; SYM_FUNC_END(serpent_cbc_dec_16way) - -SYM_FUNC_START(serpent_ctr_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - vzeroupper; - - load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, - tp); - - call __serpent_enc_blk16; - - store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(serpent_ctr_16way) diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 9cdf2c078e21..28e542c6512a 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -22,8 +22,6 @@ asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -46,22 +44,6 @@ static const struct common_glue_ctx serpent_enc = { } } }; -static const struct common_glue_ctx serpent_ctr = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - 
.num_blocks = 16, - .fn_u = { .ctr = serpent_ctr_16way } - }, { - .num_blocks = 8, - .fn_u = { .ctr = serpent_ctr_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ctr = __serpent_crypt_ctr } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 3, .fpu_blocks_limit = 8, @@ -114,11 +96,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&serpent_ctr, req); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -147,21 +124,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(serpent)", - .base.cra_driver_name = "__ctr-serpent-avx2", - .base.cra_priority = 600, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct serpent_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .chunksize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index b17a08b57a91..aa4605baf9d4 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -31,24 +31,6 @@ asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx); -asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx); - -void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - u128_xor(dst, src, (u128 *)&ctrblk); -} -EXPORT_SYMBOL_GPL(__serpent_crypt_ctr); - static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -68,19 +50,6 @@ static const struct common_glue_ctx serpent_enc = { } } }; -static const struct common_glue_ctx serpent_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = serpent_ctr_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ctr = __serpent_crypt_ctr } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -127,11 +96,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&serpent_ctr, req); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -160,21 +124,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(serpent)", - .base.cra_driver_name = "__ctr-serpent-avx", - .base.cra_priority = 500, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct serpent_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = 
SERPENT_BLOCK_SIZE, - .chunksize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 4fed8d26b91a..9acb3bf28feb 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -10,8 +10,6 @@ * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten */ #include @@ -47,38 +45,6 @@ static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s) u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); } -static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - u128_xor(dst, src, (u128 *)&ctrblk); -} - -static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s, - le128 *iv) -{ - be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - unsigned int i; - - for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - le128_to_be128(&ctrblks[i], iv); - le128_inc(iv); - } - - serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); -} - static const struct common_glue_ctx serpent_enc = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -92,19 +58,6 @@ static const struct common_glue_ctx serpent_enc = { } } }; -static const struct common_glue_ctx serpent_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = serpent_crypt_ctr_xway } - }, { - .num_blocks = 1, - .fn_u = { .ctr = serpent_crypt_ctr } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -152,11 +105,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&serpent_ctr, req); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -185,21 +133,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(serpent)", - .base.cra_driver_name = "__ctr-serpent-sse2", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct serpent_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .chunksize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index ea788cab8c7d..dd48c3bab3f5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1539,6 +1539,7 @@ config CRYPTO_SERPENT_SSE2_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD + imply CRYPTO_CTR help Serpent cipher algorithm, by Anderson, Biham & Knudsen. 
@@ -1558,6 +1559,7 @@ config CRYPTO_SERPENT_SSE2_586 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD + imply CRYPTO_CTR help Serpent cipher algorithm, by Anderson, Biham & Knudsen. @@ -1578,6 +1580,7 @@ config CRYPTO_SERPENT_AVX_X86_64 select CRYPTO_SERPENT select CRYPTO_SIMD imply CRYPTO_XTS + imply CRYPTO_CTR help Serpent cipher algorithm, by Anderson, Biham & Knudsen. From patchwork Thu Dec 31 17:23:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994285 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 68E87C433E9 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 308922246B for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726885AbgLaRZP (ORCPT ); Thu, 31 Dec 2020 12:25:15 -0500 Received: from mail.kernel.org ([198.145.29.99]:55052 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726883AbgLaRZP (ORCPT ); Thu, 31 Dec 2020 12:25:15 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 8A0F922473; Thu, 31 Dec 2020 17:24:06 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435448; bh=Y/24W4qtBypfZpCpO5EBH4g2ZR8hiD5+VnNNEAsnRX0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=iWJ7b22N9qCflw1r8Olu1JjYz3fpTlvoj5Y3ep8SoyOoXw6o9NWRrl0/vXTOWFLUt sMJALcH2FTBKuiA7EAOj29Fjo/tjEjDl7TZ3+ED63KgOcg/EXuPESm7/lLn5vNV/Jn ET8wUh8uDbJ66CtM7bHh7pgU2qmyngNEFpl+9GgktT9SyAlpZYPHP6J9W79f9eL+zH W9xxkzmrHPCQPeDMd1i1eruk0ubIaRjfSbBF+/ka9g/yP1pb6lHbkgXYQPU9B5j4gv nnmmsnNvh8bYpc9dBaYGjdwjjkDdbh00SFk/PpJwb/KDbVSeiJk+IIXi8CqyCKLg3f lMLA2+fUT8EZQ== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 08/21] crypto: x86/cast5 - drop CTR mode implementation Date: Thu, 31 Dec 2020 18:23:24 +0100 Message-Id: <20201231172337.23073-9-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org CAST5 in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. 
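
[Editorial note, not part of the patch] From a caller's point of view nothing disappears: "ctr(cast5)" still resolves, now to the generic CTR template wrapped around the cast5 cipher, instantiated on first use (the `imply CRYPTO_CTR` hunk below makes the template available in typical configs). A small illustrative probe, not part of this patch:

#include <crypto/skcipher.h>

/* Hypothetical availability check for the template-backed mode. */
static bool have_ctr_cast5(void)
{
	return crypto_has_skcipher("ctr(cast5)", 0, 0);
}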
Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/cast5_avx_glue.c | 103 -------------------- crypto/Kconfig | 1 + 2 files changed, 1 insertion(+), 103 deletions(-) diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 384ccb00f9e1..e0d1c7903b29 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -23,8 +23,6 @@ asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src); asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src); -asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src, - __be64 *iv); static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -214,92 +212,6 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } -static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx) -{ - u8 *ctrblk = walk->iv; - u8 keystream[CAST5_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - __cast5_encrypt(ctx, keystream, ctrblk); - crypto_xor_cpy(dst, keystream, src, nbytes); - - crypto_inc(ctrblk, CAST5_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct skcipher_walk *walk, - struct cast5_ctx *ctx) -{ - const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - - /* Process multi-block batch */ - if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { - do { - cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src, - (__be64 *)walk->iv); - - src += CAST5_PARALLEL_BLOCKS; - dst += CAST5_PARALLEL_BLOCKS; - nbytes -= bsize * CAST5_PARALLEL_BLOCKS; - } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - u64 ctrblk; - - if (dst != src) - *dst = *src; - - ctrblk = *(u64 *)walk->iv; - be64_add_cpu((__be64 *)walk->iv, 1); - - __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - *dst ^= ctrblk; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - return nbytes; -} - -static int ctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); - bool fpu_enabled = false; - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); - nbytes = __ctr_crypt(&walk, ctx); - err = skcipher_walk_done(&walk, nbytes); - } - - cast5_fpu_end(fpu_enabled); - - if (walk.nbytes) { - ctr_crypt_final(&walk, ctx); - err = skcipher_walk_done(&walk, 0); - } - - return err; -} - static struct skcipher_alg cast5_algs[] = { { .base.cra_name = "__ecb(cast5)", @@ -328,21 +240,6 @@ static struct skcipher_alg cast5_algs[] = { .setkey = cast5_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(cast5)", - .base.cra_driver_name = "__ctr-cast5-avx", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct cast5_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .chunksize = CAST5_BLOCK_SIZE, - .setkey = cast5_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = 
ctr_crypt, } }; diff --git a/crypto/Kconfig b/crypto/Kconfig index dd48c3bab3f5..fed73fff5a65 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1372,6 +1372,7 @@ config CRYPTO_CAST5_AVX_X86_64 select CRYPTO_CAST5 select CRYPTO_CAST_COMMON select CRYPTO_SIMD + imply CRYPTO_CTR help The CAST5 encryption algorithm (synonymous with CAST-128) is described in RFC2144. From patchwork Thu Dec 31 17:23:25 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994287 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 9B305C433DB for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 690F422473 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726949AbgLaRZR (ORCPT ); Thu, 31 Dec 2020 12:25:17 -0500 Received: from mail.kernel.org ([198.145.29.99]:55066 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726540AbgLaRZR (ORCPT ); Thu, 31 Dec 2020 12:25:17 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 8BBE422475; Thu, 31 Dec 2020 17:24:08 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435450; bh=3Vhg56eWWqr7CVwxIZSbNWU/dThl7owwJz4vEc1demQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=jjzoa9nUflNaSYVqhXl/5Y18BRnLZvZTf7hoVa2I1oXGJqK7V3hGc7/Cma+AYDmV7 ub33rs2dzTdQU8SL4Bb0bziXYxS2Y8xy3OCa551TupS/qv6Y+dmbtsq/W5dKHJRQYI zJFvftSqJE2GhN/XGEfmn8ylMDCeb8UQOU5y+vSBISN1ysTvyZYkbKzBnijUU/stjV GmHcbF+cZIH6YlhE8K4wPoqnfjlvnORJKwuZYc75z0RyJRh2LJR0l1KFjtEvmOY5FT hhQQMY/lgGzxDQN46+5nAmf7b8apXeaPZ45+THxAYjun/7EVXhnhJcIApMUfTBm3h5 523bDvz31eQMA== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 09/21] crypto: x86/cast6 - drop CTR mode implementation Date: Thu, 31 Dec 2020 18:23:25 +0100 Message-Id: <20201231172337.23073-10-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org CAST6 in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. 
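
[Editorial note, not part of the patch] The mention of algif_skcipher refers to userspace requesting the mode over AF_ALG; such users also keep working, because the socket interface resolves the same algorithm name and triggers template instantiation in the kernel. A minimal userspace sketch, with error handling trimmed and the function name invented for this note:

#include <linux/if_alg.h>
#include <sys/socket.h>

/* Bind an AF_ALG skcipher socket to "ctr(cast6)"; the kernel will
 * instantiate the generic CTR template around the cast6 cipher. */
static int bind_ctr_cast6(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "skcipher",
		.salg_name   = "ctr(cast6)",
	};
	int fd = socket(AF_ALG, SOCK_SEQPACKET, 0);

	if (fd < 0 || bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return -1;

	/* a real user would now set the key with setsockopt(ALG_SET_KEY)
	 * and accept() an operation fd to send data through */
	return fd;
}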
Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 28 ------------ arch/x86/crypto/cast6_avx_glue.c | 48 -------------------- crypto/Kconfig | 1 + 3 files changed, 1 insertion(+), 76 deletions(-) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 0c1ea836215a..fbddcecc3e3f 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -410,31 +410,3 @@ SYM_FUNC_START(cast6_cbc_dec_8way) FRAME_END ret; SYM_FUNC_END(cast6_cbc_dec_8way) - -SYM_FUNC_START(cast6_ctr_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - pushq %r12; - pushq %r15 - - movq %rdi, CTX; - movq %rsi, %r11; - movq %rdx, %r12; - - load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RX, RKR, RKM); - - call __cast6_enc_blk8; - - store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - popq %r15; - popq %r12; - FRAME_END - ret; -SYM_FUNC_END(cast6_ctr_8way) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 5a21d3e9041c..790efcb6df3b 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -23,8 +23,6 @@ asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -32,19 +30,6 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, return cast6_setkey(&tfm->base, key, keylen); } -static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - u128_xor(dst, src, (u128 *)&ctrblk); -} - static const struct common_glue_ctx cast6_enc = { .num_funcs = 2, .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, @@ -58,19 +43,6 @@ static const struct common_glue_ctx cast6_enc = { } } }; -static const struct common_glue_ctx cast6_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ctr = cast6_ctr_8way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = cast6_crypt_ctr } - } } -}; - static const struct common_glue_ctx cast6_dec = { .num_funcs = 2, .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, @@ -117,11 +89,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&cast6_ctr, req); -} - static struct skcipher_alg cast6_algs[] = { { .base.cra_name = "__ecb(cast6)", @@ -150,21 +117,6 @@ static struct skcipher_alg cast6_algs[] = { .setkey = cast6_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(cast6)", - .base.cra_driver_name = "__ctr-cast6-avx", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct cast6_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = 
CAST6_BLOCK_SIZE, - .chunksize = CAST6_BLOCK_SIZE, - .setkey = cast6_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index fed73fff5a65..3f51c5dfc2a9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1397,6 +1397,7 @@ config CRYPTO_CAST6_AVX_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD imply CRYPTO_XTS + imply CRYPTO_CTR help The CAST6 encryption algorithm (synonymous with CAST-256) is described in RFC2612. From patchwork Thu Dec 31 17:23:26 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994291 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id AB8B0C4332B for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8A5122246B for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726540AbgLaRZS (ORCPT ); Thu, 31 Dec 2020 12:25:18 -0500 Received: from mail.kernel.org ([198.145.29.99]:55068 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726917AbgLaRZR (ORCPT ); Thu, 31 Dec 2020 12:25:17 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id E4E1022482; Thu, 31 Dec 2020 17:24:10 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435452; bh=MWJ+uVTwSBTinYo4I5XVzRSYZeX+sYaZRc5m+1X1ejs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=aFTdPIhBuRYxHkC4+5wOEsBGTDIVrP5Ou67R4R1RyFAJYPlTQEg9KORT8D+ueHbMv 8+nM9XrafwG3eCpIYkXhYq2q1abuaG2Ztt1mr4S4R5IhIn1wd+kdcDD8vZVKpaD1Ck A0zPc8lFJn56JfTvKvn2l+aXoykceAFPKjQC0vcQkVECI5kfEZesbnwoKjt7/PB9iy k+DjZPm/9Eej1w0+n8U2AQQB8S6EBDsX2ufMzW3FAyag56okDEuCmiWEzcWWOa33lS yRXlmmwVk+cJtw0Xl8F1AQzxw0vIqn9CoJEFqwS1auaetp+CT5gzb5xc30Jl2NUzim AfPix/1gT+tsg== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 10/21] crypto: x86/twofish - drop CTR mode implementation Date: Thu, 31 Dec 2020 18:23:26 +0100 Message-Id: <20201231172337.23073-11-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Twofish in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. 
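(Again not taken from this patch: a userspace consumer that did ask for CTR mode through algif_skcipher keeps working after this change, since "ctr(twofish)" is now instantiated from the generic ctr template and the bare twofish cipher. A rough AF_ALG sketch follows; the all-zero key and IV are purely illustrative, and error handling is omitted for brevity.)

#include <linux/if_alg.h>
#include <sys/socket.h>
#include <string.h>
#include <unistd.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "skcipher",
		.salg_name   = "ctr(twofish)",	/* now served by the ctr template */
	};
	unsigned char key[32] = { 0 };		/* illustrative all-zero key */
	unsigned char pt[16] = "sixteen byte pt";	/* one twofish block */
	unsigned char ct[16];
	char cbuf[CMSG_SPACE(sizeof(__u32)) +
		  CMSG_SPACE(sizeof(struct af_alg_iv) + 16)] = { 0 };
	struct iovec iov = { .iov_base = pt, .iov_len = sizeof(pt) };
	struct msghdr msg = { 0 };
	struct af_alg_iv *ivp;
	struct cmsghdr *cmsg;
	int tfmfd, opfd;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
	opfd = accept(tfmfd, NULL, 0);

	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	/* First control message: select the encrypt operation. */
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_OP;
	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
	*(__u32 *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	/* Second control message: block-sized initial counter value. */
	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_IV;
	cmsg->cmsg_len = CMSG_LEN(sizeof(*ivp) + 16);
	ivp = (void *)CMSG_DATA(cmsg);
	ivp->ivlen = 16;
	memset(ivp->iv, 0, 16);			/* illustrative all-zero IV */

	sendmsg(opfd, &msg, 0);
	read(opfd, ct, sizeof(ct));		/* ciphertext comes back here */

	close(opfd);
	close(tfmfd);
	return 0;
}
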
Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 27 ------- arch/x86/crypto/twofish_avx_glue.c | 38 ---------- arch/x86/crypto/twofish_glue_3way.c | 78 -------------------- arch/x86/include/asm/crypto/twofish.h | 4 - crypto/Kconfig | 2 + 5 files changed, 2 insertions(+), 147 deletions(-) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 84e61ef03638..37e63b3c664e 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -374,30 +374,3 @@ SYM_FUNC_START(twofish_cbc_dec_8way) FRAME_END ret; SYM_FUNC_END(twofish_cbc_dec_8way) - -SYM_FUNC_START(twofish_ctr_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - pushq %r12; - - movq %rsi, %r11; - movq %rdx, %r12; - - load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RX0, RX1, RY0); - - call __twofish_enc_blk8; - - store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); - - popq %r12; - - FRAME_END - ret; -SYM_FUNC_END(twofish_ctr_8way) diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 7b539bbb108f..13f810b61034 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -25,8 +25,6 @@ asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -55,22 +53,6 @@ static const struct common_glue_ctx twofish_enc = { } } }; -static const struct common_glue_ctx twofish_ctr = { - .num_funcs = 3, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ctr = twofish_ctr_8way } - }, { - .num_blocks = 3, - .fn_u = { .ctr = twofish_enc_blk_ctr_3way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = twofish_enc_blk_ctr } - } } -}; - static const struct common_glue_ctx twofish_dec = { .num_funcs = 3, .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, @@ -123,11 +105,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&twofish_ctr, req); -} - static struct skcipher_alg twofish_algs[] = { { .base.cra_name = "__ecb(twofish)", @@ -156,21 +133,6 @@ static struct skcipher_alg twofish_algs[] = { .setkey = twofish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(twofish)", - .base.cra_driver_name = "__ctr-twofish-avx", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct twofish_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .chunksize = TF_BLOCK_SIZE, - .setkey = twofish_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 768af6075479..88252370db0a 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ 
-30,12 +30,6 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) __twofish_enc_blk_3way(ctx, dst, src, false); } -static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_3way(ctx, dst, src, true); -} - void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) { u128 ivs[2]; @@ -52,46 +46,6 @@ void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) } EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); -void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - if (dst != src) - *dst = *src; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - u128_xor(dst, dst, (u128 *)&ctrblk); -} -EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); - -void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblks[3]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - } - - le128_to_be128(&ctrblks[0], iv); - le128_inc(iv); - le128_to_be128(&ctrblks[1], iv); - le128_inc(iv); - le128_to_be128(&ctrblks[2], iv); - le128_inc(iv); - - twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); -} -EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); - static const struct common_glue_ctx twofish_enc = { .num_funcs = 2, .fpu_blocks_limit = -1, @@ -105,19 +59,6 @@ static const struct common_glue_ctx twofish_enc = { } } }; -static const struct common_glue_ctx twofish_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 3, - .fn_u = { .ctr = twofish_enc_blk_ctr_3way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = twofish_enc_blk_ctr } - } } -}; - static const struct common_glue_ctx twofish_dec = { .num_funcs = 2, .fpu_blocks_limit = -1, @@ -164,11 +105,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&twofish_ctr, req); -} - static struct skcipher_alg tf_skciphers[] = { { .base.cra_name = "ecb(twofish)", @@ -195,20 +131,6 @@ static struct skcipher_alg tf_skciphers[] = { .setkey = twofish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "ctr(twofish)", - .base.cra_driver_name = "ctr-twofish-3way", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct twofish_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .chunksize = TF_BLOCK_SIZE, - .setkey = twofish_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 2c377a8042e1..12df400e6d53 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -17,9 +17,5 @@ asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src); /* helpers from twofish_x86_64-3way module */ extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src); -extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); #endif /* ASM_X86_TWOFISH_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 
3f51c5dfc2a9..606f94079f05 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1680,6 +1680,7 @@ config CRYPTO_TWOFISH_586 depends on (X86 || UML_X86) && !64BIT select CRYPTO_ALGAPI select CRYPTO_TWOFISH_COMMON + imply CRYPTO_CTR help Twofish cipher algorithm. @@ -1696,6 +1697,7 @@ config CRYPTO_TWOFISH_X86_64 depends on (X86 || UML_X86) && 64BIT select CRYPTO_ALGAPI select CRYPTO_TWOFISH_COMMON + imply CRYPTO_CTR help Twofish cipher algorithm (x86_64). From patchwork Thu Dec 31 17:23:27 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994295 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id EB43BC43331 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id C12072246B for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726917AbgLaRZT (ORCPT ); Thu, 31 Dec 2020 12:25:19 -0500 Received: from mail.kernel.org ([198.145.29.99]:55082 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727008AbgLaRZT (ORCPT ); Thu, 31 Dec 2020 12:25:19 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 493D122483; Thu, 31 Dec 2020 17:24:12 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435455; bh=LZJ5rT51wGaVrRzcx+bBG8RjvAZl7QeBAeeLpY6Y61o=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ti9uoHWu5lLCiytbQe7648trnlnImnJaLzb3VgQisH6cz2Uw5hfwz0EU3zV+HS+io QYEWe3KmUp8fvb20kFmS8KLjEb6xGcm2GO381EE8OEo/98tF20qSUL/luYIcGr8Jb5 5dv1mED0Z50+Tz+uErLVfCxuYbtS3fvNjrp4SLjYFQL3WjLpHy19QrdSfAWdcH7WZs Sb66yWssAppPibT9ohWmooT4FvQEpfaMnraG5IerAehFLQUXjzVMAeBCUU7jY5ATZc OM32xNFw+OphRNLs9beD7POuf1snqCxgwM58j3uwQCYQ7q4dZ4UyNWOkcABJYLUwlt MjSEVMcUZHwdA== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 11/21] crypto: x86/glue-helper - drop CTR helper routines Date: Thu, 31 Dec 2020 18:23:27 +0100 Message-Id: <20201231172337.23073-12-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org The glue helper's CTR routines are no longer used, so drop them. 
Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/glue_helper-asm-avx.S | 45 ------------ arch/x86/crypto/glue_helper-asm-avx2.S | 58 ---------------- arch/x86/crypto/glue_helper.c | 72 -------------------- arch/x86/include/asm/crypto/glue_helper.h | 32 --------- 4 files changed, 207 deletions(-) diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S index a94511432803..3da385271227 100644 --- a/arch/x86/crypto/glue_helper-asm-avx.S +++ b/arch/x86/crypto/glue_helper-asm-avx.S @@ -34,48 +34,3 @@ vpxor (5*16)(src), x6, x6; \ vpxor (6*16)(src), x7, x7; \ store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); - -#define inc_le128(x, minus_one, tmp) \ - vpcmpeqq minus_one, x, tmp; \ - vpsubq minus_one, x, x; \ - vpslldq $8, tmp, tmp; \ - vpsubq tmp, x, x; - -#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \ - vpcmpeqd t0, t0, t0; \ - vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \ - vmovdqa bswap, t1; \ - \ - /* load IV and byteswap */ \ - vmovdqu (iv), x7; \ - vpshufb t1, x7, x0; \ - \ - /* construct IVs */ \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x1; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x2; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x3; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x4; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x5; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x6; \ - inc_le128(x7, t0, t2); \ - vmovdqa x7, t2; \ - vpshufb t1, x7, x7; \ - inc_le128(t2, t0, t1); \ - vmovdqu t2, (iv); - -#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \ - vpxor (0*16)(src), x0, x0; \ - vpxor (1*16)(src), x1, x1; \ - vpxor (2*16)(src), x2, x2; \ - vpxor (3*16)(src), x3, x3; \ - vpxor (4*16)(src), x4, x4; \ - vpxor (5*16)(src), x5, x5; \ - vpxor (6*16)(src), x6, x6; \ - vpxor (7*16)(src), x7, x7; \ - store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S index 456bface1e5d..c77e9049431f 100644 --- a/arch/x86/crypto/glue_helper-asm-avx2.S +++ b/arch/x86/crypto/glue_helper-asm-avx2.S @@ -37,61 +37,3 @@ vpxor (5*32+16)(src), x6, x6; \ vpxor (6*32+16)(src), x7, x7; \ store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); - -#define inc_le128(x, minus_one, tmp) \ - vpcmpeqq minus_one, x, tmp; \ - vpsubq minus_one, x, x; \ - vpslldq $8, tmp, tmp; \ - vpsubq tmp, x, x; - -#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \ - vpcmpeqq minus_one, x, tmp1; \ - vpcmpeqq minus_two, x, tmp2; \ - vpsubq minus_two, x, x; \ - vpor tmp2, tmp1, tmp1; \ - vpslldq $8, tmp1, tmp1; \ - vpsubq tmp1, x, x; - -#define load_ctr_16way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t0x, t1, \ - t1x, t2, t2x, t3, t3x, t4, t5) \ - vpcmpeqd t0, t0, t0; \ - vpsrldq $8, t0, t0; /* ab: -1:0 ; cd: -1:0 */ \ - vpaddq t0, t0, t4; /* ab: -2:0 ; cd: -2:0 */\ - \ - /* load IV and byteswap */ \ - vmovdqu (iv), t2x; \ - vmovdqa t2x, t3x; \ - inc_le128(t2x, t0x, t1x); \ - vbroadcasti128 bswap, t1; \ - vinserti128 $1, t2x, t3, t2; /* ab: le0 ; cd: le1 */ \ - vpshufb t1, t2, x0; \ - \ - /* construct IVs */ \ - add2_le128(t2, t0, t4, t3, t5); /* ab: le2 ; cd: le3 */ \ - vpshufb t1, t2, x1; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x2; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x3; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x4; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x5; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x6; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, 
t2, x7; \ - vextracti128 $1, t2, t2x; \ - inc_le128(t2x, t0x, t3x); \ - vmovdqu t2x, (iv); - -#define store_ctr_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \ - vpxor (0*32)(src), x0, x0; \ - vpxor (1*32)(src), x1, x1; \ - vpxor (2*32)(src), x2, x2; \ - vpxor (3*32)(src), x3, x3; \ - vpxor (4*32)(src), x4, x4; \ - vpxor (5*32)(src), x5, x5; \ - vpxor (6*32)(src), x6, x6; \ - vpxor (7*32)(src), x7, x7; \ - store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 786ffda1caf4..895d34150c3f 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -6,8 +6,6 @@ * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten */ #include @@ -154,74 +152,4 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); -int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req) -{ - void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); - const unsigned int bsize = 128 / 8; - struct skcipher_walk walk; - bool fpu_enabled = false; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) >= bsize) { - const u128 *src = walk.src.virt.addr; - u128 *dst = walk.dst.virt.addr; - unsigned int func_bytes, num_blocks; - unsigned int i; - le128 ctrblk; - - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); - - be128_to_le128(&ctrblk, (be128 *)walk.iv); - - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes < func_bytes) - continue; - - /* Process multi-block batch */ - do { - gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst, - (const u8 *)src, - &ctrblk); - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - break; - } - - le128_to_be128((be128 *)walk.iv, &ctrblk); - err = skcipher_walk_done(&walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - - if (nbytes) { - le128 ctrblk; - u128 tmp; - - be128_to_le128(&ctrblk, (be128 *)walk.iv); - memcpy(&tmp, walk.src.virt.addr, nbytes); - gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp, - (const u8 *)&tmp, - &ctrblk); - memcpy(walk.dst.virt.addr, &tmp, nbytes); - le128_to_be128((be128 *)walk.iv, &ctrblk); - - err = skcipher_walk_done(&walk, 0); - } - - return err; -} -EXPORT_SYMBOL_GPL(glue_ctr_req_128bit); - MODULE_LICENSE("GPL"); diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 62680775d189..23e09efd2aa6 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -9,19 +9,15 @@ #include #include #include -#include typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src); typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); struct common_glue_func_entry { unsigned int num_blocks; /* number of blocks that @fn will process */ union { common_glue_func_t ecb; common_glue_cbc_func_t cbc; - common_glue_ctr_func_t ctr; } fn_u; }; @@ -66,31 +62,6 @@ static inline void glue_fpu_end(bool fpu_enabled) kernel_fpu_end(); } -static inline void le128_to_be128(be128 *dst, const le128 
*src) -{ - dst->a = cpu_to_be64(le64_to_cpu(src->a)); - dst->b = cpu_to_be64(le64_to_cpu(src->b)); -} - -static inline void be128_to_le128(le128 *dst, const be128 *src) -{ - dst->a = cpu_to_le64(be64_to_cpu(src->a)); - dst->b = cpu_to_le64(be64_to_cpu(src->b)); -} - -static inline void le128_inc(le128 *i) -{ - u64 a = le64_to_cpu(i->a); - u64 b = le64_to_cpu(i->b); - - b++; - if (!b) - a++; - - i->a = cpu_to_le64(a); - i->b = cpu_to_le64(b); -} - extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); @@ -100,7 +71,4 @@ extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); -extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - #endif /* _CRYPTO_GLUE_HELPER_H */ From patchwork Thu Dec 31 17:23:28 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994289 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id CCB2DC4332D for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AB02A2242A for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727029AbgLaRZT (ORCPT ); Thu, 31 Dec 2020 12:25:19 -0500 Received: from mail.kernel.org ([198.145.29.99]:55084 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726917AbgLaRZT (ORCPT ); Thu, 31 Dec 2020 12:25:19 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 16B6A224B0; Thu, 31 Dec 2020 17:24:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435458; bh=fKXmKV7pdES/OQRIcaMDkJbEJVlbbOHwf4jakfmXbDQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=QopIQTLpit1qnAsn7GhhtK/76reefE38uMsASl86aFttrM/VitDeK64Jjl27QnuKH w0cRBtZOhSJkpg1Rm1msv1DJPgxj0XgW4NwRI5cQ30s/1ASHWAKDvIbbocV6UI8QlP KGKBfPiH1v00DhHVtibqxR3zIQ4jDZHSrhhHHJomQd98urrf8BvD3Q/HmbJYS4+vRF sljm4HcVt5thSNtOXpNoHQo+8TNuuu3Ry2cdyL6C29MI4mWnu35+Z2xPdixUygjHck B2zigR/b5s0EXKyBECZRbaZlw/WJrXt5R5Y7TfPnyM+3cbTOBtp4J2Y/yR+T2u60BE VF5O5FX2wjunw== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 12/21] crypto: x86/des - drop CTR mode implementation Date: Thu, 31 Dec 2020 18:23:28 +0100 Message-Id: <20201231172337.23073-13-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org DES or Triple DES in counter mode is never used in the kernel, so there is no point in keeping an accelerated implementation around. 
Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/des3_ede_glue.c | 104 -------------------- crypto/Kconfig | 1 + 2 files changed, 1 insertion(+), 104 deletions(-) diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 89830e531350..e7cb68a3db3b 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -6,8 +6,6 @@ * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten */ #include @@ -253,94 +251,6 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } -static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, - struct skcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[DES3_EDE_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - des3_ede_enc_blk(ctx, keystream, ctrblk); - crypto_xor_cpy(dst, keystream, src, nbytes); - - crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx, - struct skcipher_walk *walk) -{ - unsigned int bsize = DES3_EDE_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - __be64 *src = (__be64 *)walk->src.virt.addr; - __be64 *dst = (__be64 *)walk->dst.virt.addr; - u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); - __be64 ctrblocks[3]; - - /* Process four block batch */ - if (nbytes >= bsize * 3) { - do { - /* create ctrblks for parallel encrypt */ - ctrblocks[0] = cpu_to_be64(ctrblk++); - ctrblocks[1] = cpu_to_be64(ctrblk++); - ctrblocks[2] = cpu_to_be64(ctrblk++); - - des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks, - (u8 *)ctrblocks); - - dst[0] = src[0] ^ ctrblocks[0]; - dst[1] = src[1] ^ ctrblocks[1]; - dst[2] = src[2] ^ ctrblocks[2]; - - src += 3; - dst += 3; - } while ((nbytes -= bsize * 3) >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - ctrblocks[0] = cpu_to_be64(ctrblk++); - - des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); - - dst[0] = src[0] ^ ctrblocks[0]; - - src += 1; - dst += 1; - } while ((nbytes -= bsize) >= bsize); - -done: - *(__be64 *)walk->iv = cpu_to_be64(ctrblk); - return nbytes; -} - -static int ctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { - nbytes = __ctr_crypt(ctx, &walk); - err = skcipher_walk_done(&walk, nbytes); - } - - if (nbytes) { - ctr_crypt_final(ctx, &walk); - err = skcipher_walk_done(&walk, 0); - } - - return err; -} - static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { @@ -428,20 +338,6 @@ static struct skcipher_alg des3_ede_skciphers[] = { .setkey = des3_ede_x86_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "ctr(des3_ede)", - .base.cra_driver_name = "ctr-des3_ede-asm", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .chunksize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_x86_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, } }; diff --git a/crypto/Kconfig 
b/crypto/Kconfig index 606f94079f05..5e820a57d138 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1427,6 +1427,7 @@ config CRYPTO_DES3_EDE_X86_64 depends on X86 && 64BIT select CRYPTO_SKCIPHER select CRYPTO_LIB_DES + imply CRYPTO_CTR help Triple DES EDE (FIPS 46-3) algorithm. From patchwork Thu Dec 31 17:23:29 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994293 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 011F0C4332E for ; Thu, 31 Dec 2020 17:25:31 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D4B44223E8 for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727134AbgLaRZV (ORCPT ); Thu, 31 Dec 2020 12:25:21 -0500 Received: from mail.kernel.org ([198.145.29.99]:55086 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727008AbgLaRZV (ORCPT ); Thu, 31 Dec 2020 12:25:21 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 6AB7E224B2; Thu, 31 Dec 2020 17:24:19 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435461; bh=Tp36mZIS84xfM5CYG/KD99bEcfDFJ5aHV/dLNt3SrXQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=KyQbR3pcNWtR/mUaRR8b+oDxqXZvkY3vdBGK/qdFQcR3z+DSkUjX4QJTs5tSbTi2Y HeN1qkS1zTC4XvRC6Am1QcZS9QoqBFysUbZOjzupv2NPYBOaXdcas9VC+o39h8Kzpe 2Wz1tL8iel2zsm3GmSnaSQZ+11p87LgF/cd1WOvspUbfUSwM/fkd9Wa1C58KFgxyE3 7Q1ug/GUxK/bY+OtOgIAAAvQgogZgXZC1bDA/rAvqS9XG6dtl1WQaO95AbzkEH4qZ/ c9I4w/RbUTG42Y2fB1+mvK4MZ2bLmIakUuvotOUdujiL57C9BAY3zToY39va0dXXwv r6rIj4jzc2IHg== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 13/21] crypto: x86/blowfish - drop CTR mode implementation Date: Thu, 31 Dec 2020 18:23:29 +0100 Message-Id: <20201231172337.23073-14-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Blowfish in counter mode is never used in the kernel, so there is no point in keeping an accelerated implementation around. Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/blowfish_glue.c | 107 -------------------- crypto/Kconfig | 1 + 2 files changed, 1 insertion(+), 107 deletions(-) diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index cedfdba69ce3..a880e0b1c255 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -6,8 +6,6 @@ * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 
2007 - Joy Latten */ #include @@ -247,97 +245,6 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } -static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[BF_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - blowfish_enc_blk(ctx, keystream, ctrblk); - crypto_xor_cpy(dst, keystream, src, nbytes); - - crypto_inc(ctrblk, BF_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk) -{ - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); - __be64 ctrblocks[4]; - - /* Process four block batch */ - if (nbytes >= bsize * 4) { - do { - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - } - - /* create ctrblks for parallel encrypt */ - ctrblocks[0] = cpu_to_be64(ctrblk++); - ctrblocks[1] = cpu_to_be64(ctrblk++); - ctrblocks[2] = cpu_to_be64(ctrblk++); - ctrblocks[3] = cpu_to_be64(ctrblk++); - - blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += 4; - dst += 4; - } while ((nbytes -= bsize * 4) >= bsize * 4); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - ctrblocks[0] = cpu_to_be64(ctrblk++); - - blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); - - src += 1; - dst += 1; - } while ((nbytes -= bsize) >= bsize); - -done: - *(__be64 *)walk->iv = cpu_to_be64(ctrblk); - return nbytes; -} - -static int ctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { - nbytes = __ctr_crypt(ctx, &walk); - err = skcipher_walk_done(&walk, nbytes); - } - - if (nbytes) { - ctr_crypt_final(ctx, &walk); - err = skcipher_walk_done(&walk, 0); - } - - return err; -} - static struct crypto_alg bf_cipher_alg = { .cra_name = "blowfish", .cra_driver_name = "blowfish-asm", @@ -384,20 +291,6 @@ static struct skcipher_alg bf_skcipher_algs[] = { .setkey = blowfish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "ctr(blowfish)", - .base.cra_driver_name = "ctr-blowfish-asm", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct bf_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .chunksize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index 5e820a57d138..24c0e001d06d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1255,6 +1255,7 @@ config CRYPTO_BLOWFISH_X86_64 depends on X86 && 64BIT select CRYPTO_SKCIPHER select CRYPTO_BLOWFISH_COMMON + imply CRYPTO_CTR help Blowfish cipher algorithm (x86_64), by Bruce Schneier. 
From patchwork Thu Dec 31 17:23:30 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994299 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3506AC43332 for ; Thu, 31 Dec 2020 17:25:31 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 00FFD2246B for ; Thu, 31 Dec 2020 17:25:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727008AbgLaRZV (ORCPT ); Thu, 31 Dec 2020 12:25:21 -0500 Received: from mail.kernel.org ([198.145.29.99]:55088 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727094AbgLaRZV (ORCPT ); Thu, 31 Dec 2020 12:25:21 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id F32C7224BD; Thu, 31 Dec 2020 17:24:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435464; bh=KhyhXF1Bg7LO728Rqjs0ZDxux3uY4R6U1lxcsB2KOy8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=PI6/yGyYqlLq+gZX9kDwCz4h8kZUKYWyzP4enyV1AURrZ83EyHJmSBOujWTAdPhy1 0shaZoOEnu0HQ0d3UBXN8bm+ox5KmIABmBMnQIhodFuNR5Kaz5oGioiBoW+bc71EwU 1wGTDPh/rCyeWfFyz/6aoqEmZPmqEU8Ej/bGuy9IUOdoZcQ1Eeeb4xuFmea/of7S+G MDgyjPv1d5VDeicQX1We6O6EAewx3w4BO7i9URIQNZb/rukYkoxJXCzEojPAyZCw6W 8P0k7ra17E+HslG2ykEjaUfe2MRcjJoi3LIk21P/UAkJUgbJY+7zKN5YgQwwbX/0s0 cQ9kkxnjohvKQ== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 14/21] crypto: x86 - add some helper macros for ECB and CBC modes Date: Thu, 31 Dec 2020 18:23:30 +0100 Message-Id: <20201231172337.23073-15-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org The x86 glue helper module has started to show its age: - It relies heavily on function pointers to invoke asm helper functions that operate on fixed input sizes that are relatively small. This means the performance is severely impacted by retpolines. - It goes to great lengths to amortize the cost of kernel_fpu_begin()/end() over as much work as possible, which is no longer necessary now that FPU save/restore is done lazily, and doing so may cause unbounded scheduling blackouts due to the fact that enabling the FPU in kernel mode disables preemption. - The CBC mode decryption helper makes backward strides through the input, in order to avoid a single block size memcpy() between chunks. Consuming the input in this manner is highly likely to defeat any hardware prefetchers, so it is better to go through the data linearly, and perform the extra memcpy() where needed (which is turned into direct loads and stores by the compiler anyway). Note that benchmarks won't show this effect, given that the memory they use is always cache hot. 
GCC does not seem to be smart enough to elide the indirect calls when the function pointers are passed as arguments to static inline helper routines modeled after the existing ones. So instead, let's create some CPP macros that encapsulate the core of the ECB and CBC processing, so we can wire them up for existing users of the glue helper module, i.e., Camellia, Serpent, Twofish and CAST6. Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/ecb_cbc_helpers.h | 71 ++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/arch/x86/crypto/ecb_cbc_helpers.h b/arch/x86/crypto/ecb_cbc_helpers.h new file mode 100644 index 000000000000..c4e6c0f50bf5 --- /dev/null +++ b/arch/x86/crypto/ecb_cbc_helpers.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _CRYPTO_ECB_CBC_HELPER_H +#define _CRYPTO_ECB_CBC_HELPER_H + +#include +#include + +/* + * Mode helpers to instantiate parameterized skcipher ECB/CBC modes without + * having to rely on indirect calls and retpolines. + */ + +#define ECB_WALK_START(req, bsize, fpu_blocks) do { \ + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); \ + const int __bsize = (bsize); \ + struct skcipher_walk walk; \ + int err = skcipher_walk_virt(&walk, (req), false); \ + while (walk.nbytes > 0) { \ + unsigned int nbytes = walk.nbytes; \ + bool do_fpu = (fpu_blocks) != -1 && \ + nbytes >= (fpu_blocks) * __bsize; \ + const u8 *src = walk.src.virt.addr; \ + u8 *dst = walk.dst.virt.addr; \ + u8 __maybe_unused buf[(bsize)]; \ + if (do_fpu) kernel_fpu_begin() + +#define CBC_WALK_START(req, bsize, fpu_blocks) \ + ECB_WALK_START(req, bsize, fpu_blocks) + +#define ECB_WALK_ADVANCE(blocks) do { \ + dst += (blocks) * __bsize; \ + src += (blocks) * __bsize; \ + nbytes -= (blocks) * __bsize; \ +} while (0) + +#define ECB_BLOCK(blocks, func) do \ + while (nbytes >= (blocks) * __bsize) { \ + (func)(ctx, dst, src); \ + ECB_WALK_ADVANCE(blocks); \ + } while (0) + +#define CBC_ENC_BLOCK(func) do \ + while (nbytes >= __bsize) { \ + crypto_xor_cpy(dst, src, walk.iv, __bsize); \ + (func)(ctx, dst, dst); \ + memcpy(walk.iv, dst, __bsize); \ + ECB_WALK_ADVANCE(1); \ + } while (0) + +#define CBC_DEC_BLOCK(blocks, func) do \ + while (nbytes >= (blocks) * __bsize) { \ + const u8 *__s = src + ((blocks) - 1) * __bsize; \ + if (dst == src) \ + __s = memcpy(buf, __s, __bsize); \ + (func)(ctx, dst, src); \ + crypto_xor(dst, walk.iv, __bsize); \ + memcpy(walk.iv, __s, __bsize); \ + ECB_WALK_ADVANCE(blocks); \ + } while (0) + +#define ECB_WALK_END() \ + if (do_fpu) kernel_fpu_end(); \ + err = skcipher_walk_done(&walk, nbytes); \ + } \ + return err; \ +} while (0) + +#define CBC_WALK_END() ECB_WALK_END() + +#endif From patchwork Thu Dec 31 17:23:31 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994307 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3BA6EC432C3 for ; Thu, 31 Dec 2020 17:25:31 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org 
[23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 17BA4223E8 for ; Thu, 31 Dec 2020 17:25:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727140AbgLaRZa (ORCPT ); Thu, 31 Dec 2020 12:25:30 -0500 Received: from mail.kernel.org ([198.145.29.99]:55028 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727094AbgLaRZ3 (ORCPT ); Thu, 31 Dec 2020 12:25:29 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 623A6224D2; Thu, 31 Dec 2020 17:24:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435466; bh=kHQp5VXN9RcfosAkXgXkihxIjrqsHUOoSM9ko0R2Q3c=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LfiqT114xI3ozoEVYkkMJwhSQMs6MVWqLKK32STyTdRn0u6Qjw9lrJdGMlt9bQ63i 1s33PvNKYI3P3kHGKOx70cAnZ37PBFL+HPTB7dh2vP/FEMzM+mbRLbGmNnNZ2zvBpF rbtrY9OE8CcKq9pVQ7xQhgyRCL8aWxTmf89wrkrZKbxTwHOZ2DrXkJ28Gv5wshISw0 xCCU1mYHvoCK2nHoAPiVNmeBx0a9VJGIrRNwPoSVhxpLLfdUecki2ZUc07KU7p7GCx Z+gA/87aVvLD/1TzOc8vDG+W/8IFtkFZ6IbXoO9Q+Bvi51O8N9O/3V2ZHoY9b8VoYN 2tkirFhDP0Hqw== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 15/21] crypto: x86/camellia - drop dependency on glue helper Date: Thu, 31 Dec 2020 18:23:31 +0100 Message-Id: <20201231172337.23073-16-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/camellia_aesni_avx2_glue.c | 85 ++++++-------------- arch/x86/crypto/camellia_aesni_avx_glue.c | 73 +++++------------ arch/x86/crypto/camellia_glue.c | 61 ++++---------- crypto/Kconfig | 2 - 4 files changed, 60 insertions(+), 161 deletions(-) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 8f25a2a6222e..ef5c0f094584 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -14,6 +13,8 @@ #include #include +#include "ecb_cbc_helpers.h" + #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 @@ -23,63 +24,6 @@ asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); -static const struct common_glue_ctx camellia_enc = { - .num_funcs = 4, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_enc_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_enc_16way } - }, { - .num_blocks = 2, - .fn_u = { .ecb = camellia_enc_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_enc_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec = { - .num_funcs = 4, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_dec_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_dec_16way } - }, { - 
.num_blocks = 2, - .fn_u = { .ecb = camellia_dec_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_dec_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec_cbc = { - .num_funcs = 4, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .cbc = camellia_cbc_dec_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .cbc = camellia_cbc_dec_16way } - }, { - .num_blocks = 2, - .fn_u = { .cbc = camellia_decrypt_cbc_2way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = camellia_dec_blk } - } } -}; - static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -88,22 +32,39 @@ static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_enc, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_ecb_enc_32way); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_enc_16way); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_dec, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_ecb_dec_32way); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_dec_16way); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_cbc_dec_32way); + CBC_DEC_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_cbc_dec_16way); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); } static struct skcipher_alg camellia_algs[] = { diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 22a89cdfedfb..68fed0a79889 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -14,6 +13,8 @@ #include #include +#include "ecb_cbc_helpers.h" + #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 /* 16-way parallel cipher functions (avx/aes-ni) */ @@ -26,54 +27,6 @@ EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way); asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way); -static const struct common_glue_ctx camellia_enc = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_enc_16way } - }, { - .num_blocks = 2, - .fn_u = { .ecb = camellia_enc_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_enc_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = 
CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_dec_16way } - }, { - .num_blocks = 2, - .fn_u = { .ecb = camellia_dec_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_dec_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec_cbc = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .cbc = camellia_cbc_dec_16way } - }, { - .num_blocks = 2, - .fn_u = { .cbc = camellia_decrypt_cbc_2way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = camellia_dec_blk } - } } -}; - static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -82,22 +35,36 @@ static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_enc, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_enc_16way); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_dec, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_dec_16way); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_cbc_dec_16way); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); } static struct skcipher_alg camellia_algs[] = { diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index fefeedf2b33d..6c314bb46211 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "ecb_cbc_helpers.h" /* regular block cipher functions */ asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, @@ -1274,63 +1275,35 @@ void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s) } EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); -static const struct common_glue_ctx camellia_enc = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 2, - .fn_u = { .ecb = camellia_enc_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_enc_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 2, - .fn_u = { .ecb = camellia_dec_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_dec_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 2, - .fn_u = { .cbc = camellia_decrypt_cbc_2way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = camellia_dec_blk } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_enc, req); + ECB_WALK_START(req, 
CAMELLIA_BLOCK_SIZE, -1); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_dec, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); } static struct crypto_alg camellia_cipher_alg = { diff --git a/crypto/Kconfig b/crypto/Kconfig index 24c0e001d06d..f8518ff389bb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1286,7 +1286,6 @@ config CRYPTO_CAMELLIA_X86_64 depends on X86 && 64BIT depends on CRYPTO select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 imply CRYPTO_CTR help Camellia cipher algorithm module (x86_64). @@ -1305,7 +1304,6 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 depends on CRYPTO select CRYPTO_SKCIPHER select CRYPTO_CAMELLIA_X86_64 - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD imply CRYPTO_XTS help From patchwork Thu Dec 31 17:23:32 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994311 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id CD0F7C433DB for ; Thu, 31 Dec 2020 17:25:56 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 899CE223DB for ; Thu, 31 Dec 2020 17:25:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726618AbgLaRZ4 (ORCPT ); Thu, 31 Dec 2020 12:25:56 -0500 Received: from mail.kernel.org ([198.145.29.99]:55272 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727143AbgLaRZz (ORCPT ); Thu, 31 Dec 2020 12:25:55 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 7A5BE224D3; Thu, 31 Dec 2020 17:24:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435468; bh=VqT/ENg3u52jjY/a1U5tgh7ke9DkjPAElNY3tRpx3NM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=m6Tp6zv5prdHthjRKG32D4k4XILtnS5IgVMRPW6Pj5ELKpei0VB4wW9Xin6XLLhKf 0/GiyqDJCNllE1tRRe6GPI8muw9G6JKrnwgMxKFYqj8LeMDSkschboHj3doefcZlCP 1NqhgZQahANGLiJtEGNwlGx35aeJs5r2BbtUn5vPUjQe/qNUNMHgez48DicKey1eA3 javx6Qa3GTqq8Vzv55lcAQd7Yn7rUU7FubLbGGaHblPGthnrxni6Nrkwumt9TxcITh zP+EOobqx6G6RPNPfdG0o95X0RzFQfh7GOe8rdiri7doFF49WUa4QrUq5T9Odzm3FV I1X25SwfIyFyw== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 
16/21] crypto: x86/serpent - drop dependency on glue helper Date: Thu, 31 Dec 2020 18:23:32 +0100 Message-Id: <20201231172337.23073-17-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/serpent_avx2_glue.c | 73 +++++------------- arch/x86/crypto/serpent_avx_glue.c | 61 ++++----------- arch/x86/crypto/serpent_sse2_glue.c | 81 ++++++-------------- crypto/Kconfig | 3 - 4 files changed, 61 insertions(+), 157 deletions(-) diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 28e542c6512a..261c9ac2d762 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -12,9 +12,10 @@ #include #include #include -#include #include +#include "ecb_cbc_helpers.h" + #define SERPENT_AVX2_PARALLEL_BLOCKS 16 /* 16-way AVX2 parallel cipher functions */ @@ -28,72 +29,38 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -static const struct common_glue_ctx serpent_enc = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ecb = serpent_ecb_enc_16way } - }, { - .num_blocks = 8, - .fn_u = { .ecb = serpent_ecb_enc_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_encrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ecb = serpent_ecb_dec_16way } - }, { - .num_blocks = 8, - .fn_u = { .ecb = serpent_ecb_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_decrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec_cbc = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .cbc = serpent_cbc_dec_16way } - }, { - .num_blocks = 8, - .fn_u = { .cbc = serpent_cbc_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .cbc = __serpent_decrypt } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_enc, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_ecb_enc_16way); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_dec, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_ecb_dec_16way); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_cbc_dec_16way); + 
CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index aa4605baf9d4..5fe01d2a5b1d 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -15,9 +15,10 @@ #include #include #include -#include #include +#include "ecb_cbc_helpers.h" + /* 8-way parallel cipher functions */ asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src); @@ -37,63 +38,35 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -static const struct common_glue_ctx serpent_enc = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = serpent_ecb_enc_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_encrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = serpent_ecb_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_decrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = serpent_cbc_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .cbc = __serpent_decrypt } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_enc, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_dec, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 9acb3bf28feb..e28d60949c16 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -21,7 +21,8 @@ #include #include #include -#include + +#include "ecb_cbc_helpers.h" static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -29,80 +30,46 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s) +static void serpent_decrypt_cbc_xway(const void *ctx, u8 *dst, const u8 *src) { - u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; - 
u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - unsigned int j; - - for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) - ivs[j] = src[j]; + u8 buf[SERPENT_PARALLEL_BLOCKS - 1][SERPENT_BLOCK_SIZE]; + const u8 *s = src; - serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); - - for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) - u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); + if (dst == src) + s = memcpy(buf, src, sizeof(buf)); + serpent_dec_blk_xway(ctx, dst, src); + crypto_xor(dst + SERPENT_BLOCK_SIZE, s, sizeof(buf)); } -static const struct common_glue_ctx serpent_enc = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = serpent_enc_blk_xway } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_encrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = serpent_dec_blk_xway } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_decrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = serpent_decrypt_cbc_xway } - }, { - .num_blocks = 1, - .fn_u = { .cbc = __serpent_decrypt } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_enc, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_enc_blk_xway); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_dec, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_dec_blk_xway); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(__serpent_encrypt, - req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_decrypt_cbc_xway); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); } static struct skcipher_alg serpent_algs[] = { diff --git a/crypto/Kconfig b/crypto/Kconfig index f8518ff389bb..29dce7efc443 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1538,7 +1538,6 @@ config CRYPTO_SERPENT_SSE2_X86_64 tristate "Serpent cipher algorithm (x86_64/SSE2)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD imply CRYPTO_CTR @@ -1558,7 +1557,6 @@ config CRYPTO_SERPENT_SSE2_586 tristate "Serpent cipher algorithm (i586/SSE2)" depends on X86 && !64BIT select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD imply CRYPTO_CTR @@ -1578,7 +1576,6 @@ config CRYPTO_SERPENT_AVX_X86_64 tristate "Serpent cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD imply CRYPTO_XTS From patchwork Thu Dec 31 17:23:33 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994301 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4F2A5C43333 for ; Thu, 31 Dec 2020 17:25:31 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2CD5A22475 for ; Thu, 31 Dec 2020 17:25:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727094AbgLaRZa (ORCPT ); Thu, 31 Dec 2020 12:25:30 -0500 Received: from mail.kernel.org ([198.145.29.99]:55026 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727143AbgLaRZ3 (ORCPT ); Thu, 31 Dec 2020 12:25:29 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 7C902224D4; Thu, 31 Dec 2020 17:24:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435470; bh=D3KoFnBPOxh7Ap8tw/fyu7/B3ZdVchNqvUDwSUQME9I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=TVpoHVCWeB1L4QRR9pS1C4PD80yfLneTmKeegSc6Ik/k12RmT4JDdCnGX5PpMLoM9 6vX8ZK9Zofiq3UInbRmcgCB3L7Zbg2rry4z2oIOtokkF2adWgVcvBc1hz2OAGXu8lr KFKqZjtY/Rsgx3L6bBiTAO10kEajYUM0YhemivuU89cc0lCvucy4+fG8DMYXcTVxf3 VgOx753D+/qGGnC+113X0NC/uT1zr7ETU50/tof7rWnKJc/R1k9vOsSRS7TtGqR64Z OdMQsbgFAGzzxkRo/H04AvNHmh4qSmZA6aWqzcvDFvcK+A8p/x6c4Ve/UNkMXUSk39 Xq4UrLgMGc+Qg== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 17/21] crypto: x86/cast5 - drop dependency on glue helper Date: Thu, 31 Dec 2020 18:23:33 +0100 Message-Id: <20201231172337.23073-18-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. 
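
The conversion below follows the same shape as every other glue file in this series: the function-pointer tables are dropped and each request handler walks the data itself, dispatching to progressively smaller block counts. For orientation, here is a rough sketch of what the ECB walk macros from "ecb_cbc_helpers.h" might expand to. This is illustrative only, not the actual header contents, and it leaves out the kernel_fpu_begin()/kernel_fpu_end() bracketing that the real helpers take care of; the point is simply that ECB_BLOCK() becomes a direct call the compiler can resolve statically, instead of an indirect call through struct common_glue_ctx:

#define ECB_WALK_START(req, bsize, fpu_blocks)				\
do {									\
	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));	\
	const int __bsize = (bsize);					\
	struct skcipher_walk walk;					\
	int err = skcipher_walk_virt(&walk, (req), false);		\
									\
	while (walk.nbytes > 0) {					\
		unsigned int nbytes = walk.nbytes;			\
		const u8 *src = walk.src.virt.addr;			\
		u8 *dst = walk.dst.virt.addr;

#define ECB_BLOCK(blocks, func)						\
		/* direct call, a candidate for inlining */		\
		while (nbytes >= (blocks) * __bsize) {			\
			(func)(ctx, dst, src);				\
			src    += (blocks) * __bsize;			\
			dst    += (blocks) * __bsize;			\
			nbytes -= (blocks) * __bsize;			\
		}

#define ECB_WALK_END()							\
		err = skcipher_walk_done(&walk, nbytes);		\
	}								\
	return err;							\
} while (0)

Used back to back, as in the ecb_encrypt()/ecb_decrypt() bodies in the diff below, the ECB_BLOCK() invocations fall through from the widest SIMD routine down to the single-block cipher, which is exactly the ordering the old common_glue_func_entry tables encoded.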
Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/cast5_avx_glue.c | 184 ++------------------ 1 file changed, 17 insertions(+), 167 deletions(-) diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index e0d1c7903b29..3976a87f92ad 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -6,7 +6,6 @@ * */ -#include #include #include #include @@ -15,6 +14,8 @@ #include #include +#include "ecb_cbc_helpers.h" + #define CAST5_PARALLEL_BLOCKS 16 asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst, @@ -30,186 +31,35 @@ static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, return cast5_setkey(&tfm->base, key, keylen); } -static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk, - unsigned int nbytes) -{ - return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, - walk, fpu_enabled, nbytes); -} - -static inline void cast5_fpu_end(bool fpu_enabled) -{ - return glue_fpu_end(fpu_enabled); -} - -static int ecb_crypt(struct skcipher_request *req, bool enc) -{ - bool fpu_enabled = false; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes; - void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src); - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - u8 *wsrc = walk.src.virt.addr; - u8 *wdst = walk.dst.virt.addr; - - fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); - - /* Process multi-block batch */ - if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { - fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize * CAST5_PARALLEL_BLOCKS; - wdst += bsize * CAST5_PARALLEL_BLOCKS; - nbytes -= bsize * CAST5_PARALLEL_BLOCKS; - } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - fn = (enc) ? 
__cast5_encrypt : __cast5_decrypt; - - /* Handle leftovers */ - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = skcipher_walk_done(&walk, nbytes); - } - - cast5_fpu_end(fpu_enabled); - return err; -} - static int ecb_encrypt(struct skcipher_request *req) { - return ecb_crypt(req, true); + ECB_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + ECB_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_ecb_enc_16way); + ECB_BLOCK(1, __cast5_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return ecb_crypt(req, false); + ECB_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + ECB_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_ecb_dec_16way); + ECB_BLOCK(1, __cast5_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - const unsigned int bsize = CAST5_BLOCK_SIZE; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - u64 *src = (u64 *)walk.src.virt.addr; - u64 *dst = (u64 *)walk.dst.virt.addr; - u64 *iv = (u64 *)walk.iv; - - do { - *dst = *src ^ *iv; - __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - src++; - dst++; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk.iv = *iv; - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct cast5_ctx *ctx, - struct skcipher_walk *walk) -{ - const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 last_iv; - - /* Start of the last block. 
*/ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process multi-block batch */ - if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { - do { - nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1); - src -= CAST5_PARALLEL_BLOCKS - 1; - dst -= CAST5_PARALLEL_BLOCKS - 1; - - cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); - } - - /* Handle leftovers */ - for (;;) { - __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } - -done: - *dst ^= *(u64 *)walk->iv; - *(u64 *)walk->iv = last_iv; - - return nbytes; + CBC_WALK_START(req, CAST5_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__cast5_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); - bool fpu_enabled = false; - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); - nbytes = __cbc_decrypt(ctx, &walk); - err = skcipher_walk_done(&walk, nbytes); - } - - cast5_fpu_end(fpu_enabled); - return err; + CBC_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_cbc_dec_16way); + CBC_DEC_BLOCK(1, __cast5_decrypt); + CBC_WALK_END(); } static struct skcipher_alg cast5_algs[] = { From patchwork Thu Dec 31 17:23:34 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994309 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id AD426C433E0 for ; Thu, 31 Dec 2020 17:25:55 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 7D480223E4 for ; Thu, 31 Dec 2020 17:25:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727146AbgLaRZy (ORCPT ); Thu, 31 Dec 2020 12:25:54 -0500 Received: from mail.kernel.org ([198.145.29.99]:55282 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727193AbgLaRZy (ORCPT ); Thu, 31 Dec 2020 12:25:54 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 76066224DF; Thu, 31 Dec 2020 17:24:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435472; bh=PL/JmqseT6kVRCDd+a/4yxEhjYHHk5KCkaqlNFXVpLs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=YZ7uROC5//Lu84Aztilv/k4tdkcvFfINcmAcT6sJO89ke3mv+kUDFUtkUlkw24swr cNKU6tECf0c1Mlww+9FrCYztN24+MwjfbZgJwUyyndMp06mX39HIA49pL9zOKvB7Mh zV1IAJUYDuXSECJsEH/fw+8kcYSXKzANLfcD47FDYPE9xnBLsnJpSNddET+ZDHjOT0 ReXKBWINiWo6j8WZAPknk/ufmaTbqVGTrqRc7KfLPnEIHF3WrhHXv1U638ui6lB7FL 
ygH2f/wy8VGM41RJNtx9xk2Xi3GCHjaor2vplf/7M9ZiVoBpMEnT2EjVV3uL3Cohr3 iWb9wShCynT8A== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 18/21] crypto: x86/cast6 - drop dependency on glue helper Date: Thu, 31 Dec 2020 18:23:34 +0100 Message-Id: <20201231172337.23073-19-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/cast6_avx_glue.c | 61 ++++++-------------- crypto/Kconfig | 1 - 2 files changed, 17 insertions(+), 45 deletions(-) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 790efcb6df3b..7e2aea372349 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "ecb_cbc_helpers.h" #define CAST6_PARALLEL_BLOCKS 8 @@ -30,63 +31,35 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, return cast6_setkey(&tfm->base, key, keylen); } -static const struct common_glue_ctx cast6_enc = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ecb = cast6_ecb_enc_8way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __cast6_encrypt } - } } -}; - -static const struct common_glue_ctx cast6_dec = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ecb = cast6_ecb_dec_8way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __cast6_decrypt } - } } -}; - -static const struct common_glue_ctx cast6_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .cbc = cast6_cbc_dec_8way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = __cast6_decrypt } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&cast6_enc, req); + ECB_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + ECB_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_ecb_enc_8way); + ECB_BLOCK(1, __cast6_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&cast6_dec, req); + ECB_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + ECB_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_ecb_dec_8way); + ECB_BLOCK(1, __cast6_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req); + CBC_WALK_START(req, CAST6_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__cast6_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req); + CBC_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_cbc_dec_8way); + CBC_DEC_BLOCK(1, __cast6_decrypt); + CBC_WALK_END(); } static struct skcipher_alg cast6_algs[] = { diff --git a/crypto/Kconfig b/crypto/Kconfig index 29dce7efc443..25101558acb5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1393,7 +1393,6 @@ config CRYPTO_CAST6_AVX_X86_64 select CRYPTO_SKCIPHER select 
CRYPTO_CAST6 select CRYPTO_CAST_COMMON - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD imply CRYPTO_XTS imply CRYPTO_CTR From patchwork Thu Dec 31 17:23:35 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994303 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 27CC9C43217 for ; Thu, 31 Dec 2020 17:25:32 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id F01D6223E8 for ; Thu, 31 Dec 2020 17:25:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727147AbgLaRZb (ORCPT ); Thu, 31 Dec 2020 12:25:31 -0500 Received: from mail.kernel.org ([198.145.29.99]:55052 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727143AbgLaRZb (ORCPT ); Thu, 31 Dec 2020 12:25:31 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 7450F224F4; Thu, 31 Dec 2020 17:24:32 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435473; bh=QswN+cSXOleQTv7jKXaF6pdOiygStZk2qsiKIqB19hE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=b93NIClbcXdLr3aWWCvKG+3zNpaQAfHK9J0ax4iMV7sxTrPS76kL6q2t0om0eF19u lewiBaW09+DMamylm6et0AwNJbPxe5j/tAN6vzpu6WVOvASJCi4FXvUEuGDwjEuXWn Sc8qpg3kuqKULqo4QMHvIr+BcWMuxp1V2OzEDZzNriDUGNlRc5sD0/N3h5oqNCsX25 MCBJOUibpO9RbG2B5DUPKGEHbci6wbOSIodO37Sgu9/NzpSgtToAu/LN/7XKXTq95m yCyCE2qqzxBNsxFhm4r92gRaUk5oVctpa47D4bq0fROHKSdeSMTOmVpzkH+12RAw7e 2UWGPcoeTAWoA== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 19/21] crypto: x86/twofish - drop dependency on glue helper Date: Thu, 31 Dec 2020 18:23:35 +0100 Message-Id: <20201231172337.23073-20-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. 
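
One functional detail in this patch is worth spelling out: twofish_dec_blk_cbc_3way() is rewritten to use a small stack buffer plus crypto_xor() instead of open-coded u128 arithmetic. The sketch below mirrors that change with made-up names (Twofish's 16-byte TF_BLOCK_SIZE assumed, and "dec_blk_3way" standing for any primitive that decrypts three consecutive blocks in one call): the three ciphertext blocks are decrypted together, then result blocks 1 and 2 are XORed with ciphertext blocks 0 and 1, which must be saved first when the operation is in place. XORing block 0 with the preceding ciphertext block (or the IV) is left to the caller, here the surrounding CBC walk macros.

/*
 * Illustrative sketch only; crypto_xor() comes from <crypto/algapi.h>.
 */
#define DEMO_BLOCK_SIZE 16	/* i.e. TF_BLOCK_SIZE */

static void demo_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src,
				  void (*dec_blk_3way)(const void *ctx,
						       u8 *dst, const u8 *src))
{
	u8 buf[2][DEMO_BLOCK_SIZE];
	const u8 *prev_ct = src;

	/* in place: c[0] and c[1] are about to be overwritten, save them */
	if (dst == src)
		prev_ct = memcpy(buf, src, sizeof(buf));

	dec_blk_3way(ctx, dst, src);		/* p'[i] = D(c[i]), i = 0..2 */

	/* p[1] ^= c[0], p[2] ^= c[1]; p[0] is chained by the caller */
	crypto_xor(dst + DEMO_BLOCK_SIZE, prev_ct, sizeof(buf));
}

The serpent SSE2 glue in an earlier patch of this series applies the same transformation to serpent_decrypt_cbc_xway(), just with a wider parallel width.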
Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/crypto/twofish_avx_glue.c | 73 +++++------------- arch/x86/crypto/twofish_glue_3way.c | 80 ++++++-------------- crypto/Kconfig | 2 - 3 files changed, 44 insertions(+), 111 deletions(-) diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 13f810b61034..6ce198f808a5 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -15,9 +15,10 @@ #include #include #include -#include #include +#include "ecb_cbc_helpers.h" + #define TWOFISH_PARALLEL_BLOCKS 8 /* 8-way parallel cipher functions */ @@ -37,72 +38,38 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) __twofish_enc_blk_3way(ctx, dst, src, false); } -static const struct common_glue_ctx twofish_enc = { - .num_funcs = 3, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = twofish_ecb_enc_8way } - }, { - .num_blocks = 3, - .fn_u = { .ecb = twofish_enc_blk_3way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = twofish_enc_blk } - } } -}; - -static const struct common_glue_ctx twofish_dec = { - .num_funcs = 3, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = twofish_ecb_dec_8way } - }, { - .num_blocks = 3, - .fn_u = { .ecb = twofish_dec_blk_3way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = twofish_dec_blk } - } } -}; - -static const struct common_glue_ctx twofish_dec_cbc = { - .num_funcs = 3, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .cbc = twofish_cbc_dec_8way } - }, { - .num_blocks = 3, - .fn_u = { .cbc = twofish_dec_blk_cbc_3way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = twofish_dec_blk } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&twofish_enc, req); + ECB_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + ECB_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_ecb_enc_8way); + ECB_BLOCK(3, twofish_enc_blk_3way); + ECB_BLOCK(1, twofish_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&twofish_dec, req); + ECB_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + ECB_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_ecb_dec_8way); + ECB_BLOCK(3, twofish_dec_blk_3way); + ECB_BLOCK(1, twofish_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(twofish_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); + CBC_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_cbc_dec_8way); + CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); + CBC_DEC_BLOCK(1, twofish_dec_blk); + CBC_WALK_END(); } static struct skcipher_alg twofish_algs[] = { diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 88252370db0a..d1fdefa5195a 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -5,17 +5,16 @@ * Copyright (c) 2011 Jussi Kivilinna */ -#include #include #include -#include -#include #include #include #include #include #include +#include "ecb_cbc_helpers.h" + EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); 
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); @@ -30,79 +29,48 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) __twofish_enc_blk_3way(ctx, dst, src, false); } -void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) +void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src) { - u128 ivs[2]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - ivs[0] = src[0]; - ivs[1] = src[1]; + u8 buf[2][TF_BLOCK_SIZE]; + const u8 *s = src; - twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + if (dst == src) + s = memcpy(buf, src, sizeof(buf)); + twofish_dec_blk_3way(ctx, dst, src); + crypto_xor(dst + TF_BLOCK_SIZE, s, sizeof(buf)); - u128_xor(&dst[1], &dst[1], &ivs[0]); - u128_xor(&dst[2], &dst[2], &ivs[1]); } EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); -static const struct common_glue_ctx twofish_enc = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 3, - .fn_u = { .ecb = twofish_enc_blk_3way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = twofish_enc_blk } - } } -}; - -static const struct common_glue_ctx twofish_dec = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 3, - .fn_u = { .ecb = twofish_dec_blk_3way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = twofish_dec_blk } - } } -}; - -static const struct common_glue_ctx twofish_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 3, - .fn_u = { .cbc = twofish_dec_blk_cbc_3way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = twofish_dec_blk } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&twofish_enc, req); + ECB_WALK_START(req, TF_BLOCK_SIZE, -1); + ECB_BLOCK(3, twofish_enc_blk_3way); + ECB_BLOCK(1, twofish_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&twofish_dec, req); + ECB_WALK_START(req, TF_BLOCK_SIZE, -1); + ECB_BLOCK(3, twofish_dec_blk_3way); + ECB_BLOCK(1, twofish_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(twofish_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); + CBC_DEC_BLOCK(1, twofish_dec_blk); + CBC_WALK_END(); } static struct skcipher_alg tf_skciphers[] = { diff --git a/crypto/Kconfig b/crypto/Kconfig index 25101558acb5..b2182658c55e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1711,7 +1711,6 @@ config CRYPTO_TWOFISH_X86_64_3WAY select CRYPTO_SKCIPHER select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 - select CRYPTO_GLUE_HELPER_X86 help Twofish cipher algorithm (x86_64, 3-way parallel). 
@@ -1730,7 +1729,6 @@ config CRYPTO_TWOFISH_AVX_X86_64 tristate "Twofish cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 From patchwork Thu Dec 31 17:23:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994313 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2CA2EC433E6 for ; Thu, 31 Dec 2020 17:25:58 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id F071A223DB for ; Thu, 31 Dec 2020 17:25:57 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726830AbgLaRZ5 (ORCPT ); Thu, 31 Dec 2020 12:25:57 -0500 Received: from mail.kernel.org ([198.145.29.99]:55290 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727169AbgLaRZ5 (ORCPT ); Thu, 31 Dec 2020 12:25:57 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 5993A20BED; Thu, 31 Dec 2020 17:24:34 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435475; bh=86byDVqEQEXyFgp0u1q84vaJaVqCNNrA8X/2php+esA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=e4xDxAVCQnVKciwkhfUGqH4No7Okr/ASq4ls1lAEkMeOvAlqJn3fAcc5sBVhxZBrz 7rkdBay6iwUtizsKLnxGcb04EqoXd/m8Q8GaxMJ0aw347mz8E7pJBBv66bkYW7Z9l4 QbtNcFCEm6HcPWoKX/KCLXi7+cM7tmSH45v4ZxWlQZ0/kIF4Lzs9gqwKlOePHndxzs OAb/ms3XBxa8iDe8xpNURd5cAJBFIGHJCgp0XyYwEuvUDEeMd/1g9diWAzLNIpL2l/ /D8OCzh1BDq+eTc+hSo3ly3Pnqdsm1sZaPHWDLxoD9LxGhwuHKIwLV52yT14iFiwqZ uuKVGz/UlZslA== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 20/21] crypto: x86 - remove glue helper module Date: Thu, 31 Dec 2020 18:23:36 +0100 Message-Id: <20201231172337.23073-21-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org All dependencies on the x86 glue helper module have been replaced by local instantiations of the new ECB/CBC preprocessor helper macros, so the glue helper module can be retired. 
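
To see what is actually going away, the core of the removed helper is the FPU bracketing decision, reproduced in condensed form below from the glue_fpu_begin()/glue_fpu_end() pair deleted by this patch (comments added for illustration; the walk-atomise call that prevents sleeping while the FPU is held is omitted here). SIMD registers are only claimed once the chunk being walked is large enough to amortise the state save/restore, and fpu_blocks_limit == -1 means the cipher never needs the FPU at all. The per-cipher ECB/CBC walk macros introduced earlier in the series now make an equivalent decision locally, so nothing remains that has to dispatch through this module.

static inline bool demo_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
				  bool fpu_enabled, unsigned int nbytes)
{
	if (fpu_blocks_limit < 0)	/* cipher never uses SIMD registers */
		return false;

	if (fpu_enabled)		/* already inside a begin/end pair */
		return true;

	/* too little data to pay for the FPU state save/restore */
	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
		return false;

	kernel_fpu_begin();
	return true;
}

static inline void demo_fpu_end(bool fpu_enabled)
{
	if (fpu_enabled)
		kernel_fpu_end();
}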
Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers Reported-by: kernel test robot --- arch/x86/crypto/Makefile | 2 - arch/x86/crypto/glue_helper.c | 155 -------------------- arch/x86/include/asm/crypto/glue_helper.h | 74 ---------- crypto/Kconfig | 5 - crypto/skcipher.c | 6 - include/crypto/internal/skcipher.h | 1 - 6 files changed, 243 deletions(-) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a31de0c6ccde..b28e36b7c96b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -4,8 +4,6 @@ OBJECT_FILES_NON_STANDARD := y -obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o - obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c deleted file mode 100644 index 895d34150c3f..000000000000 --- a/arch/x86/crypto/glue_helper.c +++ /dev/null @@ -1,155 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Shared glue code for 128bit block ciphers - * - * Copyright © 2012-2013 Jussi Kivilinna - * - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu - */ - -#include -#include -#include -#include -#include - -int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req) -{ - void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); - const unsigned int bsize = 128 / 8; - struct skcipher_walk walk; - bool fpu_enabled = false; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - unsigned int func_bytes; - unsigned int i; - - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); - for (i = 0; i < gctx->num_funcs; i++) { - func_bytes = bsize * gctx->funcs[i].num_blocks; - - if (nbytes < func_bytes) - continue; - - /* Process multi-block batch */ - do { - gctx->funcs[i].fn_u.ecb(ctx, dst, src); - src += func_bytes; - dst += func_bytes; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - break; - } - err = skcipher_walk_done(&walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - return err; -} -EXPORT_SYMBOL_GPL(glue_ecb_req_128bit); - -int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, - struct skcipher_request *req) -{ - void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); - const unsigned int bsize = 128 / 8; - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - const u128 *src = (u128 *)walk.src.virt.addr; - u128 *dst = (u128 *)walk.dst.virt.addr; - u128 *iv = (u128 *)walk.iv; - - do { - u128_xor(dst, src, iv); - fn(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - src++; - dst++; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u128 *)walk.iv = *iv; - err = skcipher_walk_done(&walk, nbytes); - } - return err; -} -EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit); - -int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req) -{ - void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); - const unsigned int bsize = 128 / 8; - struct skcipher_walk walk; - bool fpu_enabled = false; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - const u128 *src = walk.src.virt.addr; - u128 
*dst = walk.dst.virt.addr; - unsigned int func_bytes, num_blocks; - unsigned int i; - u128 last_iv; - - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes < func_bytes) - continue; - - /* Process multi-block batch */ - do { - src -= num_blocks - 1; - dst -= num_blocks - 1; - - gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst, - (const u8 *)src); - - nbytes -= func_bytes; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, --src); - dst--; - } while (nbytes >= func_bytes); - } -done: - u128_xor(dst, dst, (u128 *)walk.iv); - *(u128 *)walk.iv = last_iv; - err = skcipher_walk_done(&walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - return err; -} -EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); - -MODULE_LICENSE("GPL"); diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h deleted file mode 100644 index 23e09efd2aa6..000000000000 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Shared glue code for 128bit block ciphers - */ - -#ifndef _CRYPTO_GLUE_HELPER_H -#define _CRYPTO_GLUE_HELPER_H - -#include -#include -#include - -typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src); - -struct common_glue_func_entry { - unsigned int num_blocks; /* number of blocks that @fn will process */ - union { - common_glue_func_t ecb; - common_glue_cbc_func_t cbc; - } fn_u; -}; - -struct common_glue_ctx { - unsigned int num_funcs; - int fpu_blocks_limit; /* -1 means fpu not needed at all */ - - /* - * First funcs entry must have largest num_blocks and last funcs entry - * must have num_blocks == 1! - */ - struct common_glue_func_entry funcs[]; -}; - -static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, - struct skcipher_walk *walk, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. 
- */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - - /* prevent sleeping if FPU is in use */ - skcipher_walk_atomise(walk); - - kernel_fpu_begin(); - return true; -} - -static inline void glue_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, - struct skcipher_request *req); - -extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -#endif /* _CRYPTO_GLUE_HELPER_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index b2182658c55e..94f0fde06b94 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -210,11 +210,6 @@ config CRYPTO_SIMD tristate select CRYPTO_CRYPTD -config CRYPTO_GLUE_HELPER_X86 - tristate - depends on X86 - select CRYPTO_SKCIPHER - config CRYPTO_ENGINE tristate diff --git a/crypto/skcipher.c b/crypto/skcipher.c index ff16d05644c7..a15376245416 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -491,12 +491,6 @@ int skcipher_walk_virt(struct skcipher_walk *walk, } EXPORT_SYMBOL_GPL(skcipher_walk_virt); -void skcipher_walk_atomise(struct skcipher_walk *walk) -{ - walk->flags &= ~SKCIPHER_WALK_SLEEP; -} -EXPORT_SYMBOL_GPL(skcipher_walk_atomise); - int skcipher_walk_async(struct skcipher_walk *walk, struct skcipher_request *req) { diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 9dd6c0c17eb8..a2339f80a615 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -133,7 +133,6 @@ int skcipher_walk_done(struct skcipher_walk *walk, int err); int skcipher_walk_virt(struct skcipher_walk *walk, struct skcipher_request *req, bool atomic); -void skcipher_walk_atomise(struct skcipher_walk *walk); int skcipher_walk_async(struct skcipher_walk *walk, struct skcipher_request *req); int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, From patchwork Thu Dec 31 17:23:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 11994305 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.0 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 21F6BC43219 for ; Thu, 31 Dec 2020 17:25:33 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id EFB40223E4 for ; Thu, 31 Dec 2020 17:25:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727163AbgLaRZc (ORCPT ); Thu, 31 Dec 2020 12:25:32 -0500 Received: from mail.kernel.org ([198.145.29.99]:55050 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727143AbgLaRZc (ORCPT ); Thu, 31 Dec 2020 12:25:32 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 63070223DB; Thu, 31 Dec 2020 17:24:36 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1609435478; 
bh=OKQ0fd2bs8JpVACCWAKpCv690JDohzIxTKiett1tB0c=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LfO6hvg5RvBIyJk4eO2xtneHGeHkm/UDL5614+p4YAgxQi97Y51nqykHSZbkscsDP wDIaMKrl69R3xHA2nE8P7LkYyOKR3lOtEuD+S8Sm2d4WgGd6bXI63dW8qXAQcpt/8R +WwXftgPFp9WZt+EMmwrcw+5NqfGfe0R0jESZGTdLcakeLH4+8OHI/l1aCljM3MTw9 T/SEml8IoQNklqS1Xtvv4jBXwWN7n8VA7POPMI0laTY38Rn41f2Z+ySg8sEE3F4neK TL35WRnp5WOZGhhOdWp5YC5XkJf7dtXW6Uiucb4I+UAGEWxCBG9lvkCyOEmwC1RMPy A2L75UgpCjZ2g== From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Megha Dey , Eric Biggers , Herbert Xu , Milan Broz , Mike Snitzer Subject: [PATCH 21/21] crypto: x86 - use local headers for x86 specific shared declarations Date: Thu, 31 Dec 2020 18:23:37 +0100 Message-Id: <20201231172337.23073-22-ardb@kernel.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201231172337.23073-1-ardb@kernel.org> References: <20201231172337.23073-1-ardb@kernel.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org The Camellia, Serpent and Twofish related header files only contain declarations that are shared between different implementations of the respective algorithms residing under arch/x86/crypto, and none of their contents should be used elsewhere. So move the header files into the same location, and use local #includes instead. Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers --- arch/x86/{include/asm => }/crypto/camellia.h | 0 arch/x86/crypto/camellia_aesni_avx2_glue.c | 2 +- arch/x86/crypto/camellia_aesni_avx_glue.c | 2 +- arch/x86/crypto/camellia_glue.c | 2 +- arch/x86/{include/asm => }/crypto/serpent-avx.h | 0 arch/x86/{include/asm => }/crypto/serpent-sse2.h | 0 arch/x86/crypto/serpent_avx2_glue.c | 2 +- arch/x86/crypto/serpent_avx_glue.c | 2 +- arch/x86/crypto/serpent_sse2_glue.c | 2 +- arch/x86/{include/asm => }/crypto/twofish.h | 0 arch/x86/crypto/twofish_avx_glue.c | 2 +- arch/x86/crypto/twofish_glue_3way.c | 2 +- 12 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/crypto/camellia.h similarity index 100% rename from arch/x86/include/asm/crypto/camellia.h rename to arch/x86/crypto/camellia.h diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index ef5c0f094584..e7e4d64e9577 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -5,7 +5,6 @@ * Copyright © 2013 Jussi Kivilinna */ -#include #include #include #include @@ -13,6 +12,7 @@ #include #include +#include "camellia.h" #include "ecb_cbc_helpers.h" #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 68fed0a79889..c7ccf63e741e 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -5,7 +5,6 @@ * Copyright © 2012-2013 Jussi Kivilinna */ -#include #include #include #include @@ -13,6 +12,7 @@ #include #include +#include "camellia.h" #include "ecb_cbc_helpers.h" #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 6c314bb46211..8ecd796e2afd 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -14,8 +14,8 @@ #include #include #include -#include +#include "camellia.h" #include "ecb_cbc_helpers.h" /* regular block cipher functions */ diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/crypto/serpent-avx.h similarity index 
100% rename from arch/x86/include/asm/crypto/serpent-avx.h rename to arch/x86/crypto/serpent-avx.h diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/crypto/serpent-sse2.h similarity index 100% rename from arch/x86/include/asm/crypto/serpent-sse2.h rename to arch/x86/crypto/serpent-sse2.h diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 261c9ac2d762..ccf0b5fa4933 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -12,8 +12,8 @@ #include #include #include -#include +#include "serpent-avx.h" #include "ecb_cbc_helpers.h" #define SERPENT_AVX2_PARALLEL_BLOCKS 16 diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 5fe01d2a5b1d..6c248e1ea4ef 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -15,8 +15,8 @@ #include #include #include -#include +#include "serpent-avx.h" #include "ecb_cbc_helpers.h" /* 8-way parallel cipher functions */ diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index e28d60949c16..d78f37e9b2cf 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -20,8 +20,8 @@ #include #include #include -#include +#include "serpent-sse2.h" #include "ecb_cbc_helpers.h" static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/crypto/twofish.h similarity index 100% rename from arch/x86/include/asm/crypto/twofish.h rename to arch/x86/crypto/twofish.h diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 6ce198f808a5..3eb3440b477a 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -15,8 +15,8 @@ #include #include #include -#include +#include "twofish.h" #include "ecb_cbc_helpers.h" #define TWOFISH_PARALLEL_BLOCKS 8 diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index d1fdefa5195a..03725696397c 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -5,7 +5,6 @@ * Copyright (c) 2011 Jussi Kivilinna */ -#include #include #include #include @@ -13,6 +12,7 @@ #include #include +#include "twofish.h" #include "ecb_cbc_helpers.h" EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);