From patchwork Thu Apr 11 16:23:56 2024
From: Eric Biggers
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Stefan Kanthak
Subject: [PATCH v2 1/4] crypto: x86/sha256-ni - convert to use rounds macros
Date: Thu, 11 Apr 2024 09:23:56 -0700
Message-ID: <20240411162359.39073-2-ebiggers@kernel.org>
In-Reply-To: <20240411162359.39073-1-ebiggers@kernel.org>
References: <20240411162359.39073-1-ebiggers@kernel.org>

From: Eric Biggers

To avoid source code duplication, do the SHA-256 rounds using macros.
This reduces the length of sha256_ni_asm.S by 153 lines while still
producing the exact same object file.
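(Illustration, not part of the patch: a minimal standalone sketch of the
GAS .macro/.irp technique used below.  The names demo and round_step and
the loop body are invented for the example.  The assembler expands the
macro and the .irp loop at assembly time, which is why the rounds still
come out fully unrolled, and identical, in the object file.)

	# demo.S - assemble with: as demo.S -o demo.o
	.macro	round_step i
	.if \i == 0
	xorl	%eax, %eax		# conditional assembly: emitted only for i == 0
	.endif
	addl	$\i, %eax		# per-iteration work, parameterized by \i
	.endm

	.text
	.globl	demo
demo:
	.irp i, 0, 1, 2, 3		# expands round_step four times, i = 0..3
	round_step \i
	.endr
	ret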
Signed-off-by: Eric Biggers
---
 arch/x86/crypto/sha256_ni_asm.S | 211 +++++---------------------------
 1 file changed, 29 insertions(+), 182 deletions(-)

diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index 537b6dcd7ed8..498f67727b94 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -74,23 +74,43 @@
 #define SHUF_MASK	%xmm8

 #define ABEF_SAVE	%xmm9
 #define CDGH_SAVE	%xmm10

+.macro do_4rounds	i, m0, m1, m2, m3
+.if \i < 16
+	movdqu		\i*4(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, \m0
+.else
+	movdqa		\m0, MSG
+.endif
+	paddd		\i*4(SHA256CONSTANTS), MSG
+	sha256rnds2	STATE0, STATE1
+.if \i >= 12 && \i < 60
+	movdqa		\m0, MSGTMP4
+	palignr		$4, \m3, MSGTMP4
+	paddd		MSGTMP4, \m1
+	sha256msg2	\m0, \m1
+.endif
+	pshufd		$0x0E, MSG, MSG
+	sha256rnds2	STATE1, STATE0
+.if \i >= 4 && \i < 52
+	sha256msg1	\m0, \m3
+.endif
+.endm
+
 /*
  * Intel SHA Extensions optimized implementation of a SHA-256 update function
  *
  * The function takes a pointer to the current hash values, a pointer to the
  * input data, and a number of 64 byte blocks to process.  Once all blocks have
  * been processed, the digest pointer is updated with the resulting hash value.
  * The function only processes complete blocks, there is no functionality to
  * store partial blocks.  All message padding and hash value initialization must
  * be done outside the update function.
  *
- * The indented lines in the loop are instructions related to rounds processing.
- * The non-indented lines are instructions related to the message schedule.
- *
  * void sha256_ni_transform(uint32_t *digest, const void *data,
		uint32_t numBlocks);
  * digest : pointer to digest
  * data: pointer to input data
  * numBlocks: Number of blocks to process
@@ -123,189 +143,16 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
 .Lloop0:
 	/* Save hash values for addition after rounds */
 	movdqa		STATE0, ABEF_SAVE
 	movdqa		STATE1, CDGH_SAVE

-	/* Rounds 0-3 */
-	movdqu		0*16(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
-	movdqa		MSG, MSGTMP0
-	paddd		0*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-
-	/* Rounds 4-7 */
-	movdqu		1*16(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
-	movdqa		MSG, MSGTMP1
-	paddd		1*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP1, MSGTMP0
-
-	/* Rounds 8-11 */
-	movdqu		2*16(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
-	movdqa		MSG, MSGTMP2
-	paddd		2*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP2, MSGTMP1
-
-	/* Rounds 12-15 */
-	movdqu		3*16(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
-	movdqa		MSG, MSGTMP3
-	paddd		3*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP3, MSGTMP4
-	palignr		$4, MSGTMP2, MSGTMP4
-	paddd		MSGTMP4, MSGTMP0
-	sha256msg2	MSGTMP3, MSGTMP0
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP3, MSGTMP2
-
-	/* Rounds 16-19 */
-	movdqa		MSGTMP0, MSG
-	paddd		4*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP0, MSGTMP4
-	palignr		$4, MSGTMP3, MSGTMP4
-	paddd		MSGTMP4, MSGTMP1
-	sha256msg2	MSGTMP0, MSGTMP1
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP0, MSGTMP3
-
-	/* Rounds 20-23 */
-	movdqa		MSGTMP1, MSG
-	paddd		5*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP1, MSGTMP4
-	palignr		$4, MSGTMP0, MSGTMP4
-	paddd		MSGTMP4, MSGTMP2
-	sha256msg2	MSGTMP1, MSGTMP2
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP1, MSGTMP0
-
-	/* Rounds 24-27 */
-	movdqa		MSGTMP2, MSG
-	paddd		6*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP2, MSGTMP4
-	palignr		$4, MSGTMP1, MSGTMP4
-	paddd		MSGTMP4, MSGTMP3
-	sha256msg2	MSGTMP2, MSGTMP3
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP2, MSGTMP1
-
-	/* Rounds 28-31 */
-	movdqa		MSGTMP3, MSG
-	paddd		7*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP3, MSGTMP4
-	palignr		$4, MSGTMP2, MSGTMP4
-	paddd		MSGTMP4, MSGTMP0
-	sha256msg2	MSGTMP3, MSGTMP0
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP3, MSGTMP2
-
-	/* Rounds 32-35 */
-	movdqa		MSGTMP0, MSG
-	paddd		8*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP0, MSGTMP4
-	palignr		$4, MSGTMP3, MSGTMP4
-	paddd		MSGTMP4, MSGTMP1
-	sha256msg2	MSGTMP0, MSGTMP1
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP0, MSGTMP3
-
-	/* Rounds 36-39 */
-	movdqa		MSGTMP1, MSG
-	paddd		9*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP1, MSGTMP4
-	palignr		$4, MSGTMP0, MSGTMP4
-	paddd		MSGTMP4, MSGTMP2
-	sha256msg2	MSGTMP1, MSGTMP2
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP1, MSGTMP0
-
-	/* Rounds 40-43 */
-	movdqa		MSGTMP2, MSG
-	paddd		10*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP2, MSGTMP4
-	palignr		$4, MSGTMP1, MSGTMP4
-	paddd		MSGTMP4, MSGTMP3
-	sha256msg2	MSGTMP2, MSGTMP3
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP2, MSGTMP1
-
-	/* Rounds 44-47 */
-	movdqa		MSGTMP3, MSG
-	paddd		11*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP3, MSGTMP4
-	palignr		$4, MSGTMP2, MSGTMP4
-	paddd		MSGTMP4, MSGTMP0
-	sha256msg2	MSGTMP3, MSGTMP0
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP3, MSGTMP2
-
-	/* Rounds 48-51 */
-	movdqa		MSGTMP0, MSG
-	paddd		12*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP0, MSGTMP4
-	palignr		$4, MSGTMP3, MSGTMP4
-	paddd		MSGTMP4, MSGTMP1
-	sha256msg2	MSGTMP0, MSGTMP1
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-	sha256msg1	MSGTMP0, MSGTMP3
-
-	/* Rounds 52-55 */
-	movdqa		MSGTMP1, MSG
-	paddd		13*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP1, MSGTMP4
-	palignr		$4, MSGTMP0, MSGTMP4
-	paddd		MSGTMP4, MSGTMP2
-	sha256msg2	MSGTMP1, MSGTMP2
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-
-	/* Rounds 56-59 */
-	movdqa		MSGTMP2, MSG
-	paddd		14*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	movdqa		MSGTMP2, MSGTMP4
-	palignr		$4, MSGTMP1, MSGTMP4
-	paddd		MSGTMP4, MSGTMP3
-	sha256msg2	MSGTMP2, MSGTMP3
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
-
-	/* Rounds 60-63 */
-	movdqa		MSGTMP3, MSG
-	paddd		15*16(SHA256CONSTANTS), MSG
-	sha256rnds2	STATE0, STATE1
-	pshufd		$0x0E, MSG, MSG
-	sha256rnds2	STATE1, STATE0
+.irp i, 0, 16, 32, 48
+	do_4rounds	(\i + 0), MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
+	do_4rounds	(\i + 4), MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
+	do_4rounds	(\i + 8), MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
+	do_4rounds	(\i + 12), MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2
+.endr

 	/* Add current hash values with previously saved */
 	paddd		ABEF_SAVE, STATE0
 	paddd		CDGH_SAVE, STATE1

From patchwork Thu Apr 11 16:23:57 2024
From: Eric Biggers
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Stefan Kanthak
Subject: [PATCH v2 2/4] crypto: x86/sha256-ni - rename some register aliases
Date: Thu, 11 Apr 2024 09:23:57 -0700
Message-ID: <20240411162359.39073-3-ebiggers@kernel.org>
In-Reply-To: <20240411162359.39073-1-ebiggers@kernel.org>
References: <20240411162359.39073-1-ebiggers@kernel.org>

From: Eric Biggers

MSGTMP[0-3] are used to hold the message schedule and are not temporary
registers per se.  MSGTMP4 is used as a temporary register for several
different purposes and isn't really related to MSGTMP[0-3].  Rename them
to MSG[0-3] and TMP accordingly.

Also add a comment that clarifies what MSG is.
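(Background, not part of the patch: the comment added here reflects a
property of the SHA extensions themselves.  sha256rnds2 reads the two
message-word-plus-constant values (WK) for its two rounds from the low
64 bits of %xmm0 as an implicit operand, so the MSG alias has to stay on
%xmm0.  A minimal fragment showing the convention; the label K is a
stand-in for a round-constant table:)

	paddd		K(%rip), %xmm0		# xmm0 = message words + round constants
	sha256rnds2	%xmm1, %xmm2		# also reads xmm0[63:0] implicitly as WK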
Suggested-by: Stefan Kanthak
Signed-off-by: Eric Biggers
---
 arch/x86/crypto/sha256_ni_asm.S | 34 ++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index 498f67727b94..b7e7001dafdf 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -60,18 +60,18 @@
 #define DATA_PTR	%rsi	/* 2nd arg */
 #define NUM_BLKS	%rdx	/* 3rd arg */

 #define SHA256CONSTANTS	%rax

-#define MSG		%xmm0
+#define MSG		%xmm0  /* sha256rnds2 implicit operand */
 #define STATE0		%xmm1
 #define STATE1		%xmm2
-#define MSGTMP0		%xmm3
-#define MSGTMP1		%xmm4
-#define MSGTMP2		%xmm5
-#define MSGTMP3		%xmm6
-#define MSGTMP4		%xmm7
+#define MSG0		%xmm3
+#define MSG1		%xmm4
+#define MSG2		%xmm5
+#define MSG3		%xmm6
+#define TMP		%xmm7

 #define SHUF_MASK	%xmm8

 #define ABEF_SAVE	%xmm9
 #define CDGH_SAVE	%xmm10
@@ -85,13 +85,13 @@
 	movdqa		\m0, MSG
 .endif
 	paddd		\i*4(SHA256CONSTANTS), MSG
 	sha256rnds2	STATE0, STATE1
 .if \i >= 12 && \i < 60
-	movdqa		\m0, MSGTMP4
-	palignr		$4, \m3, MSGTMP4
-	paddd		MSGTMP4, \m1
+	movdqa		\m0, TMP
+	palignr		$4, \m3, TMP
+	paddd		TMP, \m1
 	sha256msg2	\m0, \m1
 .endif
 	pshufd		$0x0E, MSG, MSG
 	sha256rnds2	STATE1, STATE0
 .if \i >= 4 && \i < 52
@@ -131,27 +131,27 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
 	movdqu		0*16(DIGEST_PTR), STATE0
 	movdqu		1*16(DIGEST_PTR), STATE1

 	pshufd		$0xB1, STATE0, STATE0		/* CDAB */
 	pshufd		$0x1B, STATE1, STATE1		/* EFGH */
-	movdqa		STATE0, MSGTMP4
+	movdqa		STATE0, TMP
 	palignr		$8, STATE1, STATE0		/* ABEF */
-	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */
+	pblendw		$0xF0, TMP, STATE1		/* CDGH */

 	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
 	lea		K256(%rip), SHA256CONSTANTS

 .Lloop0:
 	/* Save hash values for addition after rounds */
 	movdqa		STATE0, ABEF_SAVE
 	movdqa		STATE1, CDGH_SAVE

 .irp i, 0, 16, 32, 48
-	do_4rounds	(\i + 0), MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
-	do_4rounds	(\i + 4), MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
-	do_4rounds	(\i + 8), MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
-	do_4rounds	(\i + 12), MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2
+	do_4rounds	(\i + 0), MSG0, MSG1, MSG2, MSG3
+	do_4rounds	(\i + 4), MSG1, MSG2, MSG3, MSG0
+	do_4rounds	(\i + 8), MSG2, MSG3, MSG0, MSG1
+	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2
 .endr

 	/* Add current hash values with previously saved */
 	paddd		ABEF_SAVE, STATE0
 	paddd		CDGH_SAVE, STATE1
@@ -162,13 +162,13 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
 	jne		.Lloop0

 	/* Write hash values back in the correct order */
 	pshufd		$0x1B, STATE0, STATE0		/* FEBA */
 	pshufd		$0xB1, STATE1, STATE1		/* DCHG */
-	movdqa		STATE0, MSGTMP4
+	movdqa		STATE0, TMP
 	pblendw		$0xF0, STATE1, STATE0		/* DCBA */
-	palignr		$8, MSGTMP4, STATE1		/* HGFE */
+	palignr		$8, TMP, STATE1			/* HGFE */

 	movdqu		STATE0, 0*16(DIGEST_PTR)
 	movdqu		STATE1, 1*16(DIGEST_PTR)

 .Ldone_hash:

From patchwork Thu Apr 11 16:23:58 2024
From: Eric Biggers
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Stefan Kanthak
Subject: [PATCH v2 3/4] crypto: x86/sha256-ni - optimize code size
Date: Thu, 11 Apr 2024 09:23:58 -0700
Message-ID: <20240411162359.39073-4-ebiggers@kernel.org>
In-Reply-To: <20240411162359.39073-1-ebiggers@kernel.org>
References: <20240411162359.39073-1-ebiggers@kernel.org>

From: Eric Biggers

- Load the SHA-256 round constants relative to a pointer that points
  into the middle of the constants rather than to the beginning.  Since
  x86 instructions use signed offsets, this decreases the instruction
  length required to access some of the later round constants.

- Use punpcklqdq or punpckhqdq instead of longer instructions such as
  pshufd, pblendw, and palignr.  This doesn't harm performance.

The end result is that sha256_ni_transform shrinks from 839 bytes to
791 bytes, with no loss in performance.
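(Worked illustration of the first point; the offsets come from the macro
in this file, while the encoding sizes noted in the comments are general
x86-64 properties rather than measurements taken from this build.  With
SHA256CONSTANTS pointing at K256, the per-round offsets \i*4 run from 0
to 60*4 = 240 bytes, and any displacement outside -128..127 needs four
bytes to encode.  Biasing the pointer by 32*4 = 128 bytes turns the same
accesses into (\i-32)*4, i.e. -128 through +112, which all fit in a
sign-extended one-byte displacement:)

	# sketch only: assumes a 64-entry round-constant table named K256
	lea	K256+32*4(%rip), %rax		# SHA256CONSTANTS biased into the middle
	paddd	(0-32)*4(%rax), %xmm0		# offset -128: 1-byte displacement
	paddd	(60-32)*4(%rax), %xmm0		# offset +112: 1-byte displacement
	# with an unbiased pointer, the offsets for rounds 32..60 would be
	# 128..240 and would each need a 4-byte displacement instead

(On the second point: punpckhqdq MSG, MSG copies the high 64 bits of MSG
into its low 64 bits, which is all the second sha256rnds2 of each group
consumes, and unlike pshufd it needs no immediate byte.)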
Suggested-by: Stefan Kanthak
Signed-off-by: Eric Biggers
---
 arch/x86/crypto/sha256_ni_asm.S | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index b7e7001dafdf..ffc9f1c75c15 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -82,19 +82,19 @@
 	pshufb		SHUF_MASK, MSG
 	movdqa		MSG, \m0
 .else
 	movdqa		\m0, MSG
 .endif
-	paddd		\i*4(SHA256CONSTANTS), MSG
+	paddd		(\i-32)*4(SHA256CONSTANTS), MSG
 	sha256rnds2	STATE0, STATE1
 .if \i >= 12 && \i < 60
 	movdqa		\m0, TMP
 	palignr		$4, \m3, TMP
 	paddd		TMP, \m1
 	sha256msg2	\m0, \m1
 .endif
-	pshufd		$0x0E, MSG, MSG
+	punpckhqdq	MSG, MSG
 	sha256rnds2	STATE1, STATE0
 .if \i >= 4 && \i < 52
 	sha256msg1	\m0, \m3
 .endif
 .endm
@@ -126,21 +126,21 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
 	/*
 	 * load initial hash values
 	 * Need to reorder these appropriately
 	 * DCBA, HGFE -> ABEF, CDGH
 	 */
-	movdqu		0*16(DIGEST_PTR), STATE0
-	movdqu		1*16(DIGEST_PTR), STATE1
+	movdqu		0*16(DIGEST_PTR), STATE0	/* DCBA */
+	movdqu		1*16(DIGEST_PTR), STATE1	/* HGFE */

-	pshufd		$0xB1, STATE0, STATE0		/* CDAB */
-	pshufd		$0x1B, STATE1, STATE1		/* EFGH */
 	movdqa		STATE0, TMP
-	palignr		$8, STATE1, STATE0		/* ABEF */
-	pblendw		$0xF0, TMP, STATE1		/* CDGH */
+	punpcklqdq	STATE1, STATE0			/* FEBA */
+	punpckhqdq	TMP, STATE1			/* DCHG */
+	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
+	pshufd		$0xB1, STATE1, STATE1		/* CDGH */

 	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
-	lea		K256(%rip), SHA256CONSTANTS
+	lea		K256+32*4(%rip), SHA256CONSTANTS

 .Lloop0:
 	/* Save hash values for addition after rounds */
 	movdqa		STATE0, ABEF_SAVE
 	movdqa		STATE1, CDGH_SAVE
@@ -160,18 +160,18 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
 	add		$64, DATA_PTR
 	cmp		NUM_BLKS, DATA_PTR
 	jne		.Lloop0

 	/* Write hash values back in the correct order */
-	pshufd		$0x1B, STATE0, STATE0		/* FEBA */
-	pshufd		$0xB1, STATE1, STATE1		/* DCHG */
 	movdqa		STATE0, TMP
-	pblendw		$0xF0, STATE1, STATE0		/* DCBA */
-	palignr		$8, TMP, STATE1			/* HGFE */
+	punpcklqdq	STATE1, STATE0			/* GHEF */
+	punpckhqdq	TMP, STATE1			/* ABCD */
+	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
+	pshufd		$0x1B, STATE1, STATE1		/* DCBA */

-	movdqu		STATE0, 0*16(DIGEST_PTR)
-	movdqu		STATE1, 1*16(DIGEST_PTR)
+	movdqu		STATE1, 0*16(DIGEST_PTR)
+	movdqu		STATE0, 1*16(DIGEST_PTR)

 .Ldone_hash:
 	RET
 SYM_FUNC_END(sha256_ni_transform)

From patchwork Thu Apr 11 16:23:59 2024
From: Eric Biggers
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Stefan Kanthak
Subject: [PATCH v2 4/4] crypto: x86/sha256-ni - simplify do_4rounds
Date: Thu, 11 Apr 2024 09:23:59 -0700
Message-ID: <20240411162359.39073-5-ebiggers@kernel.org>
In-Reply-To: <20240411162359.39073-1-ebiggers@kernel.org>
References: <20240411162359.39073-1-ebiggers@kernel.org>

From: Eric Biggers

Instead of loading the message words into both MSG and \m0 and then
adding the round constants to MSG, load the message words into \m0 and
the round constants into MSG and then add \m0 to MSG.  This shortens
the source code slightly.  It changes the instructions slightly, but it
doesn't affect binary code size and doesn't seem to affect performance.

Suggested-by: Stefan Kanthak
Signed-off-by: Eric Biggers
---
 arch/x86/crypto/sha256_ni_asm.S | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index ffc9f1c75c15..d515a55a3bc1 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -76,17 +76,15 @@
 #define ABEF_SAVE	%xmm9
 #define CDGH_SAVE	%xmm10

 .macro do_4rounds	i, m0, m1, m2, m3
 .if \i < 16
-	movdqu		\i*4(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
-	movdqa		MSG, \m0
-.else
-	movdqa		\m0, MSG
+	movdqu		\i*4(DATA_PTR), \m0
+	pshufb		SHUF_MASK, \m0
 .endif
-	paddd		(\i-32)*4(SHA256CONSTANTS), MSG
+	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
+	paddd		\m0, MSG
 	sha256rnds2	STATE0, STATE1
 .if \i >= 12 && \i < 60
 	movdqa		\m0, TMP
 	palignr		$4, \m3, TMP
 	paddd		TMP, \m1