From patchwork Sun Feb 16 03:07:12 2025 X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976317 X-Patchwork-Delegate: herbert@gondor.apana.org.au Date: Sun, 16 Feb 2025 11:07:12 +0800 Message-Id: <2fc12150edcde501896347c565735acc3fc24eb3.1739674648.git.herbert@gondor.apana.org.au> From: Herbert Xu Subject: [v2 PATCH 01/11] crypto: ahash - Only save callback and data in ahash_save_req To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen As unaligned operations are supported by the underlying algorithm, ahash_save_req and ahash_restore_req can be
greatly simplified to only preserve the callback and data. Signed-off-by: Herbert Xu --- crypto/ahash.c | 97 ++++++++++++++++--------------------------- include/crypto/hash.h | 3 -- 2 files changed, 35 insertions(+), 65 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index bcd9de009a91..c8e7327c6949 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -27,6 +27,12 @@ #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e +struct ahash_save_req_state { + struct ahash_request *req; + crypto_completion_t compl; + void *data; +}; + /* * For an ahash tfm that is using an shash algorithm (instead of an ahash * algorithm), this returns the underlying shash tfm. @@ -262,67 +268,34 @@ int crypto_ahash_init(struct ahash_request *req) } EXPORT_SYMBOL_GPL(crypto_ahash_init); -static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt, - bool has_state) +static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - unsigned int ds = crypto_ahash_digestsize(tfm); - struct ahash_request *subreq; - unsigned int subreq_size; - unsigned int reqsize; - u8 *result; + struct ahash_save_req_state *state; gfp_t gfp; u32 flags; - subreq_size = sizeof(*subreq); - reqsize = crypto_ahash_reqsize(tfm); - reqsize = ALIGN(reqsize, crypto_tfm_ctx_alignment()); - subreq_size += reqsize; - subreq_size += ds; - flags = ahash_request_flags(req); gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - subreq = kmalloc(subreq_size, gfp); - if (!subreq) + state = kmalloc(sizeof(*state), gfp); + if (!state) return -ENOMEM; - ahash_request_set_tfm(subreq, tfm); - ahash_request_set_callback(subreq, flags, cplt, req); - - result = (u8 *)(subreq + 1) + reqsize; - - ahash_request_set_crypt(subreq, req->src, result, req->nbytes); - - if (has_state) { - void *state; - - state = kmalloc(crypto_ahash_statesize(tfm), gfp); - if (!state) { - kfree(subreq); - return -ENOMEM; - } - - crypto_ahash_export(req, state); - crypto_ahash_import(subreq, state); - kfree_sensitive(state); - } - - req->priv = subreq; + state->compl = req->base.complete; + state->data = req->base.data; + req->base.complete = cplt; + req->base.data = state; + state->req = req; return 0; } -static void ahash_restore_req(struct ahash_request *req, int err) +static void ahash_restore_req(struct ahash_request *req) { - struct ahash_request *subreq = req->priv; + struct ahash_save_req_state *state = req->base.data; - if (!err) - memcpy(req->result, subreq->result, - crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); - - req->priv = NULL; - - kfree_sensitive(subreq); + req->base.complete = state->compl; + req->base.data = state->data; + kfree(state); } int crypto_ahash_update(struct ahash_request *req) @@ -374,51 +347,51 @@ EXPORT_SYMBOL_GPL(crypto_ahash_digest); static void ahash_def_finup_done2(void *data, int err) { - struct ahash_request *areq = data; + struct ahash_save_req_state *state = data; + struct ahash_request *areq = state->req; if (err == -EINPROGRESS) return; - ahash_restore_req(areq, err); - + ahash_restore_req(areq); ahash_request_complete(areq, err); } static int ahash_def_finup_finish1(struct ahash_request *req, int err) { - struct ahash_request *subreq = req->priv; - if (err) goto out; - subreq->base.complete = ahash_def_finup_done2; + req->base.complete = ahash_def_finup_done2; - err = crypto_ahash_alg(crypto_ahash_reqtfm(req))->final(subreq); + err = crypto_ahash_alg(crypto_ahash_reqtfm(req))->final(req); if (err == -EINPROGRESS || err == -EBUSY) 
return err; out: - ahash_restore_req(req, err); + ahash_restore_req(req); return err; } static void ahash_def_finup_done1(void *data, int err) { - struct ahash_request *areq = data; - struct ahash_request *subreq; + struct ahash_save_req_state *state0 = data; + struct ahash_save_req_state state; + struct ahash_request *areq; + state = *state0; + areq = state.req; if (err == -EINPROGRESS) goto out; - subreq = areq->priv; - subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = ahash_def_finup_finish1(areq, err); if (err == -EINPROGRESS || err == -EBUSY) return; out: - ahash_request_complete(areq, err); + state.compl(state.data, err); } static int ahash_def_finup(struct ahash_request *req) @@ -426,11 +399,11 @@ static int ahash_def_finup(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); int err; - err = ahash_save_req(req, ahash_def_finup_done1, true); + err = ahash_save_req(req, ahash_def_finup_done1); if (err) return err; - err = crypto_ahash_alg(tfm)->update(req->priv); + err = crypto_ahash_alg(tfm)->update(req); if (err == -EINPROGRESS || err == -EBUSY) return err; diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 2d5ea9f9ff43..9c1f8ca59a77 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -55,9 +55,6 @@ struct ahash_request { struct scatterlist *src; u8 *result; - /* This field may only be used by the ahash API code. */ - void *priv; - void *__ctx[] CRYPTO_MINALIGN_ATTR; }; From patchwork Sun Feb 16 03:07:15 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976318 X-Patchwork-Delegate: herbert@gondor.apana.org.au Received: from abb.hmeau.com (abb.hmeau.com [144.6.53.87]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2EADC7E1 for ; Sun, 16 Feb 2025 03:07:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=144.6.53.87 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675241; cv=none; b=UYcbx0PFWAmjDOKQq630qbto+J6B4r/im0JrVz1nbVcyUlHsluKNYfmzgbIEZGL4kLStsX+jV0+DBd4hZRVSNmdYwOaQ8X7If0hum25GuaJjFCBTRQo0ld0C76aOs/vm2Hufo73RHd4ZZRrNWLk9cBB8C0ON4WhM9bT1neDlZf0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675241; c=relaxed/simple; bh=sQQYWA6pMLOLdbNHlQ2+coyzbhGk4oJjTtOCtgn0uNE=; h=Date:Message-Id:In-Reply-To:References:From:Subject:To:Cc; b=dLk9XMEQzdRIah0mb1UV+XjuoWohUoN/GZoOgjId9MGEP72d4zoA55pDoYnS6F1HVXgfyjryflXI/beUfcLqXOU/n81zV/4f2B/jsNjq/ky+obWz6l7nDCQwXZNcErVObU0adTNqWb9Zx1TFolLzbdkniOOBjisB/5YhQ7n1KLw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au; spf=pass smtp.mailfrom=gondor.apana.org.au; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b=S1lEQjBJ; arc=none smtp.client-ip=144.6.53.87 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b="S1lEQjBJ" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=hmeau.com; s=formenos; 
h=Cc:To:Subject:From:References:In-Reply-To:Message-Id:Date: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=dVrRxU7foExstX9hgCMbnUQLE4gwkVI3GUAFfq9oi5M=; b=S1lEQjBJiuIht80/JAx8UliTTh E3p5UZbok556snrecR30y01eZE1SB8knDAgIQgNBN/Gen1bamGgIrNriaS23hsTEK3cjvMDJAO2Nq u2UPRB60nuClto+rJFxJCsUqfFmgdBqLvxr5bNZ5TEoqVqRdqgVVwxsdmmWxpTFf8kwbW/R2W1bKA oU09QI0hVwxgRHc/Vwha+rYFdZjNV1UKFNihXVH0XTo6sL8eI4tMCwTMPiWCGapRTmJO69X3qSiJA s453B4zvOEP1kKziP2vAN5eEB4jolmgEqxchK6XYVTRLD8zJf8GNPqpz3WBoJbmfpS7jLgeAii1z7 bruxobvQ==; Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.96 #2 (Debian)) id 1tjUn4-000gXh-1X; Sun, 16 Feb 2025 11:07:16 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Sun, 16 Feb 2025 11:07:15 +0800 Date: Sun, 16 Feb 2025 11:07:15 +0800 Message-Id: In-Reply-To: References: From: Herbert Xu Subject: [v2 PATCH 02/11] crypto: x86/ghash - Use proper helpers to clone request To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Rather than copying a request by hand with memcpy, use the correct API helpers to setup the new request. This will matter once the API helpers start setting up chained requests as a simple memcpy will break chaining. Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_glue.c | 23 ++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 41bc02e48916..c759ec808bf1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -189,6 +189,20 @@ static int ghash_async_init(struct ahash_request *req) return crypto_shash_init(desc); } +static void ghash_init_cryptd_req(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + ahash_request_set_callback(cryptd_req, req->base.flags, + req->base.complete, req->base.data); + ahash_request_set_crypt(cryptd_req, req->src, req->result, + req->nbytes); +} + static int ghash_async_update(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); @@ -198,8 +212,7 @@ static int ghash_async_update(struct ahash_request *req) if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + ghash_init_cryptd_req(req); return crypto_ahash_update(cryptd_req); } else { struct shash_desc *desc = cryptd_shash_desc(cryptd_req); @@ -216,8 +229,7 @@ static int ghash_async_final(struct ahash_request *req) if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + ghash_init_cryptd_req(req); return crypto_ahash_final(cryptd_req); } else { struct shash_desc *desc = cryptd_shash_desc(cryptd_req); @@ -257,8 +269,7 @@ static int 
ghash_async_digest(struct ahash_request *req) if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + ghash_init_cryptd_req(req); return crypto_ahash_digest(cryptd_req); } else { struct shash_desc *desc = cryptd_shash_desc(cryptd_req); From patchwork Sun Feb 16 03:07:17 2025 X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976319 X-Patchwork-Delegate: herbert@gondor.apana.org.au Date: Sun, 16 Feb 2025 11:07:17 +0800 Message-Id: <2620cdada3777a66d3600cd1887cd34245d1e26a.1739674648.git.herbert@gondor.apana.org.au> From: Herbert Xu Subject: [v2 PATCH 03/11] crypto:
hash - Add request chaining API To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: This adds request chaining to the ahash interface. Request chaining allows multiple requests to be submitted in one shot. An algorithm can elect to receive chained requests by setting the flag CRYPTO_ALG_REQ_CHAIN. If this bit is not set, the API will break up chained requests and submit them one-by-one. A new err field is added to struct crypto_async_request to record the return value for each individual request. Signed-off-by: Herbert Xu --- crypto/ahash.c | 261 +++++++++++++++++++++++++++++---- crypto/algapi.c | 2 +- include/crypto/algapi.h | 11 ++ include/crypto/hash.h | 28 ++-- include/crypto/internal/hash.h | 10 ++ include/linux/crypto.h | 24 +++ 6 files changed, 299 insertions(+), 37 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index c8e7327c6949..0546835f7304 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -28,11 +28,19 @@ #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e struct ahash_save_req_state { - struct ahash_request *req; + struct list_head head; + struct ahash_request *req0; + struct ahash_request *cur; + int (*op)(struct ahash_request *req); crypto_completion_t compl; void *data; }; +static void ahash_reqchain_done(void *data, int err); +static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt); +static void ahash_restore_req(struct ahash_request *req); +static int ahash_def_finup(struct ahash_request *req); + /* * For an ahash tfm that is using an shash algorithm (instead of an ahash * algorithm), this returns the underlying shash tfm. @@ -256,24 +264,145 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(crypto_ahash_setkey); +static int ahash_reqchain_finish(struct ahash_save_req_state *state, + int err, u32 mask) +{ + struct ahash_request *req0 = state->req0; + struct ahash_request *req = state->cur; + struct ahash_request *n; + + req->base.err = err; + + if (req != req0) + list_add_tail(&req->base.list, &req0->base.list); + + list_for_each_entry_safe(req, n, &state->head, base.list) { + list_del_init(&req->base.list); + + req->base.flags &= mask; + req->base.complete = ahash_reqchain_done; + req->base.data = state; + state->cur = req; + err = state->op(req); + + if (err == -EINPROGRESS) { + if (!list_empty(&state->head)) + err = -EBUSY; + goto out; + } + + if (err == -EBUSY) + goto out; + + req->base.err = err; + list_add_tail(&req->base.list, &req0->base.list); + } + + ahash_restore_req(req0); + +out: + return err; +} + +static void ahash_reqchain_done(void *data, int err) +{ + struct ahash_save_req_state *state = data; + crypto_completion_t compl = state->compl; + + data = state->data; + + if (err == -EINPROGRESS) { + if (!list_empty(&state->head)) + return; + goto notify; + } + + err = ahash_reqchain_finish(state, err, CRYPTO_TFM_REQ_MAY_BACKLOG); + if (err == -EBUSY) + return; + +notify: + compl(data, err); +} + +static int ahash_do_req_chain(struct ahash_request *req, + int (*op)(struct ahash_request *req)) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ahash_save_req_state *state; + struct ahash_save_req_state state0; + int err; + + if (!ahash_request_chained(req) || crypto_ahash_req_chain(tfm)) + return op(req); + + state = &state0; + + if (ahash_is_async(tfm)) { + err = ahash_save_req(req, ahash_reqchain_done); + if (err) { + struct ahash_request 
*r2; + + req->base.err = err; + list_for_each_entry(r2, &req->base.list, base.list) + r2->base.err = err; + + return err; + } + + state = req->base.data; + } + + state->op = op; + state->cur = req; + INIT_LIST_HEAD(&state->head); + list_splice_init(&req->base.list, &state->head); + + err = op(req); + if (err == -EBUSY || err == -EINPROGRESS) + return -EBUSY; + + return ahash_reqchain_finish(state, err, ~0); +} + int crypto_ahash_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - if (likely(tfm->using_shash)) - return crypto_shash_init(prepare_shash_desc(req, tfm)); + if (likely(tfm->using_shash)) { + struct ahash_request *r2; + int err; + + err = crypto_shash_init(prepare_shash_desc(req, tfm)); + req->base.err = err; + + list_for_each_entry(r2, &req->base.list, base.list) { + struct shash_desc *desc; + + desc = prepare_shash_desc(r2, tfm); + r2->base.err = crypto_shash_init(desc); + } + + return err; + } + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; - return crypto_ahash_alg(tfm)->init(req); + + return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->init); } EXPORT_SYMBOL_GPL(crypto_ahash_init); static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ahash_save_req_state *state; gfp_t gfp; u32 flags; + if (!ahash_is_async(tfm)) + return 0; + flags = ahash_request_flags(req); gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; state = kmalloc(sizeof(*state), gfp); @@ -284,14 +413,20 @@ static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) state->data = req->base.data; req->base.complete = cplt; req->base.data = state; - state->req = req; + state->req0 = req; return 0; } static void ahash_restore_req(struct ahash_request *req) { - struct ahash_save_req_state *state = req->base.data; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ahash_save_req_state *state; + + if (!ahash_is_async(tfm)) + return; + + state = req->base.data; req->base.complete = state->compl; req->base.data = state->data; @@ -302,10 +437,24 @@ int crypto_ahash_update(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - if (likely(tfm->using_shash)) - return shash_ahash_update(req, ahash_request_ctx(req)); + if (likely(tfm->using_shash)) { + struct ahash_request *r2; + int err; - return crypto_ahash_alg(tfm)->update(req); + err = shash_ahash_update(req, ahash_request_ctx(req)); + req->base.err = err; + + list_for_each_entry(r2, &req->base.list, base.list) { + struct shash_desc *desc; + + desc = ahash_request_ctx(r2); + r2->base.err = shash_ahash_update(r2, desc); + } + + return err; + } + + return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->update); } EXPORT_SYMBOL_GPL(crypto_ahash_update); @@ -313,10 +462,24 @@ int crypto_ahash_final(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - if (likely(tfm->using_shash)) - return crypto_shash_final(ahash_request_ctx(req), req->result); + if (likely(tfm->using_shash)) { + struct ahash_request *r2; + int err; - return crypto_ahash_alg(tfm)->final(req); + err = crypto_shash_final(ahash_request_ctx(req), req->result); + req->base.err = err; + + list_for_each_entry(r2, &req->base.list, base.list) { + struct shash_desc *desc; + + desc = ahash_request_ctx(r2); + r2->base.err = crypto_shash_final(desc, r2->result); + } + + return err; + } + + return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->final); } 
EXPORT_SYMBOL_GPL(crypto_ahash_final); @@ -324,10 +487,27 @@ int crypto_ahash_finup(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - if (likely(tfm->using_shash)) - return shash_ahash_finup(req, ahash_request_ctx(req)); + if (likely(tfm->using_shash)) { + struct ahash_request *r2; + int err; - return crypto_ahash_alg(tfm)->finup(req); + err = shash_ahash_finup(req, ahash_request_ctx(req)); + req->base.err = err; + + list_for_each_entry(r2, &req->base.list, base.list) { + struct shash_desc *desc; + + desc = ahash_request_ctx(r2); + r2->base.err = shash_ahash_finup(r2, desc); + } + + return err; + } + + if (!crypto_ahash_alg(tfm)->finup) + return ahash_def_finup(req); + + return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->finup); } EXPORT_SYMBOL_GPL(crypto_ahash_finup); @@ -335,20 +515,34 @@ int crypto_ahash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - if (likely(tfm->using_shash)) - return shash_ahash_digest(req, prepare_shash_desc(req, tfm)); + if (likely(tfm->using_shash)) { + struct ahash_request *r2; + int err; + + err = shash_ahash_digest(req, prepare_shash_desc(req, tfm)); + req->base.err = err; + + list_for_each_entry(r2, &req->base.list, base.list) { + struct shash_desc *desc; + + desc = prepare_shash_desc(r2, tfm); + r2->base.err = shash_ahash_digest(r2, desc); + } + + return err; + } if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; - return crypto_ahash_alg(tfm)->digest(req); + return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->digest); } EXPORT_SYMBOL_GPL(crypto_ahash_digest); static void ahash_def_finup_done2(void *data, int err) { struct ahash_save_req_state *state = data; - struct ahash_request *areq = state->req; + struct ahash_request *areq = state->req0; if (err == -EINPROGRESS) return; @@ -359,12 +553,15 @@ static void ahash_def_finup_done2(void *data, int err) static int ahash_def_finup_finish1(struct ahash_request *req, int err) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + if (err) goto out; - req->base.complete = ahash_def_finup_done2; + if (ahash_is_async(tfm)) + req->base.complete = ahash_def_finup_done2; - err = crypto_ahash_alg(crypto_ahash_reqtfm(req))->final(req); + err = crypto_ahash_final(req); if (err == -EINPROGRESS || err == -EBUSY) return err; @@ -380,7 +577,7 @@ static void ahash_def_finup_done1(void *data, int err) struct ahash_request *areq; state = *state0; - areq = state.req; + areq = state.req0; if (err == -EINPROGRESS) goto out; @@ -396,14 +593,13 @@ static void ahash_def_finup_done1(void *data, int err) static int ahash_def_finup(struct ahash_request *req) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); int err; err = ahash_save_req(req, ahash_def_finup_done1); if (err) return err; - err = crypto_ahash_alg(tfm)->update(req); + err = crypto_ahash_update(req); if (err == -EINPROGRESS || err == -EBUSY) return err; @@ -618,8 +814,6 @@ static int ahash_prepare_alg(struct ahash_alg *alg) base->cra_type = &crypto_ahash_type; base->cra_flags |= CRYPTO_ALG_TYPE_AHASH; - if (!alg->finup) - alg->finup = ahash_def_finup; if (!alg->setkey) alg->setkey = ahash_nosetkey; @@ -690,5 +884,20 @@ int ahash_register_instance(struct crypto_template *tmpl, } EXPORT_SYMBOL_GPL(ahash_register_instance); +void ahash_request_free(struct ahash_request *req) +{ + struct ahash_request *tmp; + struct ahash_request *r2; + + if (unlikely(!req)) + return; + + list_for_each_entry_safe(r2, tmp, &req->base.list, base.list) + kfree_sensitive(r2); + + 
kfree_sensitive(req); +} +EXPORT_SYMBOL_GPL(ahash_request_free); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/crypto/algapi.c b/crypto/algapi.c index 5318c214debb..e7a9a2ada2cf 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -955,7 +955,7 @@ struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue) queue->backlog = queue->backlog->next; request = queue->list.next; - list_del(request); + list_del_init(request); return list_entry(request, struct crypto_async_request, list); } diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 156de41ca760..11065978d360 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -271,4 +272,14 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; } +static inline bool crypto_request_chained(struct crypto_async_request *req) +{ + return !list_empty(&req->list); +} + +static inline bool crypto_tfm_req_chain(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_CHAIN; +} + #endif /* _CRYPTO_ALGAPI_H */ diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 9c1f8ca59a77..0a6f744ce4a1 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -572,16 +572,7 @@ static inline struct ahash_request *ahash_request_alloc_noprof( * ahash_request_free() - zeroize and free the request data structure * @req: request data structure cipher handle to be freed */ -static inline void ahash_request_free(struct ahash_request *req) -{ - kfree_sensitive(req); -} - -static inline void ahash_request_zero(struct ahash_request *req) -{ - memzero_explicit(req, sizeof(*req) + - crypto_ahash_reqsize(crypto_ahash_reqtfm(req))); -} +void ahash_request_free(struct ahash_request *req); static inline struct ahash_request *ahash_request_cast( struct crypto_async_request *req) @@ -622,6 +613,7 @@ static inline void ahash_request_set_callback(struct ahash_request *req, req->base.complete = compl; req->base.data = data; req->base.flags = flags; + crypto_reqchain_init(&req->base); } /** @@ -646,6 +638,12 @@ static inline void ahash_request_set_crypt(struct ahash_request *req, req->result = result; } +static inline void ahash_request_chain(struct ahash_request *req, + struct ahash_request *head) +{ + crypto_request_chain(&req->base, &head->base); +} + /** * DOC: Synchronous Message Digest API * @@ -947,4 +945,14 @@ static inline void shash_desc_zero(struct shash_desc *desc) sizeof(*desc) + crypto_shash_descsize(desc->tfm)); } +static inline int ahash_request_err(struct ahash_request *req) +{ + return req->base.err; +} + +static inline bool ahash_is_async(struct crypto_ahash *tfm) +{ + return crypto_tfm_is_async(&tfm->base); +} + #endif /* _CRYPTO_HASH_H */ diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 58967593b6b4..81542a48587e 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -270,5 +270,15 @@ static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) return container_of(tfm, struct crypto_shash, base); } +static inline bool ahash_request_chained(struct ahash_request *req) +{ + return crypto_request_chained(&req->base); +} + +static inline bool crypto_ahash_req_chain(struct crypto_ahash *tfm) +{ + return crypto_tfm_req_chain(&tfm->base); +} + #endif /* _CRYPTO_INTERNAL_HASH_H */ diff --git a/include/linux/crypto.h 
b/include/linux/crypto.h index b164da5e129e..1d2a6c515d58 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -13,6 +13,8 @@ #define _LINUX_CRYPTO_H #include +#include +#include #include #include #include @@ -124,6 +126,9 @@ */ #define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 +/* Set if the algorithm supports request chains. */ +#define CRYPTO_ALG_REQ_CHAIN 0x00040000 + /* * Transform masks and values (for crt_flags). */ @@ -174,6 +179,7 @@ struct crypto_async_request { struct crypto_tfm *tfm; u32 flags; + int err; }; /** @@ -540,5 +546,23 @@ int crypto_comp_decompress(struct crypto_comp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); +static inline void crypto_reqchain_init(struct crypto_async_request *req) +{ + req->err = -EINPROGRESS; + INIT_LIST_HEAD(&req->list); +} + +static inline void crypto_request_chain(struct crypto_async_request *req, + struct crypto_async_request *head) +{ + req->err = -EINPROGRESS; + list_add_tail(&req->list, &head->list); +} + +static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; +} + #endif /* _LINUX_CRYPTO_H */ From patchwork Sun Feb 16 03:07:19 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976320 X-Patchwork-Delegate: herbert@gondor.apana.org.au Received: from abb.hmeau.com (abb.hmeau.com [144.6.53.87]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4C55BC8E0 for ; Sun, 16 Feb 2025 03:07:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=144.6.53.87 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675245; cv=none; b=jrIT7WhSgFdkX2xJ9iuq/Kt9PNBmj6Eb/Ju/hdQ2TLZtaMFKm4YnQu6C6fGc9//+bmDdmdbmoE2G9x7AtG6dZFj2hdb9kchYyBdjsTPcH/C4DJqq9xVHvyudjQtNFGKZpVTMd6vt9Ton3cje4RsghcGLKTFHiPGL46gJbnTaZ/o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675245; c=relaxed/simple; bh=6uYRfnfABTZI+87Pbsa7eyHiBir6dg+GdVVyelYnnls=; h=Date:Message-Id:In-Reply-To:References:From:Subject:To:Cc; b=tb81CuWd0kVE8eF0gJ80XAUy1YwI1r4sPap6WlnRWT0DEGsatuG6kqO2fJQvcM8KEo2blXSLsha2exZ/NScZQImPoYfKcoTEtONls9Edirf3YS70j4+qLG7e79x6BgBS4W8IX7sZR2j1jphSh5cjMY8iE+z7I0ng8rgZTw0FEkI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au; spf=pass smtp.mailfrom=gondor.apana.org.au; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b=HVRK4mDo; arc=none smtp.client-ip=144.6.53.87 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b="HVRK4mDo" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=hmeau.com; s=formenos; h=Cc:To:Subject:From:References:In-Reply-To:Message-Id:Date: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=Hwmc8xPh4SMYBYUlF10dsEfk/3KiAvetRcQZKKJ4QxI=; b=HVRK4mDo0TU/66pYnGOpxlxyTL 
wFh7u51tDDkuFnsM8Fr4YPnN9lDRjqOVW9LtkWTlTPIfPoXc2qHN9awl7LcI1c54P87rfohEGS9t/ X6VNWlMf4IOqigv4TMZb5SVsNxfhTHiAXoGhTQN0hLQ5pgB7XBYDRYnRHLvDvj55QVIB8kWhKHacY ipwgSr1cx+BxtjyOc3CD/OZfg8XrnICIdfcpEVruBByKydKp/Ol2n2zVvU/9PsqGh5dwLQ3I6I9C8 HGAb0dhF3GdK2mIU+ZlFYuMXnR9dUQKdECO0osDII+6oUdVSgkZx1VMK76+1Nq5Uocg1BAC8M+luz wF0V8F3Q==; Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.96 #2 (Debian)) id 1tjUn9-000gY7-09; Sun, 16 Feb 2025 11:07:20 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Sun, 16 Feb 2025 11:07:19 +0800 Date: Sun, 16 Feb 2025 11:07:19 +0800 Message-Id: <7e79533fbbe4e0f56376963347b349935e6a343d.1739674648.git.herbert@gondor.apana.org.au> In-Reply-To: References: From: Herbert Xu Subject: [v2 PATCH 04/11] crypto: tcrypt - Restore multibuffer ahash tests To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: This patch is a revert of commit 388ac25efc8ce3bf9768ce7bf24268d6fac285d5. As multibuffer ahash is coming back in the form of request chaining, restore the multibuffer ahash tests using the new interface. Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 231 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e1a74cb2cfbe..f618f61c5615 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -716,6 +716,207 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret) return crypto_wait_req(ret, wait); } +struct test_mb_ahash_data { + struct scatterlist sg[XBUFSIZE]; + char result[64]; + struct ahash_request *req; + struct crypto_wait wait; + char *xbuf[XBUFSIZE]; +}; + +static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb, + int *rc) +{ + int i, err; + + /* Fire up a bunch of concurrent requests */ + err = crypto_ahash_digest(data[0].req); + + /* Wait for all requests to finish */ + err = crypto_wait_req(err, &data[0].wait); + if (num_mb < 2) + return err; + + for (i = 0; i < num_mb; i++) { + rc[i] = ahash_request_err(data[i].req); + if (rc[i]) { + pr_info("concurrent request %d error %d\n", i, rc[i]); + err = rc[i]; + } + } + + return err; +} + +static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen, + int secs, u32 num_mb) +{ + unsigned long start, end; + int bcount; + int ret = 0; + int *rc; + + rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL); + if (!rc) + return -ENOMEM; + + for (start = jiffies, end = start + secs * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + ret = do_mult_ahash_op(data, num_mb, rc); + if (ret) + goto out; + } + + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount * num_mb, secs, (u64)bcount * blen * num_mb); + +out: + kfree(rc); + return ret; +} + +static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen, + u32 num_mb) +{ + unsigned long cycles = 0; + int ret = 0; + int i; + int *rc; + + rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL); + if (!rc) + return -ENOMEM; + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + ret = do_mult_ahash_op(data, num_mb, rc); + if (ret) + goto out; + } + + /* The real thing. 
*/ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + ret = do_mult_ahash_op(data, num_mb, rc); + end = get_cycles(); + + if (ret) + goto out; + + cycles += end - start; + } + + pr_cont("1 operation in %lu cycles (%d bytes)\n", + (cycles + 4) / (8 * num_mb), blen); + +out: + kfree(rc); + return ret; +} + +static void test_mb_ahash_speed(const char *algo, unsigned int secs, + struct hash_speed *speed, u32 num_mb) +{ + struct test_mb_ahash_data *data; + struct crypto_ahash *tfm; + unsigned int i, j, k; + int ret; + + data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL); + if (!data) + return; + + tfm = crypto_alloc_ahash(algo, 0, 0); + if (IS_ERR(tfm)) { + pr_err("failed to load transform for %s: %ld\n", + algo, PTR_ERR(tfm)); + goto free_data; + } + + for (i = 0; i < num_mb; ++i) { + if (testmgr_alloc_buf(data[i].xbuf)) + goto out; + + crypto_init_wait(&data[i].wait); + + data[i].req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!data[i].req) { + pr_err("alg: hash: Failed to allocate request for %s\n", + algo); + goto out; + } + + + if (i) { + ahash_request_set_callback(data[i].req, 0, NULL, NULL); + ahash_request_chain(data[i].req, data[0].req); + } else + ahash_request_set_callback(data[0].req, 0, + crypto_req_done, + &data[0].wait); + + sg_init_table(data[i].sg, XBUFSIZE); + for (j = 0; j < XBUFSIZE; j++) { + sg_set_buf(data[i].sg + j, data[i].xbuf[j], PAGE_SIZE); + memset(data[i].xbuf[j], 0xff, PAGE_SIZE); + } + } + + pr_info("\ntesting speed of multibuffer %s (%s)\n", algo, + get_driver_name(crypto_ahash, tfm)); + + for (i = 0; speed[i].blen != 0; i++) { + /* For some reason this only tests digests. */ + if (speed[i].blen != speed[i].plen) + continue; + + if (speed[i].blen > XBUFSIZE * PAGE_SIZE) { + pr_err("template (%u) too big for tvmem (%lu)\n", + speed[i].blen, XBUFSIZE * PAGE_SIZE); + goto out; + } + + if (klen) + crypto_ahash_setkey(tfm, tvmem[0], klen); + + for (k = 0; k < num_mb; k++) + ahash_request_set_crypt(data[k].req, data[k].sg, + data[k].result, speed[i].blen); + + pr_info("test%3u " + "(%5u byte blocks,%5u bytes per update,%4u updates): ", + i, speed[i].blen, speed[i].plen, + speed[i].blen / speed[i].plen); + + if (secs) { + ret = test_mb_ahash_jiffies(data, speed[i].blen, secs, + num_mb); + cond_resched(); + } else { + ret = test_mb_ahash_cycles(data, speed[i].blen, num_mb); + } + + + if (ret) { + pr_err("At least one hashing failed ret=%d\n", ret); + break; + } + } + +out: + ahash_request_free(data[0].req); + + for (k = 0; k < num_mb; ++k) + testmgr_free_buf(data[k].xbuf); + + crypto_free_ahash(tfm); + +free_data: + kfree(data); +} + static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, char *out, int secs) { @@ -2391,6 +2592,36 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("sm3", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; + case 450: + test_mb_ahash_speed("sha1", sec, generic_hash_speed_template, + num_mb); + if (mode > 400 && mode < 500) break; + fallthrough; + case 451: + test_mb_ahash_speed("sha256", sec, generic_hash_speed_template, + num_mb); + if (mode > 400 && mode < 500) break; + fallthrough; + case 452: + test_mb_ahash_speed("sha512", sec, generic_hash_speed_template, + num_mb); + if (mode > 400 && mode < 500) break; + fallthrough; + case 453: + test_mb_ahash_speed("sm3", sec, generic_hash_speed_template, + num_mb); + if (mode > 400 && mode < 500) break; + fallthrough; + case 454: + test_mb_ahash_speed("streebog256", sec, + 
generic_hash_speed_template, num_mb); + if (mode > 400 && mode < 500) break; + fallthrough; + case 455: + test_mb_ahash_speed("streebog512", sec, + generic_hash_speed_template, num_mb); + if (mode > 400 && mode < 500) break; + fallthrough; case 499: break; From patchwork Sun Feb 16 03:07:22 2025 X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976321 X-Patchwork-Delegate: herbert@gondor.apana.org.au Date: Sun, 16 Feb 2025 11:07:22 +0800 Message-Id: <2a7e040e93d8f2646131645289e63ce48e1ab182.1739674648.git.herbert@gondor.apana.org.au> From: Herbert Xu Subject: [v2 PATCH 05/11] crypto: ahash - Add virtual address support To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel ,
Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: This patch adds virtual address support to ahash. Virtual addresses were previously only supported through shash. The user may choose to use virtual addresses with ahash by calling ahash_request_set_virt instead of ahash_request_set_crypt. The API will take care of translating this to an SG list if necessary, unless the algorithm declares that it supports chaining. Therefore in order for an ahash algorithm to support chaining, it must also support virtual addresses directly. Signed-off-by: Herbert Xu --- crypto/ahash.c | 280 +++++++++++++++++++++++++++++---- include/crypto/hash.h | 38 ++++- include/crypto/internal/hash.h | 7 +- include/linux/crypto.h | 2 +- 4 files changed, 293 insertions(+), 34 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 0546835f7304..40ccaf4c0cd6 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -34,11 +34,17 @@ struct ahash_save_req_state { int (*op)(struct ahash_request *req); crypto_completion_t compl; void *data; + struct scatterlist sg; + const u8 *src; + u8 *page; + unsigned int offset; + unsigned int nbytes; }; static void ahash_reqchain_done(void *data, int err); static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt); -static void ahash_restore_req(struct ahash_request *req); +static void ahash_restore_req(struct ahash_save_req_state *state); +static void ahash_def_finup_done1(void *data, int err); static int ahash_def_finup(struct ahash_request *req); /* @@ -100,6 +106,10 @@ int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) unsigned int offset; int err; + if (ahash_request_isvirt(req)) + return crypto_shash_digest(desc, req->svirt, nbytes, + req->result); + if (nbytes && (sg = req->src, offset = sg->offset, nbytes <= min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) { @@ -182,6 +192,9 @@ static int hash_walk_new_entry(struct crypto_hash_walk *walk) int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) { + if ((walk->flags & CRYPTO_AHASH_REQ_VIRT)) + return err; + walk->data -= walk->offset; kunmap_local(walk->data); @@ -209,14 +222,20 @@ int crypto_hash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk) { walk->total = req->nbytes; + walk->entrylen = 0; - if (!walk->total) { - walk->entrylen = 0; + if (!walk->total) return 0; + + walk->flags = req->base.flags; + + if (ahash_request_isvirt(req)) { + walk->data = req->svirt; + walk->total = 0; + return req->nbytes; } walk->sg = req->src; - walk->flags = req->base.flags; return hash_walk_new_entry(walk); } @@ -264,18 +283,82 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(crypto_ahash_setkey); +static bool ahash_request_hasvirt(struct ahash_request *req) +{ + struct ahash_request *r2; + + if (ahash_request_isvirt(req)) + return true; + + list_for_each_entry(r2, &req->base.list, base.list) + if (ahash_request_isvirt(r2)) + return true; + + return false; +} + +static int ahash_reqchain_virt(struct ahash_save_req_state *state, + int err, u32 mask) +{ + struct ahash_request *req = state->cur; + + for (;;) { + unsigned len = state->nbytes; + + req->base.err = err; + + if (!state->offset) + break; + + if (state->offset == len || err) { + u8 *result = req->result; + + ahash_request_set_virt(req, state->src, result, len); + state->offset = 0; + break; + } + + len -= state->offset; + + len = min(PAGE_SIZE, len); + memcpy(state->page, state->src + 
state->offset, len); + state->offset += len; + req->nbytes = len; + + err = state->op(req); + if (err == -EINPROGRESS) { + if (!list_empty(&state->head) || + state->offset < state->nbytes) + err = -EBUSY; + break; + } + + if (err == -EBUSY) + break; + } + + return err; +} + static int ahash_reqchain_finish(struct ahash_save_req_state *state, int err, u32 mask) { struct ahash_request *req0 = state->req0; struct ahash_request *req = state->cur; + struct crypto_ahash *tfm; struct ahash_request *n; + bool update; - req->base.err = err; + err = ahash_reqchain_virt(state, err, mask); + if (err == -EINPROGRESS || err == -EBUSY) + goto out; if (req != req0) list_add_tail(&req->base.list, &req0->base.list); + tfm = crypto_ahash_reqtfm(req); + update = state->op == crypto_ahash_alg(tfm)->update; + list_for_each_entry_safe(req, n, &state->head, base.list) { list_del_init(&req->base.list); @@ -283,10 +366,27 @@ static int ahash_reqchain_finish(struct ahash_save_req_state *state, req->base.complete = ahash_reqchain_done; req->base.data = state; state->cur = req; + + if (update && ahash_request_isvirt(req) && req->nbytes) { + unsigned len = req->nbytes; + u8 *result = req->result; + + state->src = req->svirt; + state->nbytes = len; + + len = min(PAGE_SIZE, len); + + memcpy(state->page, req->svirt, len); + state->offset = len; + + ahash_request_set_crypt(req, &state->sg, result, len); + } + err = state->op(req); if (err == -EINPROGRESS) { - if (!list_empty(&state->head)) + if (!list_empty(&state->head) || + state->offset < state->nbytes) err = -EBUSY; goto out; } @@ -294,11 +394,14 @@ static int ahash_reqchain_finish(struct ahash_save_req_state *state, if (err == -EBUSY) goto out; - req->base.err = err; + err = ahash_reqchain_virt(state, err, mask); + if (err == -EINPROGRESS || err == -EBUSY) + goto out; + list_add_tail(&req->base.list, &req0->base.list); } - ahash_restore_req(req0); + ahash_restore_req(state); out: return err; @@ -312,7 +415,7 @@ static void ahash_reqchain_done(void *data, int err) data = state->data; if (err == -EINPROGRESS) { - if (!list_empty(&state->head)) + if (!list_empty(&state->head) || state->offset < state->nbytes) return; goto notify; } @@ -329,40 +432,84 @@ static int ahash_do_req_chain(struct ahash_request *req, int (*op)(struct ahash_request *req)) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + bool update = op == crypto_ahash_alg(tfm)->update; struct ahash_save_req_state *state; struct ahash_save_req_state state0; + struct ahash_request *r2; + u8 *page = NULL; int err; - if (!ahash_request_chained(req) || crypto_ahash_req_chain(tfm)) + if (crypto_ahash_req_chain(tfm) || + (!ahash_request_chained(req) && + (!update || !ahash_request_isvirt(req)))) return op(req); - state = &state0; + if (update && ahash_request_hasvirt(req)) { + gfp_t gfp; + u32 flags; + flags = ahash_request_flags(req); + gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; + page = (void *)__get_free_page(gfp); + err = -ENOMEM; + if (!page) + goto out_set_chain; + } + + state = &state0; if (ahash_is_async(tfm)) { err = ahash_save_req(req, ahash_reqchain_done); - if (err) { - struct ahash_request *r2; - - req->base.err = err; - list_for_each_entry(r2, &req->base.list, base.list) - r2->base.err = err; - - return err; - } + if (err) + goto out_free_page; state = req->base.data; } state->op = op; state->cur = req; + state->page = page; + state->offset = 0; + state->nbytes = 0; INIT_LIST_HEAD(&state->head); list_splice_init(&req->base.list, &state->head); + if (page) + sg_init_one(&state->sg, page, PAGE_SIZE); + + if (update && ahash_request_isvirt(req) && req->nbytes) { + unsigned len = req->nbytes; + u8 *result = req->result; + + state->src = req->svirt; + state->nbytes = len; + + len = min(PAGE_SIZE, len); + + memcpy(page, req->svirt, len); + state->offset = len; + + ahash_request_set_crypt(req, &state->sg, result, len); + } + err = op(req); if (err == -EBUSY || err == -EINPROGRESS) return -EBUSY; return ahash_reqchain_finish(state, err, ~0); + +out_free_page: + if (page) { + memset(page, 0, PAGE_SIZE); + free_page((unsigned long)page); + } + +out_set_chain: + req->base.err = err; + list_for_each_entry(r2, &req->base.list, base.list) + r2->base.err = err; + + return err; } int crypto_ahash_init(struct ahash_request *req) @@ -414,15 +561,19 @@ static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) req->base.complete = cplt; req->base.data = state; state->req0 = req; + state->page = NULL; return 0; } -static void ahash_restore_req(struct ahash_request *req) +static void ahash_restore_req(struct ahash_save_req_state *state) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_save_req_state *state; + struct ahash_request *req = state->req0; + struct crypto_ahash *tfm; + free_page((unsigned long)state->page); + + tfm = crypto_ahash_reqtfm(req); if (!ahash_is_async(tfm)) return; @@ -504,13 +655,74 @@ int crypto_ahash_finup(struct ahash_request *req) return err; } - if (!crypto_ahash_alg(tfm)->finup) + if (!crypto_ahash_alg(tfm)->finup || + (!crypto_ahash_req_chain(tfm) && ahash_request_hasvirt(req))) return ahash_def_finup(req); return ahash_do_req_chain(req, crypto_ahash_alg(tfm)->finup); } EXPORT_SYMBOL_GPL(crypto_ahash_finup); +static int ahash_def_digest_finish(struct ahash_save_req_state *state, int err) +{ + struct ahash_request *req = state->req0; + struct crypto_ahash *tfm; + + if (err) + goto out; + + tfm = crypto_ahash_reqtfm(req); + if (ahash_is_async(tfm)) + req->base.complete = ahash_def_finup_done1; + + err = crypto_ahash_update(req); + if (err == -EINPROGRESS || err == -EBUSY) + return err; + +out: + ahash_restore_req(state); + return err; +} + +static void ahash_def_digest_done(void *data, int err) +{ + struct ahash_save_req_state *state0 = data; + struct ahash_save_req_state state; + struct ahash_request *areq; + + state = *state0; + areq = state.req0; + if (err == -EINPROGRESS) + goto out; + + areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + err = ahash_def_digest_finish(state0, err); + if (err == -EINPROGRESS || err == -EBUSY) + return; + +out: + state.compl(state.data, err); +} + +static int ahash_def_digest(struct ahash_request *req) +{ + struct ahash_save_req_state *state; + int err; + + err = ahash_save_req(req, ahash_def_digest_done); + if (err) + return err; + + state = req->base.data; + + err = crypto_ahash_init(req); + if (err == -EINPROGRESS || err == -EBUSY) + 
return err; + + return ahash_def_digest_finish(state, err); +} + int crypto_ahash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -532,6 +744,9 @@ int crypto_ahash_digest(struct ahash_request *req) return err; } + if (!crypto_ahash_req_chain(tfm) && ahash_request_hasvirt(req)) + return ahash_def_digest(req); + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; @@ -547,17 +762,19 @@ static void ahash_def_finup_done2(void *data, int err) if (err == -EINPROGRESS) return; - ahash_restore_req(areq); + ahash_restore_req(state); ahash_request_complete(areq, err); } -static int ahash_def_finup_finish1(struct ahash_request *req, int err) +static int ahash_def_finup_finish1(struct ahash_save_req_state *state, int err) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ahash_request *req = state->req0; + struct crypto_ahash *tfm; if (err) goto out; + tfm = crypto_ahash_reqtfm(req); if (ahash_is_async(tfm)) req->base.complete = ahash_def_finup_done2; @@ -566,7 +783,7 @@ static int ahash_def_finup_finish1(struct ahash_request *req, int err) return err; out: - ahash_restore_req(req); + ahash_restore_req(state); return err; } @@ -583,7 +800,7 @@ static void ahash_def_finup_done1(void *data, int err) areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - err = ahash_def_finup_finish1(areq, err); + err = ahash_def_finup_finish1(state0, err); if (err == -EINPROGRESS || err == -EBUSY) return; @@ -593,17 +810,20 @@ static void ahash_def_finup_done1(void *data, int err) static int ahash_def_finup(struct ahash_request *req) { + struct ahash_save_req_state *state; int err; err = ahash_save_req(req, ahash_def_finup_done1); if (err) return err; + state = req->base.data; + err = crypto_ahash_update(req); if (err == -EINPROGRESS || err == -EBUSY) return err; - return ahash_def_finup_finish1(req, err); + return ahash_def_finup_finish1(state, err); } int crypto_ahash_export(struct ahash_request *req, void *out) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 0a6f744ce4a1..4e87e39679cb 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -12,6 +12,9 @@ #include #include +/* Set this bit for virtual address instead of SG list. 
*/ +#define CRYPTO_AHASH_REQ_VIRT 0x00000001 + struct crypto_ahash; /** @@ -52,7 +55,10 @@ struct ahash_request { struct crypto_async_request base; unsigned int nbytes; - struct scatterlist *src; + union { + struct scatterlist *src; + const u8 *svirt; + }; u8 *result; void *__ctx[] CRYPTO_MINALIGN_ATTR; @@ -610,9 +616,13 @@ static inline void ahash_request_set_callback(struct ahash_request *req, crypto_completion_t compl, void *data) { + u32 keep = CRYPTO_AHASH_REQ_VIRT; + req->base.complete = compl; req->base.data = data; - req->base.flags = flags; + flags &= ~keep; + req->base.flags &= keep; + req->base.flags |= flags; crypto_reqchain_init(&req->base); } @@ -636,6 +646,30 @@ static inline void ahash_request_set_crypt(struct ahash_request *req, req->src = src; req->nbytes = nbytes; req->result = result; + req->base.flags &= ~CRYPTO_AHASH_REQ_VIRT; +} + +/** + * ahash_request_set_virt() - set virtual address data buffers + * @req: ahash_request handle to be updated + * @src: source virtual address + * @result: buffer that is filled with the message digest -- the caller must + * ensure that the buffer has sufficient space by, for example, calling + * crypto_ahash_digestsize() + * @nbytes: number of bytes to process from the source virtual address + * + * By using this call, the caller references the source virtual address. + * The source virtual address points to the data the message digest is to + * be calculated for. + */ +static inline void ahash_request_set_virt(struct ahash_request *req, + const u8 *src, u8 *result, + unsigned int nbytes) +{ + req->svirt = src; + req->nbytes = nbytes; + req->result = result; + req->base.flags |= CRYPTO_AHASH_REQ_VIRT; } static inline void ahash_request_chain(struct ahash_request *req, diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 81542a48587e..195d6aeeede3 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -15,7 +15,7 @@ struct ahash_request; struct scatterlist; struct crypto_hash_walk { - char *data; + const char *data; unsigned int offset; unsigned int flags; @@ -275,6 +275,11 @@ static inline bool ahash_request_chained(struct ahash_request *req) return crypto_request_chained(&req->base); } +static inline bool ahash_request_isvirt(struct ahash_request *req) +{ + return req->base.flags & CRYPTO_AHASH_REQ_VIRT; +} + static inline bool crypto_ahash_req_chain(struct crypto_ahash *tfm) { return crypto_tfm_req_chain(&tfm->base); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 1d2a6c515d58..61ac11226638 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -126,7 +126,7 @@ */ #define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 -/* Set if the algorithm supports request chains. */ +/* Set if the algorithm supports request chains and virtual addresses. 
*/ #define CRYPTO_ALG_REQ_CHAIN 0x00040000 /* From patchwork Sun Feb 16 03:07:24 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976322 X-Patchwork-Delegate: herbert@gondor.apana.org.au Received: from abb.hmeau.com (abb.hmeau.com [144.6.53.87]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5042E7E1 for ; Sun, 16 Feb 2025 03:07:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=144.6.53.87 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675250; cv=none; b=fPJvF6F7x+6GxZEnnqg5YCuX8luJCRx/rz4Vtni1/8Zez8fp3JoVISPMRrvd3uPB3RkHpMfrJpQ0MA/ZOeGgi+/Kxxc+IXtzHTTQqSqYE3HThzO5BK918M9jPBC33a1R5N6hc+us9EIi8uZLlkU2du1fLEnYxbemM14c6uR6mhA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675250; c=relaxed/simple; bh=PPJR8P5RQRLlbn7T1W0+NiGHFT0qPn6JyhQBqNbAlbU=; h=Date:Message-Id:In-Reply-To:References:From:Subject:To:Cc; b=Ihm1nwBpz4DlOcK/eiyajF69wMY2o2+ASAMLxNalflo6acLiS3Q/yzMZLCbJwFQ5Zp+wBJ8LuHvBURkfDe4timaVsON5lr1t3Zi0As6tLpjcYZVFQUN4EGmYtgYb9hcOr4W0FUDRSoZKUYXjgD/3baKQ+XRHpW8aA8ASAXMZnAw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au; spf=pass smtp.mailfrom=gondor.apana.org.au; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b=IMpqNtIl; arc=none smtp.client-ip=144.6.53.87 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b="IMpqNtIl" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=hmeau.com; s=formenos; h=Cc:To:Subject:From:References:In-Reply-To:Message-Id:Date: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=FGKxPWwVEcqQqOc0ZVw4JhKmYFcoBhe/DT7hf3S6hE8=; b=IMpqNtIlUfnhyKqSGUqD+pol2+ 9r8+OV3SqP1ebXCBuz5JfvEarXbB8Vz4hddTrINbpVyp71c4AevoVu8ACYudNp9DNGOgcPc3Xg9ZP TN2+y5Rt4JC7hPCnfC9igsISLKSooFMkQbd7E57Vzw2VNYZIIjlKnnkr+f8ty03rZKTqBSslGcFGA udNlrBmMIVofQeDHGu2f+4I/orFEkgKePKyud76XgJxtg9KyHEvY9poq3hg38GEYYXzLVVLAFlqqx Uzo2MoknfRl3cqbOKKfWbqXSQLQotTG7SLEJ+v7Z6RUGOik6+yEdprdG/lvzDAHSTuznV75de8/wN OPlobEkA==; Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.96 #2 (Debian)) id 1tjUnD-000gYv-2C; Sun, 16 Feb 2025 11:07:25 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Sun, 16 Feb 2025 11:07:24 +0800 Date: Sun, 16 Feb 2025 11:07:24 +0800 Message-Id: <91e2551c839a649fea10a171d9ae0dde104e5679.1739674648.git.herbert@gondor.apana.org.au> In-Reply-To: References: From: Herbert Xu Subject: [v2 PATCH 06/11] crypto: ahash - Set default reqsize from ahash_alg To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Add a reqsize field to struct ahash_alg and use it to set the default reqsize so 
that algorithms with a static reqsize are not forced to create an init_tfm function. Signed-off-by: Herbert Xu --- crypto/ahash.c | 4 ++++ include/crypto/hash.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/crypto/ahash.c b/crypto/ahash.c index 40ccaf4c0cd6..6b19fa6fc628 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -862,6 +862,7 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) struct ahash_alg *alg = crypto_ahash_alg(hash); crypto_ahash_set_statesize(hash, alg->halg.statesize); + crypto_ahash_set_reqsize(hash, alg->reqsize); if (tfm->__crt_alg->cra_type == &crypto_shash_type) return crypto_init_ahash_using_shash(tfm); @@ -1027,6 +1028,9 @@ static int ahash_prepare_alg(struct ahash_alg *alg) if (alg->halg.statesize == 0) return -EINVAL; + if (alg->reqsize && alg->reqsize < alg->halg.statesize) + return -EINVAL; + err = hash_prepare_alg(&alg->halg); if (err) return err; diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 4e87e39679cb..2aa83ee0ec98 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -135,6 +135,7 @@ struct ahash_request { * This is a counterpart to @init_tfm, used to remove * various changes set in @init_tfm. * @clone_tfm: Copy transform into new object, may allocate memory. + * @reqsize: Size of the request context. * @halg: see struct hash_alg_common */ struct ahash_alg { @@ -151,6 +152,8 @@ struct ahash_alg { void (*exit_tfm)(struct crypto_ahash *tfm); int (*clone_tfm)(struct crypto_ahash *dst, struct crypto_ahash *src); + unsigned int reqsize; + struct hash_alg_common halg; }; From patchwork Sun Feb 16 03:07:26 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976323 X-Patchwork-Delegate: herbert@gondor.apana.org.au Received: from abb.hmeau.com (abb.hmeau.com [144.6.53.87]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 38927175AB for ; Sun, 16 Feb 2025 03:07:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=144.6.53.87 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675252; cv=none; b=fft8W3msb1Kaur4btknzEdSBi2O/aUCRYCvTwztYKqf8FNeYi4fCdcWzqLD24NUlkq9OBIupS30vSgpAdX7VvGvL7zD+oI31xG3HKndkXcA7spn1Gb0/mZ6OcCVE6lumi+NIRFW8G9Njj8PSDu+SpG553xw/pYyq+BkW28SNki8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675252; c=relaxed/simple; bh=cvsu9qMEHJf/jmokSe2ErOC9R047bAaOIe+RGcmmviw=; h=Date:Message-Id:In-Reply-To:References:From:Subject:To:Cc; b=hlfNaW+BJXhSJi3TKAtZ+m7j0LtHVF5ZNd7y752zXuEThdEjEiJYTZHnp+HStRovJQEY5ewobjwbv2hjpMyv57FhdI/y0p36iaKr88EUD1OErIcoiiOG+ovTRSgFXlxwp2u2R1hEql6YLHd502zKVHQ0gyoHKfQoz6LE3sBiHfQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au; spf=pass smtp.mailfrom=gondor.apana.org.au; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b=IJBEJii5; arc=none smtp.client-ip=144.6.53.87 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b="IJBEJii5" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=hmeau.com; 
s=formenos; h=Cc:To:Subject:From:References:In-Reply-To:Message-Id:Date: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=kLY3Mf/uW0xOojfZN6Q+S4VUkA2I3+nbK457YK7eDbQ=; b=IJBEJii5teKVV3MAdayjtjEZCI XtcRNv/9xOa6SpXZMfzdOSDY/usJZ3MVaTPoYMdjLyWSzxJ49tQbfi6tw5quZwgB+cgdVKjnez8CP 6mK6l1ekVNzXWN/LongRjnb3nrGdFAIMF1LQib5zx/levoZDZ3xwV+BVnlu4GZ7e8/MkdkyC2nGfW 59uK/DyFBbZoWqtzInHsbSL0pExcTSEJUtdOCuMEKM/FxRW66GBSg7z0ZYOl91SF0aR6CuPulLC9J d7KxltfxGI8a5ZLiJgEh7ZjVSGpZtnGSxPyOUCDSWougSTB/4IUdWgUqaIDC3RylAYNLrQnOoF+jp /zVSu6Dg==; Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.96 #2 (Debian)) id 1tjUnG-000gZH-02; Sun, 16 Feb 2025 11:07:27 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Sun, 16 Feb 2025 11:07:26 +0800 Date: Sun, 16 Feb 2025 11:07:26 +0800 Message-Id: In-Reply-To: References: From: Herbert Xu Subject: [v2 PATCH 07/11] crypto: testmgr - Add multibuffer hash testing To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: This is based on a patch by Eric Biggers . Add limited self-test for multibuffer hash code path. This tests only a single request in chain of a random length. The other requests are all of the same length as the one being tested. Potential extension include testing all requests rather than just the single one, and varying the length of each request. Link: https://lore.kernel.org/all/20241001153718.111665-3-ebiggers@kernel.org/ Signed-off-by: Herbert Xu --- crypto/testmgr.c | 132 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 103 insertions(+), 29 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index b69877db3f33..9717b5c0f3c6 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -58,6 +58,9 @@ module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); #endif +/* Multibuffer hashing is unlimited. Set arbitrary limit for testing. */ +#define HASH_TEST_MAX_MB_MSGS 16 + #ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS /* a perfect nop */ @@ -299,6 +302,11 @@ struct test_sg_division { * @key_offset_relative_to_alignmask: if true, add the algorithm's alignmask to * the @key_offset * @finalization_type: what finalization function to use for hashes + * @multibuffer: test with multibuffer + * @multibuffer_index: random number used to generate the message index to use + * for multibuffer. + * @multibuffer_count: random number used to generate the num_msgs parameter + * for multibuffer * @nosimd: execute with SIMD disabled? Requires !CRYPTO_TFM_REQ_MAY_SLEEP. * This applies to the parts of the operation that aren't controlled * individually by @nosimd_setkey or @src_divs[].nosimd. 
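For readers who have not seen the chaining interface this test exercises, the pattern boils down to the minimal sketch below. It is not part of the patch: the helper name is made up, allocation-failure handling, cleanup and the result comparison are omitted, and it assumes (as the rest of this series does) that per-request status is reported through each chained request once the head request completes.

#include <crypto/hash.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>

/* Hypothetical helper: hash n (<= 16) equal-length buffers with one digest call. */
static int sketch_mb_digest(struct crypto_ahash *tfm,
			    struct scatterlist sg[], u8 *results[],
			    unsigned int len, int n)
{
	struct ahash_request *reqs[16];
	DECLARE_CRYPTO_WAIT(wait);
	int i;

	for (i = 0; i < n; i++) {
		reqs[i] = ahash_request_alloc(tfm, GFP_KERNEL);
		ahash_request_set_crypt(reqs[i], &sg[i], results[i], len);
	}

	/* Only the head request carries the completion callback. */
	ahash_request_set_callback(reqs[0], 0, crypto_req_done, &wait);
	for (i = 1; i < n; i++) {
		ahash_request_set_callback(reqs[i], 0, NULL, NULL);
		ahash_request_chain(reqs[i], reqs[0]);
	}

	/* One call processes every chained request. */
	return crypto_wait_req(crypto_ahash_digest(reqs[0]), &wait);
}

The test below follows the same shape, except that the extra requests write into a scratch digest and the randomly chosen request swaps its result pointer with the head request so a single, randomly selected message in the chain is actually verified.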
@@ -318,6 +326,9 @@ struct testvec_config { enum finalization_type finalization_type; bool nosimd; bool nosimd_setkey; + bool multibuffer; + unsigned int multibuffer_index; + unsigned int multibuffer_count; }; #define TESTVEC_CONFIG_NAMELEN 192 @@ -1146,6 +1157,13 @@ static void generate_random_testvec_config(struct rnd_state *rng, break; } + if (prandom_bool(rng)) { + cfg->multibuffer = true; + cfg->multibuffer_index = prandom_u32_state(rng); + cfg->multibuffer_count = prandom_u32_state(rng); + p += scnprintf(p, end - p, " multibuffer"); + } + if (!(cfg->req_flags & CRYPTO_TFM_REQ_MAY_SLEEP)) { if (prandom_bool(rng)) { cfg->nosimd = true; @@ -1446,16 +1464,61 @@ static int test_shash_vec_cfg(const struct hash_testvec *vec, driver, cfg); } -static int do_ahash_op(int (*op)(struct ahash_request *req), - struct ahash_request *req, - struct crypto_wait *wait, bool nosimd) +static int do_ahash_op_multibuffer( + int (*op)(struct ahash_request *req), + struct ahash_request *reqs[HASH_TEST_MAX_MB_MSGS], + struct crypto_wait *wait, + const struct testvec_config *cfg) { + struct ahash_request *req = reqs[0]; + u8 trash[HASH_MAX_DIGESTSIZE]; + unsigned int num_msgs; + unsigned int msg_idx; + int err; + int i; + + num_msgs = 1 + (cfg->multibuffer_count % HASH_TEST_MAX_MB_MSGS); + if (num_msgs == 1) + return op(req); + + msg_idx = cfg->multibuffer_index % num_msgs; + for (i = 1; i < num_msgs; i++) { + struct ahash_request *r2 = reqs[i]; + + ahash_request_set_callback(r2, req->base.flags, NULL, NULL); + ahash_request_set_crypt(r2, req->src, trash, req->nbytes); + ahash_request_chain(r2, req); + } + + if (msg_idx) { + reqs[msg_idx]->result = req->result; + req->result = trash; + } + + err = op(req); + + if (msg_idx) + req->result = reqs[msg_idx]->result; + + return err; +} + +static int do_ahash_op(int (*op)(struct ahash_request *req), + struct ahash_request *reqs[HASH_TEST_MAX_MB_MSGS], + struct crypto_wait *wait, + const struct testvec_config *cfg, + bool nosimd) +{ + struct ahash_request *req = reqs[0]; int err; if (nosimd) crypto_disable_simd_for_test(); - err = op(req); + if (cfg->multibuffer) + err = do_ahash_op_multibuffer(op, reqs, wait, cfg); + else + err = op(req); if (nosimd) crypto_reenable_simd_for_test(); @@ -1485,10 +1548,11 @@ static int check_nonfinal_ahash_op(const char *op, int err, static int test_ahash_vec_cfg(const struct hash_testvec *vec, const char *vec_name, const struct testvec_config *cfg, - struct ahash_request *req, + struct ahash_request *reqs[HASH_TEST_MAX_MB_MSGS], struct test_sglist *tsgl, u8 *hashstate) { + struct ahash_request *req = reqs[0]; struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); const unsigned int digestsize = crypto_ahash_digestsize(tfm); const unsigned int statesize = crypto_ahash_statesize(tfm); @@ -1540,7 +1604,7 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, ahash_request_set_callback(req, req_flags, crypto_req_done, &wait); ahash_request_set_crypt(req, tsgl->sgl, result, vec->psize); - err = do_ahash_op(crypto_ahash_digest, req, &wait, cfg->nosimd); + err = do_ahash_op(crypto_ahash_digest, reqs, &wait, cfg, cfg->nosimd); if (err) { if (err == vec->digest_error) return 0; @@ -1561,7 +1625,7 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, ahash_request_set_callback(req, req_flags, crypto_req_done, &wait); ahash_request_set_crypt(req, NULL, result, 0); - err = do_ahash_op(crypto_ahash_init, req, &wait, cfg->nosimd); + err = do_ahash_op(crypto_ahash_init, reqs, &wait, cfg, cfg->nosimd); err = 
check_nonfinal_ahash_op("init", err, result, digestsize, driver, vec_name, cfg); if (err) @@ -1577,8 +1641,8 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, crypto_req_done, &wait); ahash_request_set_crypt(req, pending_sgl, result, pending_len); - err = do_ahash_op(crypto_ahash_update, req, &wait, - divs[i]->nosimd); + err = do_ahash_op(crypto_ahash_update, reqs, &wait, + cfg, divs[i]->nosimd); err = check_nonfinal_ahash_op("update", err, result, digestsize, driver, vec_name, cfg); @@ -1621,12 +1685,13 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, ahash_request_set_crypt(req, pending_sgl, result, pending_len); if (cfg->finalization_type == FINALIZATION_TYPE_FINAL) { /* finish with update() and final() */ - err = do_ahash_op(crypto_ahash_update, req, &wait, cfg->nosimd); + err = do_ahash_op(crypto_ahash_update, reqs, &wait, cfg, cfg->nosimd); err = check_nonfinal_ahash_op("update", err, result, digestsize, driver, vec_name, cfg); if (err) return err; - err = do_ahash_op(crypto_ahash_final, req, &wait, cfg->nosimd); + ahash_request_set_callback(req, req_flags, crypto_req_done, &wait); + err = do_ahash_op(crypto_ahash_final, reqs, &wait, cfg, cfg->nosimd); if (err) { pr_err("alg: ahash: %s final() failed with err %d on test vector %s, cfg=\"%s\"\n", driver, err, vec_name, cfg->name); @@ -1634,7 +1699,7 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, } } else { /* finish with finup() */ - err = do_ahash_op(crypto_ahash_finup, req, &wait, cfg->nosimd); + err = do_ahash_op(crypto_ahash_finup, reqs, &wait, cfg, cfg->nosimd); if (err) { pr_err("alg: ahash: %s finup() failed with err %d on test vector %s, cfg=\"%s\"\n", driver, err, vec_name, cfg->name); @@ -1650,7 +1715,7 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, static int test_hash_vec_cfg(const struct hash_testvec *vec, const char *vec_name, const struct testvec_config *cfg, - struct ahash_request *req, + struct ahash_request *reqs[HASH_TEST_MAX_MB_MSGS], struct shash_desc *desc, struct test_sglist *tsgl, u8 *hashstate) @@ -1670,11 +1735,12 @@ static int test_hash_vec_cfg(const struct hash_testvec *vec, return err; } - return test_ahash_vec_cfg(vec, vec_name, cfg, req, tsgl, hashstate); + return test_ahash_vec_cfg(vec, vec_name, cfg, reqs, tsgl, hashstate); } static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num, - struct ahash_request *req, struct shash_desc *desc, + struct ahash_request *reqs[HASH_TEST_MAX_MB_MSGS], + struct shash_desc *desc, struct test_sglist *tsgl, u8 *hashstate) { char vec_name[16]; @@ -1686,7 +1752,7 @@ static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num, for (i = 0; i < ARRAY_SIZE(default_hash_testvec_configs); i++) { err = test_hash_vec_cfg(vec, vec_name, &default_hash_testvec_configs[i], - req, desc, tsgl, hashstate); + reqs, desc, tsgl, hashstate); if (err) return err; } @@ -1703,7 +1769,7 @@ static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num, generate_random_testvec_config(&rng, &cfg, cfgname, sizeof(cfgname)); err = test_hash_vec_cfg(vec, vec_name, &cfg, - req, desc, tsgl, hashstate); + reqs, desc, tsgl, hashstate); if (err) return err; cond_resched(); @@ -1762,11 +1828,12 @@ static void generate_random_hash_testvec(struct rnd_state *rng, */ static int test_hash_vs_generic_impl(const char *generic_driver, unsigned int maxkeysize, - struct ahash_request *req, + struct ahash_request *reqs[HASH_TEST_MAX_MB_MSGS], struct shash_desc *desc, struct test_sglist *tsgl, u8 
*hashstate) { + struct ahash_request *req = reqs[0]; struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); const unsigned int digestsize = crypto_ahash_digestsize(tfm); const unsigned int blocksize = crypto_ahash_blocksize(tfm); @@ -1864,7 +1931,7 @@ static int test_hash_vs_generic_impl(const char *generic_driver, sizeof(cfgname)); err = test_hash_vec_cfg(&vec, vec_name, cfg, - req, desc, tsgl, hashstate); + reqs, desc, tsgl, hashstate); if (err) goto out; cond_resched(); @@ -1929,8 +1996,8 @@ static int __alg_test_hash(const struct hash_testvec *vecs, u32 type, u32 mask, const char *generic_driver, unsigned int maxkeysize) { + struct ahash_request *reqs[HASH_TEST_MAX_MB_MSGS] = {}; struct crypto_ahash *atfm = NULL; - struct ahash_request *req = NULL; struct crypto_shash *stfm = NULL; struct shash_desc *desc = NULL; struct test_sglist *tsgl = NULL; @@ -1954,12 +2021,14 @@ static int __alg_test_hash(const struct hash_testvec *vecs, } driver = crypto_ahash_driver_name(atfm); - req = ahash_request_alloc(atfm, GFP_KERNEL); - if (!req) { - pr_err("alg: hash: failed to allocate request for %s\n", - driver); - err = -ENOMEM; - goto out; + for (i = 0; i < HASH_TEST_MAX_MB_MSGS; i++) { + reqs[i] = ahash_request_alloc(atfm, GFP_KERNEL); + if (!reqs[i]) { + pr_err("alg: hash: failed to allocate request for %s\n", + driver); + err = -ENOMEM; + goto out; + } } /* @@ -1995,12 +2064,12 @@ static int __alg_test_hash(const struct hash_testvec *vecs, if (fips_enabled && vecs[i].fips_skip) continue; - err = test_hash_vec(&vecs[i], i, req, desc, tsgl, hashstate); + err = test_hash_vec(&vecs[i], i, reqs, desc, tsgl, hashstate); if (err) goto out; cond_resched(); } - err = test_hash_vs_generic_impl(generic_driver, maxkeysize, req, + err = test_hash_vs_generic_impl(generic_driver, maxkeysize, reqs, desc, tsgl, hashstate); out: kfree(hashstate); @@ -2010,7 +2079,12 @@ static int __alg_test_hash(const struct hash_testvec *vecs, } kfree(desc); crypto_free_shash(stfm); - ahash_request_free(req); + if (reqs[0]) { + ahash_request_set_callback(reqs[0], 0, NULL, NULL); + for (i = 1; i < HASH_TEST_MAX_MB_MSGS && reqs[i]; i++) + ahash_request_chain(reqs[i], reqs[0]); + ahash_request_free(reqs[0]); + } crypto_free_ahash(atfm); return err; } From patchwork Sun Feb 16 03:07:29 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976325 X-Patchwork-Delegate: herbert@gondor.apana.org.au Received: from abb.hmeau.com (abb.hmeau.com [144.6.53.87]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DFA8AD529 for ; Sun, 16 Feb 2025 03:07:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=144.6.53.87 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675257; cv=none; b=gVxLV8JYlrzqzWLHD0v6lavRJ0rAYQEHYXGmNFtg92RnKvyRFDw4IzEwunXRaPlUU8AwmKyuzx4v3RGwy9liHFQfe56/VXq1yefNIvUK+KelGzw2ovBb80JC3rFJ9hhrhVnsTTKUv4niK9CWlveWFHAj8t4ivPvqk7QMln/UcYw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675257; c=relaxed/simple; bh=FgMRcSiz++yxQFbyjgHWDdvMPi0Me/uAuTDmDaziJdk=; h=Date:Message-Id:In-Reply-To:References:From:Subject:To:Cc; b=WRcAu9Tv2RBOe2RAfcFN85LnLLE88xngFWHHos2ojPcumb/G0JID++qDv+c5nFOljyfKBVvi773VPhGerUM6WDBbs+4SitTmmItWmzqUwaKtuC4iFYihZ0JeOLGU2mEBkYxczFp1JoNob/dFgMgSVwGcwB4IAjPdqXxc4dA6X/4= ARC-Authentication-Results: i=1; 
smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au; spf=pass smtp.mailfrom=gondor.apana.org.au; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b=hVTG/IKs; arc=none smtp.client-ip=144.6.53.87 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b="hVTG/IKs" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=hmeau.com; s=formenos; h=Cc:To:Subject:From:References:In-Reply-To:Message-Id:Date: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=pRdl186KSm6UHqDbgX/MMif8LkpukCGpeS4TFYLpgEc=; b=hVTG/IKsKxK9CNsFPkjTpSxs3u rb68cuqAklLOhIzz12mIb9PZ0UJhefw46Tq5L0MMeDmgrjO83NLAT5EWeXYQGYJQWiMqFIztqv813 C3JZfYTf0Nll8Nz3//gJWXryJnAHqZqNzgDRXkw2fZCF22liRhomhQFU0uMQIH27um2BbFKBGpOWf 92pcQJ6WC3I2YM3DeENh7jrArK5Q2h1VOvdpMZB9lLgIrdAixrlPXminy8MP74FBQTcddFOsdl5wl oPSnklD9Bke7TufoylJIv5WQIKB8jL905Q9H7oQCiMz5K5e4G6g6gVkqBp3zFe2Y7oNY1uwA6RTGZ 4xcNp0cQ==; Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.96 #2 (Debian)) id 1tjUnI-000gZj-10; Sun, 16 Feb 2025 11:07:30 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Sun, 16 Feb 2025 11:07:29 +0800 Date: Sun, 16 Feb 2025 11:07:29 +0800 Message-Id: In-Reply-To: References: From: Herbert Xu Subject: [v2 PATCH 08/11] crypto: x86/sha2 - Restore multibuffer AVX2 support To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Resurrect the old multibuffer AVX2 code removed by commit ab8085c130ed ("crypto: x86 - remove SHA multibuffer routines and mcryptd") using the new request chaining interface. This is purely a proof of concept and only meant to illustrate the utility of the new API rather than a serious attempt at improving the performance. However, it is interesting to note that with x8 multibuffer the performance of AVX2 is on par with SHA-NI. 
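As a rough guide to the glue code further down (and to what the tcrypt output below is measuring), the new update entry point dispatches roughly as in this simplified paraphrase. It is not the literal patch code and the function name is invented; the real sha256_avx2_update() additionally walks any chained requests in the fallback path and records per-request errors:

/* Simplified dispatch, mirroring sha256_avx2_update() later in this patch. */
static int sketch_avx2_update_dispatch(struct ahash_request *req)
{
	if (ahash_request_chained(req) && crypto_simd_usable()) {
		/*
		 * Gather the chained requests into lanes and run the
		 * x8 kernel once at least three lanes have full blocks.
		 */
		sha256_chain(req, false, false);
		return 0;
	}

	/* Otherwise hash one request at a time with the plain AVX2 transform. */
	return sha256_ahash_update(req, sha256_transform_rorx);
}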
testing speed of multibuffer sha256 (sha256-avx2) tcrypt: test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): 1 operation in 184 cycles (16 bytes) tcrypt: test 2 ( 64 byte blocks, 64 bytes per update, 1 updates): 1 operation in 165 cycles (64 bytes) tcrypt: test 5 ( 256 byte blocks, 256 bytes per update, 1 updates): 1 operation in 444 cycles (256 bytes) tcrypt: test 8 ( 1024 byte blocks, 1024 bytes per update, 1 updates): 1 operation in 1549 cycles (1024 bytes) tcrypt: test 12 ( 2048 byte blocks, 2048 bytes per update, 1 updates): 1 operation in 3060 cycles (2048 bytes) tcrypt: test 16 ( 4096 byte blocks, 4096 bytes per update, 1 updates): 1 operation in 5983 cycles (4096 bytes) tcrypt: test 21 ( 8192 byte blocks, 8192 bytes per update, 1 updates): 1 operation in 11980 cycles (8192 bytes) tcrypt: testing speed of async sha256 (sha256-avx2) tcrypt: test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): 475 cycles/operation, 29 cycles/byte tcrypt: test 2 ( 64 byte blocks, 64 bytes per update, 1 updates): 780 cycles/operation, 12 cycles/byte tcrypt: test 5 ( 256 byte blocks, 256 bytes per update, 1 updates): 1872 cycles/operation, 7 cycles/byte tcrypt: test 8 ( 1024 byte blocks, 1024 bytes per update, 1 updates): 5416 cycles/operation, 5 cycles/byte tcrypt: test 12 ( 2048 byte blocks, 2048 bytes per update, 1 updates): 10339 cycles/operation, 5 cycles/byte tcrypt: test 16 ( 4096 byte blocks, 4096 bytes per update, 1 updates): 20214 cycles/operation, 4 cycles/byte tcrypt: test 21 ( 8192 byte blocks, 8192 bytes per update, 1 updates): 40042 cycles/operation, 4 cycles/byte tcrypt: testing speed of async sha256-ni (sha256-ni) tcrypt: test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): 207 cycles/operation, 12 cycles/byte tcrypt: test 2 ( 64 byte blocks, 64 bytes per update, 1 updates): 299 cycles/operation, 4 cycles/byte tcrypt: test 5 ( 256 byte blocks, 256 bytes per update, 1 updates): 543 cycles/operation, 2 cycles/byte tcrypt: test 8 ( 1024 byte blocks, 1024 bytes per update, 1 updates): 1523 cycles/operation, 1 cycles/byte tcrypt: test 12 ( 2048 byte blocks, 2048 bytes per update, 1 updates): 2835 cycles/operation, 1 cycles/byte tcrypt: test 16 ( 4096 byte blocks, 4096 bytes per update, 1 updates): 5459 cycles/operation, 1 cycles/byte tcrypt: test 21 ( 8192 byte blocks, 8192 bytes per update, 1 updates): 10724 cycles/operation, 1 cycles/byte Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 +- arch/x86/crypto/sha256_mb_mgr_datastruct.S | 304 +++++++++++ arch/x86/crypto/sha256_ssse3_glue.c | 540 +++++++++++++++++-- arch/x86/crypto/sha256_x8_avx2.S | 598 +++++++++++++++++++++ 4 files changed, 1401 insertions(+), 43 deletions(-) create mode 100644 arch/x86/crypto/sha256_mb_mgr_datastruct.S create mode 100644 arch/x86/crypto/sha256_x8_avx2.S diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 07b00bfca64b..ab3fb2a9ebea 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -60,7 +60,7 @@ sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o sha1-ssse3-$(CONFIG_AS_SHA1_NI) += sha1_ni_asm.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o -sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o +sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o sha256_x8_avx2.o sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o diff --git a/arch/x86/crypto/sha256_mb_mgr_datastruct.S 
b/arch/x86/crypto/sha256_mb_mgr_datastruct.S new file mode 100644 index 000000000000..5c377bac21d0 --- /dev/null +++ b/arch/x86/crypto/sha256_mb_mgr_datastruct.S @@ -0,0 +1,304 @@ +/* + * Header file for multi buffer SHA256 algorithm data structure + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +# Macros for defining data structures + +# Usage example + +#START_FIELDS # JOB_AES +### name size align +#FIELD _plaintext, 8, 8 # pointer to plaintext +#FIELD _ciphertext, 8, 8 # pointer to ciphertext +#FIELD _IV, 16, 8 # IV +#FIELD _keys, 8, 8 # pointer to keys +#FIELD _len, 4, 4 # length in bytes +#FIELD _status, 4, 4 # status enumeration +#FIELD _user_data, 8, 8 # pointer to user data +#UNION _union, size1, align1, \ +# size2, align2, \ +# size3, align3, \ +# ... +#END_FIELDS +#%assign _JOB_AES_size _FIELD_OFFSET +#%assign _JOB_AES_align _STRUCT_ALIGN + +######################################################################### + +# Alternate "struc-like" syntax: +# STRUCT job_aes2 +# RES_Q .plaintext, 1 +# RES_Q .ciphertext, 1 +# RES_DQ .IV, 1 +# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN +# RES_U .union, size1, align1, \ +# size2, align2, \ +# ... 
+# ENDSTRUCT +# # Following only needed if nesting +# %assign job_aes2_size _FIELD_OFFSET +# %assign job_aes2_align _STRUCT_ALIGN +# +# RES_* macros take a name, a count and an optional alignment. +# The count in in terms of the base size of the macro, and the +# default alignment is the base size. +# The macros are: +# Macro Base size +# RES_B 1 +# RES_W 2 +# RES_D 4 +# RES_Q 8 +# RES_DQ 16 +# RES_Y 32 +# RES_Z 64 +# +# RES_U defines a union. It's arguments are a name and two or more +# pairs of "size, alignment" +# +# The two assigns are only needed if this structure is being nested +# within another. Even if the assigns are not done, one can still use +# STRUCT_NAME_size as the size of the structure. +# +# Note that for nesting, you still need to assign to STRUCT_NAME_size. +# +# The differences between this and using "struc" directly are that each +# type is implicitly aligned to its natural length (although this can be +# over-ridden with an explicit third parameter), and that the structure +# is padded at the end to its overall alignment. +# + +######################################################################### + +#ifndef _DATASTRUCT_ASM_ +#define _DATASTRUCT_ASM_ + +#define SZ8 8*SHA256_DIGEST_WORD_SIZE +#define ROUNDS 64*SZ8 +#define PTR_SZ 8 +#define SHA256_DIGEST_WORD_SIZE 4 +#define MAX_SHA256_LANES 8 +#define SHA256_DIGEST_WORDS 8 +#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE) +#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS) +#define SHA256_BLK_SZ 64 + +# START_FIELDS +.macro START_FIELDS + _FIELD_OFFSET = 0 + _STRUCT_ALIGN = 0 +.endm + +# FIELD name size align +.macro FIELD name size align + _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) + \name = _FIELD_OFFSET + _FIELD_OFFSET = _FIELD_OFFSET + (\size) +.if (\align > _STRUCT_ALIGN) + _STRUCT_ALIGN = \align +.endif +.endm + +# END_FIELDS +.macro END_FIELDS + _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) +.endm + +######################################################################## + +.macro STRUCT p1 +START_FIELDS +.struc \p1 +.endm + +.macro ENDSTRUCT + tmp = _FIELD_OFFSET + END_FIELDS + tmp = (_FIELD_OFFSET - %%tmp) +.if (tmp > 0) + .lcomm tmp +.endif +.endstruc +.endm + +## RES_int name size align +.macro RES_int p1 p2 p3 + name = \p1 + size = \p2 + align = .\p3 + + _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) +.align align +.lcomm name size + _FIELD_OFFSET = _FIELD_OFFSET + (size) +.if (align > _STRUCT_ALIGN) + _STRUCT_ALIGN = align +.endif +.endm + +# macro RES_B name, size [, align] +.macro RES_B _name, _size, _align=1 +RES_int _name _size _align +.endm + +# macro RES_W name, size [, align] +.macro RES_W _name, _size, _align=2 +RES_int _name 2*(_size) _align +.endm + +# macro RES_D name, size [, align] +.macro RES_D _name, _size, _align=4 +RES_int _name 4*(_size) _align +.endm + +# macro RES_Q name, size [, align] +.macro RES_Q _name, _size, _align=8 +RES_int _name 8*(_size) _align +.endm + +# macro RES_DQ name, size [, align] +.macro RES_DQ _name, _size, _align=16 +RES_int _name 16*(_size) _align +.endm + +# macro RES_Y name, size [, align] +.macro RES_Y _name, _size, _align=32 +RES_int _name 32*(_size) _align +.endm + +# macro RES_Z name, size [, align] +.macro RES_Z _name, _size, _align=64 +RES_int _name 64*(_size) _align +.endm + +#endif + + +######################################################################## +#### Define SHA256 Out Of Order Data Structures 
+######################################################################## + +START_FIELDS # LANE_DATA +### name size align +FIELD _job_in_lane, 8, 8 # pointer to job object +END_FIELDS + + _LANE_DATA_size = _FIELD_OFFSET + _LANE_DATA_align = _STRUCT_ALIGN + +######################################################################## + +START_FIELDS # SHA256_ARGS_X4 +### name size align +FIELD _digest, 4*8*8, 4 # transposed digest +FIELD _data_ptr, 8*8, 8 # array of pointers to data +END_FIELDS + + _SHA256_ARGS_X4_size = _FIELD_OFFSET + _SHA256_ARGS_X4_align = _STRUCT_ALIGN + _SHA256_ARGS_X8_size = _FIELD_OFFSET + _SHA256_ARGS_X8_align = _STRUCT_ALIGN + +####################################################################### + +START_FIELDS # MB_MGR +### name size align +FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align +FIELD _lens, 4*8, 8 +FIELD _unused_lanes, 8, 8 +FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align +END_FIELDS + + _MB_MGR_size = _FIELD_OFFSET + _MB_MGR_align = _STRUCT_ALIGN + +_args_digest = _args + _digest +_args_data_ptr = _args + _data_ptr + +####################################################################### + +START_FIELDS #STACK_FRAME +### name size align +FIELD _data, 16*SZ8, 1 # transposed digest +FIELD _digest, 8*SZ8, 1 # array of pointers to data +FIELD _ytmp, 4*SZ8, 1 +FIELD _rsp, 8, 1 +END_FIELDS + + _STACK_FRAME_size = _FIELD_OFFSET + _STACK_FRAME_align = _STRUCT_ALIGN + +####################################################################### + +######################################################################## +#### Define constants +######################################################################## + +#define STS_UNKNOWN 0 +#define STS_BEING_PROCESSED 1 +#define STS_COMPLETED 2 + +######################################################################## +#### Define JOB_SHA256 structure +######################################################################## + +START_FIELDS # JOB_SHA256 + +### name size align +FIELD _buffer, 8, 8 # pointer to buffer +FIELD _len, 8, 8 # length in bytes +FIELD _result_digest, 8*4, 32 # Digest (output) +FIELD _status, 4, 4 +FIELD _user_data, 8, 8 +END_FIELDS + + _JOB_SHA256_size = _FIELD_OFFSET + _JOB_SHA256_align = _STRUCT_ALIGN diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index e04a43d9f7d5..130918d01930 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -41,8 +41,24 @@ #include #include +struct sha256_x8_mbctx { + u32 state[8][8]; + const u8 *input[8]; +}; + +struct sha256_reqctx { + struct sha256_state state; + struct crypto_hash_walk walk; + const u8 *input; + int total; + unsigned int next; +}; + asmlinkage void sha256_transform_ssse3(struct sha256_state *state, const u8 *data, int blocks); +asmlinkage void sha256_transform_rorx(struct sha256_state *state, + const u8 *data, int blocks); +asmlinkage void sha256_x8_avx2(struct sha256_x8_mbctx *mbctx, int blocks); static const struct x86_cpu_id module_cpu_ids[] = { #ifdef CONFIG_AS_SHA256_NI @@ -55,14 +71,69 @@ static const struct x86_cpu_id module_cpu_ids[] = { }; MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); -static int _sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha256_block_fn *sha256_xform) +static int sha256_import(struct ahash_request *req, const void *in) { - struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_reqctx *rctx = ahash_request_ctx(req); + memcpy(&rctx->state, in, sizeof(rctx->state)); + return 0; +} + 
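The struct sha256_x8_mbctx declared above is the C view of the transposed layout described by sha256_mb_mgr_datastruct.S: the digest is stored as state[word][lane], so one 32-byte AVX2 register holds the same digest word for all eight lanes. The sketch below is illustrative only, not how the patch is structured (the real gather/scatter lives in sha256_update_x8x1()); it assumes every lane carries the same number of whole blocks and that active lanes occupy the leading slots, and the kernel_fpu_begin()/end() pair is shown simply because any caller of AVX2 code must be inside a SIMD section.

#include <asm/fpu/api.h>
#include <crypto/sha2.h>

/* Hypothetical helper: run up to 8 lanes through the x8 AVX2 kernel. */
static void sketch_sha256_x8(struct sha256_state *lanes[8],
			     const u8 *data[8], int blocks)
{
	struct sha256_x8_mbctx mbctx;
	int i, j;

	for (i = 0; i < 8; i++) {
		/* Point unused lanes at lane 0 so the asm's loads stay valid. */
		mbctx.input[i] = lanes[i] ? data[i] : data[0];
		for (j = 0; j < 8; j++)
			mbctx.state[j][i] = lanes[i] ? lanes[i]->state[j] : 0;
	}

	kernel_fpu_begin();
	sha256_x8_avx2(&mbctx, blocks);
	kernel_fpu_end();

	/* Scatter the transposed digests back to the per-lane states. */
	for (i = 0; i < 8 && lanes[i]; i++)
		for (j = 0; j < 8; j++)
			lanes[i]->state[j] = mbctx.state[j][i];
}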
+static int sha256_export(struct ahash_request *req, void *out) +{ + struct sha256_reqctx *rctx = ahash_request_ctx(req); + + memcpy(out, &rctx->state, sizeof(rctx->state)); + return 0; +} + +static int sha256_ahash_init(struct ahash_request *req) +{ + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct ahash_request *r2; + + sha256_init(&rctx->state); + + if (!ahash_request_chained(req)) + return 0; + + req->base.err = 0; + list_for_each_entry(r2, &req->base.list, base.list) { + r2->base.err = 0; + rctx = ahash_request_ctx(r2); + sha256_init(&rctx->state); + } + + return 0; +} + +static int sha224_ahash_init(struct ahash_request *req) +{ + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct ahash_request *r2; + + sha224_init(&rctx->state); + + if (!ahash_request_chained(req)) + return 0; + + req->base.err = 0; + list_for_each_entry(r2, &req->base.list, base.list) { + rctx = ahash_request_ctx(r2); + sha224_init(&rctx->state); + } + + return 0; +} + +static void __sha256_update(struct sha256_state *sctx, const u8 *data, + unsigned int len, sha256_block_fn *sha256_xform) +{ if (!crypto_simd_usable() || - (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) - return crypto_sha256_update(desc, data, len); + (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) { + sha256_update(sctx, data, len); + return; + } /* * Make sure struct sha256_state begins directly with the SHA256 @@ -71,25 +142,97 @@ static int _sha256_update(struct shash_desc *desc, const u8 *data, BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); kernel_fpu_begin(); - sha256_base_do_update(desc, data, len, sha256_xform); + lib_sha256_base_do_update(sctx, data, len, sha256_xform); + kernel_fpu_end(); +} + +static int _sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_block_fn *sha256_xform) +{ + __sha256_update(shash_desc_ctx(desc), data, len, sha256_xform); + return 0; +} + +static int sha256_ahash_update(struct ahash_request *req, + sha256_block_fn *sha256_xform) +{ + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct crypto_hash_walk *walk = &rctx->walk; + struct sha256_state *state = &rctx->state; + int nbytes; + + /* + * Make sure struct sha256_state begins directly with the SHA256 + * 256-bit internal state, as this is what the asm functions expect. 
+ */ + BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); + + for (nbytes = crypto_hash_walk_first(req, walk); nbytes > 0; + nbytes = crypto_hash_walk_done(walk, 0)) + __sha256_update(state, walk->data, nbytes, sha256_xform); + + return nbytes; +} + +static void _sha256_finup(struct sha256_state *state, const u8 *data, + unsigned int len, u8 *out, unsigned int ds, + sha256_block_fn *sha256_xform) +{ + if (!crypto_simd_usable()) { + sha256_update(state, data, len); + if (ds == SHA224_DIGEST_SIZE) + sha224_final(state, out); + else + sha256_final(state, out); + return; + } + + kernel_fpu_begin(); + if (len) + lib_sha256_base_do_update(state, data, len, sha256_xform); + lib_sha256_base_do_finalize(state, sha256_xform); kernel_fpu_end(); - return 0; + lib_sha256_base_finish(state, out, ds); +} + +static int sha256_ahash_finup(struct ahash_request *req, bool nodata, + sha256_block_fn *sha256_xform) +{ + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct crypto_hash_walk *walk = &rctx->walk; + struct sha256_state *state = &rctx->state; + unsigned int ds; + int nbytes; + + ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + if (nodata || !req->nbytes) { + _sha256_finup(state, NULL, 0, req->result, + ds, sha256_xform); + return 0; + } + + for (nbytes = crypto_hash_walk_first(req, walk); nbytes > 0; + nbytes = crypto_hash_walk_done(walk, 0)) { + if (crypto_hash_walk_last(walk)) { + _sha256_finup(state, walk->data, nbytes, req->result, + ds, sha256_xform); + continue; + } + + __sha256_update(state, walk->data, nbytes, sha256_xform); + } + + return nbytes; } static int sha256_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha256_block_fn *sha256_xform) { - if (!crypto_simd_usable()) - return crypto_sha256_finup(desc, data, len, out); + unsigned int ds = crypto_shash_digestsize(desc->tfm); - kernel_fpu_begin(); - if (len) - sha256_base_do_update(desc, data, len, sha256_xform); - sha256_base_do_finalize(desc, sha256_xform); - kernel_fpu_end(); - - return sha256_base_finish(desc, out); + _sha256_finup(shash_desc_ctx(desc), data, len, out, ds, sha256_xform); + return 0; } static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, @@ -247,61 +390,374 @@ static void unregister_sha256_avx(void) ARRAY_SIZE(sha256_avx_algs)); } -asmlinkage void sha256_transform_rorx(struct sha256_state *state, - const u8 *data, int blocks); - -static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha256_pad2(unsigned int partial, struct ahash_request *req) { - return _sha256_update(desc, data, len, sha256_transform_rorx); + const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct sha256_state *state = &rctx->state; + __be64 *bits; + + if (rctx->total) + return 0; + + rctx->total = -1; + + memset(state->buf + partial, 0, bit_offset - partial); + bits = (__be64 *)(state->buf + bit_offset); + *bits = cpu_to_be64(state->count << 3); + + return SHA256_BLOCK_SIZE; } -static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha256_pad1(struct ahash_request *req, bool final) { - return sha256_finup(desc, data, len, out, sha256_transform_rorx); + const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct sha256_state *state = &rctx->state; + unsigned int partial = state->count; + + if (!final) + return 0; + + rctx->total = 0; + rctx->input = 
state->buf; + + partial %= SHA256_BLOCK_SIZE; + state->buf[partial++] = 0x80; + + if (partial > bit_offset) { + memset(state->buf + partial, 0, SHA256_BLOCK_SIZE - partial); + return SHA256_BLOCK_SIZE; + } + + return sha256_pad2(partial, req); } -static int sha256_avx2_final(struct shash_desc *desc, u8 *out) +static int sha256_mb_fill(struct ahash_request *req, bool final) { - return sha256_avx2_finup(desc, NULL, 0, out); + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct sha256_state *state = &rctx->state; + int nbytes = rctx->total; + unsigned int partial; + + partial = state->count % SHA256_BLOCK_SIZE; + while (partial + nbytes < SHA256_BLOCK_SIZE) { + memcpy(state->buf + partial, rctx->input, nbytes); + state->count += nbytes; + partial += nbytes; + + nbytes = crypto_hash_walk_done(&rctx->walk, 0); + if (!nbytes) + return sha256_pad1(req, final); + + rctx->input = rctx->walk.data; + rctx->total = nbytes; + } + + if (partial) { + unsigned int offset = SHA256_BLOCK_SIZE - partial; + + memcpy(state->buf + partial, rctx->input, offset); + rctx->input = state->buf; + + return SHA256_BLOCK_SIZE; + } + + return nbytes; } -static int sha256_avx2_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha256_mb_start(struct ahash_request *req, bool nodata, bool final) { - return sha256_base_init(desc) ?: - sha256_avx2_finup(desc, data, len, out); + struct sha256_reqctx *rctx = ahash_request_ctx(req); + int nbytes; + + nbytes = nodata ? 0 : crypto_hash_walk_first(req, &rctx->walk); + if (!nbytes) + return sha256_pad1(req, final); + + rctx->input = rctx->walk.data; + rctx->total = nbytes; + + return sha256_mb_fill(req, final); } -static struct shash_alg sha256_avx2_algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, +static int sha256_mb_next(struct ahash_request *req, unsigned int len, + bool final) +{ + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct sha256_state *state = &rctx->state; + + if (rctx->input != state->buf) + ; + else if (rctx->total <= 0) + return sha256_pad2(0, req); + else { + len = SHA256_BLOCK_SIZE - state->count % SHA256_BLOCK_SIZE; + rctx->input = rctx->walk.data; + } + + rctx->input += len; + rctx->total -= len; + state->count += len; + + return sha256_mb_fill(req, final); +} + +static struct ahash_request *sha256_update_x8x1( + struct list_head *list, struct ahash_request *r2, + struct ahash_request *reqs[8], bool nodata, bool final) +{ + struct sha256_state *states[8]; + struct sha256_x8_mbctx mbctx; + unsigned int len = 0; + int i = 0; + + do { + struct sha256_reqctx *rctx = ahash_request_ctx(reqs[i]); + unsigned int nbytes; + + nbytes = rctx->next; + if (!i || nbytes < len) + len = nbytes; + + states[i] = &rctx->state; + mbctx.input[i] = rctx->input; + } while (++i < 8 && reqs[i]); + + len &= ~(SHA256_BLOCK_SIZE - 1); + + /* 3 is the break-even point for x8. */ + if (i < 3) { + do { + i--; + sha256_transform_rorx(states[i], mbctx.input[i], + len / SHA256_BLOCK_SIZE); + } while (i); + goto done; + } + + for (; i < 8; i++) { + mbctx.input[i] = mbctx.input[0]; + states[i] = NULL; + } + + for (i = 0; i < 8; i++) { + int j; + + for (j = 0; j < 8; j++) + mbctx.state[i][j] = states[j] ? 
states[j]->state[i] : 0; + } + + sha256_x8_avx2(&mbctx, len / SHA256_BLOCK_SIZE); + + for (i = 0; i < 8 && states[i]; i++) { + int j; + + for (j = 0; j < 8; j++) + states[i]->state[j] = mbctx.state[j][i]; + } + +done: + i = 0; + do { + struct sha256_reqctx *rctx = ahash_request_ctx(reqs[i]); + + rctx->next = sha256_mb_next(reqs[i], len, final); + + if (rctx->next) { + if (++i >= 8) + break; + continue; + } + + if (i < 7 && reqs[i + 1]) { + memmove(reqs + i, reqs + i + 1, sizeof(r2) * (7 - i)); + reqs[7] = NULL; + continue; + } + + reqs[i] = NULL; + + do { + while (!list_is_last(&r2->base.list, list)) { + r2 = list_next_entry(r2, base.list); + r2->base.err = 0; + + rctx = ahash_request_ctx(r2); + rctx->next = sha256_mb_start(r2, nodata, final); + if (rctx->next) { + reqs[i] = r2; + break; + } + } + } while (reqs[i] && ++i < 8); + + break; + } while (reqs[i]); + + return r2; +} + +static void sha256_update_x8(struct list_head *list, + struct ahash_request *reqs[8], int i, + bool nodata, bool final) +{ + struct ahash_request *r2 = reqs[i - 1]; + + do { + r2 = sha256_update_x8x1(list, r2, reqs, nodata, final); + } while (reqs[0]); +} + +static void sha256_chain(struct ahash_request *req, bool nodata, bool final) +{ + struct sha256_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + unsigned int ds = crypto_ahash_digestsize(tfm); + struct ahash_request *reqs[8] = {}; + struct ahash_request *r2; + int i; + + req->base.err = 0; + reqs[0] = req; + rctx->next = sha256_mb_start(req, nodata, final); + i = !!rctx->next; + list_for_each_entry(r2, &req->base.list, base.list) { + struct sha256_reqctx *r2ctx = ahash_request_ctx(r2); + + r2->base.err = 0; + + r2ctx = ahash_request_ctx(r2); + r2ctx->next = sha256_mb_start(r2, nodata, final); + if (!r2ctx->next) + continue; + + reqs[i++] = r2; + if (i >= 8) + break; + } + + if (i) + sha256_update_x8(&req->base.list, reqs, i, nodata, final); + + if (!final) + return; + + lib_sha256_base_finish(&rctx->state, req->result, ds); + list_for_each_entry(r2, &req->base.list, base.list) { + struct sha256_reqctx *r2ctx = ahash_request_ctx(r2); + + lib_sha256_base_finish(&r2ctx->state, r2->result, ds); + } +} + +static int sha256_avx2_update(struct ahash_request *req) +{ + struct ahash_request *r2; + int err; + + if (ahash_request_chained(req) && crypto_simd_usable()) { + sha256_chain(req, false, false); + return 0; + } + + err = sha256_ahash_update(req, sha256_transform_rorx); + if (!ahash_request_chained(req)) + return err; + + req->base.err = err; + + list_for_each_entry(r2, &req->base.list, base.list) { + err = sha256_ahash_update(r2, sha256_transform_rorx); + r2->base.err = err; + } + + return 0; +} + +static int _sha256_avx2_finup(struct ahash_request *req, bool nodata) +{ + struct ahash_request *r2; + int err; + + if (ahash_request_chained(req) && crypto_simd_usable()) { + sha256_chain(req, nodata, true); + return 0; + } + + err = sha256_ahash_finup(req, nodata, sha256_transform_rorx); + if (!ahash_request_chained(req)) + return err; + + req->base.err = err; + + list_for_each_entry(r2, &req->base.list, base.list) { + err = sha256_ahash_finup(r2, nodata, sha256_transform_rorx); + r2->base.err = err; + } + + return 0; +} + +static int sha256_avx2_finup(struct ahash_request *req) +{ + return _sha256_avx2_finup(req, false); +} + +static int sha256_avx2_final(struct ahash_request *req) +{ + return _sha256_avx2_finup(req, true); +} + +static int sha256_avx2_digest(struct ahash_request *req) +{ + return sha256_ahash_init(req) 
?: + sha256_avx2_finup(req); +} + +static int sha224_avx2_digest(struct ahash_request *req) +{ + return sha224_ahash_init(req) ?: + sha256_avx2_finup(req); +} + +static struct ahash_alg sha256_avx2_algs[] = { { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct sha256_state), + .reqsize = sizeof(struct sha256_reqctx), + .init = sha256_ahash_init, .update = sha256_avx2_update, .final = sha256_avx2_final, .finup = sha256_avx2_finup, .digest = sha256_avx2_digest, - .descsize = sizeof(struct sha256_state), - .base = { + .import = sha256_import, + .export = sha256_export, + .halg.base = { .cra_name = "sha256", .cra_driver_name = "sha256-avx2", .cra_priority = 170, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, + .cra_flags = CRYPTO_ALG_REQ_CHAIN, } }, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct sha256_state), + .reqsize = sizeof(struct sha256_reqctx), + .init = sha224_ahash_init, .update = sha256_avx2_update, .final = sha256_avx2_final, .finup = sha256_avx2_finup, - .descsize = sizeof(struct sha256_state), - .base = { + .digest = sha224_avx2_digest, + .import = sha256_import, + .export = sha256_export, + .halg.base = { .cra_name = "sha224", .cra_driver_name = "sha224-avx2", .cra_priority = 170, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_module = THIS_MODULE, + .cra_flags = CRYPTO_ALG_REQ_CHAIN, } } }; @@ -317,7 +773,7 @@ static bool avx2_usable(void) static int register_sha256_avx2(void) { if (avx2_usable()) - return crypto_register_shashes(sha256_avx2_algs, + return crypto_register_ahashes(sha256_avx2_algs, ARRAY_SIZE(sha256_avx2_algs)); return 0; } @@ -325,7 +781,7 @@ static int register_sha256_avx2(void) static void unregister_sha256_avx2(void) { if (avx2_usable()) - crypto_unregister_shashes(sha256_avx2_algs, + crypto_unregister_ahashes(sha256_avx2_algs, ARRAY_SIZE(sha256_avx2_algs)); } diff --git a/arch/x86/crypto/sha256_x8_avx2.S b/arch/x86/crypto/sha256_x8_avx2.S new file mode 100644 index 000000000000..ce74f8963236 --- /dev/null +++ b/arch/x86/crypto/sha256_x8_avx2.S @@ -0,0 +1,598 @@ +/* + * Multi-buffer SHA256 algorithm hash compute routine + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "sha256_mb_mgr_datastruct.S" + +## code to compute oct SHA256 using SSE-256 +## outer calling routine takes care of save and restore of XMM registers +## Logic designed/laid out by JDG + +## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15 +## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 +## Linux preserves: rdi rbp r8 +## +## clobbers %ymm0-15 + +arg1 = %rdi +arg2 = %rsi +reg3 = %rcx +reg4 = %rdx + +# Common definitions +STATE = arg1 +INP_SIZE = arg2 + +IDX = %rax +ROUND = %rbx +TBL = reg3 + +inp0 = %r9 +inp1 = %r10 +inp2 = %r11 +inp3 = %r12 +inp4 = %r13 +inp5 = %r14 +inp6 = %r15 +inp7 = reg4 + +a = %ymm0 +b = %ymm1 +c = %ymm2 +d = %ymm3 +e = %ymm4 +f = %ymm5 +g = %ymm6 +h = %ymm7 + +T1 = %ymm8 + +a0 = %ymm12 +a1 = %ymm13 +a2 = %ymm14 +TMP = %ymm15 +TMP0 = %ymm6 +TMP1 = %ymm7 + +TT0 = %ymm8 +TT1 = %ymm9 +TT2 = %ymm10 +TT3 = %ymm11 +TT4 = %ymm12 +TT5 = %ymm13 +TT6 = %ymm14 +TT7 = %ymm15 + +# Define stack usage + +# Assume stack aligned to 32 bytes before call +# Therefore FRAMESZ mod 32 must be 32-8 = 24 + +#define FRAMESZ 0x388 + +#define VMOVPS vmovups + +# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 +# "transpose" data in {r0...r7} using temps {t0...t1} +# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} +# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} +# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} +# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} +# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} +# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} +# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} +# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} +# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} +# +# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} +# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} +# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} +# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} +# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} +# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} +# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} +# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} +# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} +# + +.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 + # process top half (r0..r3) {a...d} + vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} + vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} + vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} + vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} + vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} + vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} + vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} + vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} + + # use r2 in place of t0 + # process 
bottom half (r4..r7) {e...h} + vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} + vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} + vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} + vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} + vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} + vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} + vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} + vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} + + vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 + vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 + vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 + vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 + vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 + vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 + vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 + vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 + +.endm + +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +.macro _PRORD reg imm tmp + vpslld $(32-\imm),\reg,\tmp + vpsrld $\imm,\reg, \reg + vpor \tmp,\reg, \reg +.endm + +# PRORD_nd reg, imm, tmp, src +.macro _PRORD_nd reg imm tmp src + vpslld $(32-\imm), \src, \tmp + vpsrld $\imm, \src, \reg + vpor \tmp, \reg, \reg +.endm + +# PRORD dst/src, amt +.macro PRORD reg imm + _PRORD \reg,\imm,TMP +.endm + +# PRORD_nd dst, src, amt +.macro PRORD_nd reg tmp imm + _PRORD_nd \reg, \imm, TMP, \tmp +.endm + +# arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_00_15 _T1 i + PRORD_nd a0,e,5 # sig1: a0 = (e >> 5) + + vpxor g, f, a2 # ch: a2 = f^g + vpand e,a2, a2 # ch: a2 = (f^g)&e + vpxor g, a2, a2 # a2 = ch + + PRORD_nd a1,e,25 # sig1: a1 = (e >> 25) + + vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp) + vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K + vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddd a2, h, h # h = h + ch + PRORD_nd a2,a,11 # sig0: a2 = (a >> 11) + vpaddd \_T1,h, h # h = h + ch + W + K + vpxor a1, a0, a0 # a0 = sigma1 + PRORD_nd a1,a,22 # sig0: a1 = (a >> 22) + vpxor c, a, \_T1 # maj: T1 = a^c + add $SZ8, ROUND # ROUND++ + vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b + vpaddd a0, h, h + vpaddd h, d, d + vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) + PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a1, a2, a2 # a2 = sig0 + vpand c, a, a1 # maj: a1 = a&c + vpor \_T1, a1, a1 # a1 = maj + vpaddd a1, h, h # h = h + ch + W + K + maj + vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0 + ROTATE_ARGS +.endm + +# arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_16_XX _T1 i + vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1 + vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1 + vmovdqu \_T1, a0 + PRORD \_T1,11 + vmovdqu a1, a2 + PRORD a1,2 + vpxor a0, \_T1, \_T1 + PRORD \_T1, 7 + vpxor a2, a1, a1 + PRORD a1, 17 + vpsrld $3, a0, a0 + vpxor a0, \_T1, \_T1 + vpsrld $10, a2, a2 + vpxor a2, a1, a1 + vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1 + vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1 + vpaddd a1, \_T1, \_T1 + + ROUND_00_15 \_T1,\i +.endm + +# void sha256_x8_avx2(struct sha256_mbctx *ctx, int blocks); +# +# arg 1 : ctx : pointer to array of pointers to input data +# arg 2 : blocks : size of input in blocks + # save rsp, allocate 32-byte aligned for local variables +SYM_FUNC_START(sha256_x8_avx2) + # save callee-saved clobbered registers to comply with C function ABI + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + push %rbp + mov %rsp, %rbp + + sub $FRAMESZ, %rsp + and $~0x1F, %rsp + + # Load 
the pre-transposed incoming digest. + vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a + vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b + vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c + vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d + vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e + vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f + vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g + vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h + + lea K256_8(%rip),TBL + + # load the address of each of the 4 message lanes + # getting ready to transpose input onto stack + mov _args_data_ptr+0*PTR_SZ(STATE),inp0 + mov _args_data_ptr+1*PTR_SZ(STATE),inp1 + mov _args_data_ptr+2*PTR_SZ(STATE),inp2 + mov _args_data_ptr+3*PTR_SZ(STATE),inp3 + mov _args_data_ptr+4*PTR_SZ(STATE),inp4 + mov _args_data_ptr+5*PTR_SZ(STATE),inp5 + mov _args_data_ptr+6*PTR_SZ(STATE),inp6 + mov _args_data_ptr+7*PTR_SZ(STATE),inp7 + + xor IDX, IDX +lloop: + xor ROUND, ROUND + + # save old digest + vmovdqu a, _digest(%rsp) + vmovdqu b, _digest+1*SZ8(%rsp) + vmovdqu c, _digest+2*SZ8(%rsp) + vmovdqu d, _digest+3*SZ8(%rsp) + vmovdqu e, _digest+4*SZ8(%rsp) + vmovdqu f, _digest+5*SZ8(%rsp) + vmovdqu g, _digest+6*SZ8(%rsp) + vmovdqu h, _digest+7*SZ8(%rsp) + i = 0 +.rep 2 + VMOVPS i*32(inp0, IDX), TT0 + VMOVPS i*32(inp1, IDX), TT1 + VMOVPS i*32(inp2, IDX), TT2 + VMOVPS i*32(inp3, IDX), TT3 + VMOVPS i*32(inp4, IDX), TT4 + VMOVPS i*32(inp5, IDX), TT5 + VMOVPS i*32(inp6, IDX), TT6 + VMOVPS i*32(inp7, IDX), TT7 + vmovdqu g, _ytmp(%rsp) + vmovdqu h, _ytmp+1*SZ8(%rsp) + TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1 + vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1 + vmovdqu _ytmp(%rsp), g + vpshufb TMP1, TT0, TT0 + vpshufb TMP1, TT1, TT1 + vpshufb TMP1, TT2, TT2 + vpshufb TMP1, TT3, TT3 + vpshufb TMP1, TT4, TT4 + vpshufb TMP1, TT5, TT5 + vpshufb TMP1, TT6, TT6 + vpshufb TMP1, TT7, TT7 + vmovdqu _ytmp+1*SZ8(%rsp), h + vmovdqu TT4, _ytmp(%rsp) + vmovdqu TT5, _ytmp+1*SZ8(%rsp) + vmovdqu TT6, _ytmp+2*SZ8(%rsp) + vmovdqu TT7, _ytmp+3*SZ8(%rsp) + ROUND_00_15 TT0,(i*8+0) + vmovdqu _ytmp(%rsp), TT0 + ROUND_00_15 TT1,(i*8+1) + vmovdqu _ytmp+1*SZ8(%rsp), TT1 + ROUND_00_15 TT2,(i*8+2) + vmovdqu _ytmp+2*SZ8(%rsp), TT2 + ROUND_00_15 TT3,(i*8+3) + vmovdqu _ytmp+3*SZ8(%rsp), TT3 + ROUND_00_15 TT0,(i*8+4) + ROUND_00_15 TT1,(i*8+5) + ROUND_00_15 TT2,(i*8+6) + ROUND_00_15 TT3,(i*8+7) + i = (i+1) +.endr + add $64, IDX + i = (i*8) + + jmp Lrounds_16_xx +.align 16 +Lrounds_16_xx: +.rep 16 + ROUND_16_XX T1, i + i = (i+1) +.endr + + cmp $ROUNDS,ROUND + jb Lrounds_16_xx + + # add old digest + vpaddd _digest+0*SZ8(%rsp), a, a + vpaddd _digest+1*SZ8(%rsp), b, b + vpaddd _digest+2*SZ8(%rsp), c, c + vpaddd _digest+3*SZ8(%rsp), d, d + vpaddd _digest+4*SZ8(%rsp), e, e + vpaddd _digest+5*SZ8(%rsp), f, f + vpaddd _digest+6*SZ8(%rsp), g, g + vpaddd _digest+7*SZ8(%rsp), h, h + + sub $1, INP_SIZE # unit is blocks + jne lloop + + # write back to memory (state object) the transposed digest + vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE) + + # update input pointers + add IDX, inp0 + mov inp0, _args_data_ptr+0*8(STATE) + add IDX, inp1 + mov inp1, _args_data_ptr+1*8(STATE) + add IDX, inp2 + mov inp2, _args_data_ptr+2*8(STATE) + add IDX, inp3 + mov inp3, _args_data_ptr+3*8(STATE) + add IDX, inp4 + mov inp4, 
_args_data_ptr+4*8(STATE) + add IDX, inp5 + mov inp5, _args_data_ptr+5*8(STATE) + add IDX, inp6 + mov inp6, _args_data_ptr+6*8(STATE) + add IDX, inp7 + mov inp7, _args_data_ptr+7*8(STATE) + + # Postamble + mov %rbp, %rsp + pop %rbp + + # restore callee-saved clobbered registers + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + + RET +SYM_FUNC_END(sha256_x8_avx2) + +.section .rodata.K256_8, "a", @progbits +.align 64 +K256_8: + .octa 0x428a2f98428a2f98428a2f98428a2f98 + .octa 0x428a2f98428a2f98428a2f98428a2f98 + .octa 0x71374491713744917137449171374491 + .octa 0x71374491713744917137449171374491 + .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf + .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf + .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 + .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 + .octa 0x3956c25b3956c25b3956c25b3956c25b + .octa 0x3956c25b3956c25b3956c25b3956c25b + .octa 0x59f111f159f111f159f111f159f111f1 + .octa 0x59f111f159f111f159f111f159f111f1 + .octa 0x923f82a4923f82a4923f82a4923f82a4 + .octa 0x923f82a4923f82a4923f82a4923f82a4 + .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 + .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 + .octa 0xd807aa98d807aa98d807aa98d807aa98 + .octa 0xd807aa98d807aa98d807aa98d807aa98 + .octa 0x12835b0112835b0112835b0112835b01 + .octa 0x12835b0112835b0112835b0112835b01 + .octa 0x243185be243185be243185be243185be + .octa 0x243185be243185be243185be243185be + .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 + .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 + .octa 0x72be5d7472be5d7472be5d7472be5d74 + .octa 0x72be5d7472be5d7472be5d7472be5d74 + .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe + .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe + .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 + .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 + .octa 0xc19bf174c19bf174c19bf174c19bf174 + .octa 0xc19bf174c19bf174c19bf174c19bf174 + .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 + .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 + .octa 0xefbe4786efbe4786efbe4786efbe4786 + .octa 0xefbe4786efbe4786efbe4786efbe4786 + .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 + .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 + .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc + .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc + .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f + .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f + .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa + .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa + .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc + .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc + .octa 0x76f988da76f988da76f988da76f988da + .octa 0x76f988da76f988da76f988da76f988da + .octa 0x983e5152983e5152983e5152983e5152 + .octa 0x983e5152983e5152983e5152983e5152 + .octa 0xa831c66da831c66da831c66da831c66d + .octa 0xa831c66da831c66da831c66da831c66d + .octa 0xb00327c8b00327c8b00327c8b00327c8 + .octa 0xb00327c8b00327c8b00327c8b00327c8 + .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 + .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 + .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 + .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 + .octa 0xd5a79147d5a79147d5a79147d5a79147 + .octa 0xd5a79147d5a79147d5a79147d5a79147 + .octa 0x06ca635106ca635106ca635106ca6351 + .octa 0x06ca635106ca635106ca635106ca6351 + .octa 0x14292967142929671429296714292967 + .octa 0x14292967142929671429296714292967 + .octa 0x27b70a8527b70a8527b70a8527b70a85 + .octa 0x27b70a8527b70a8527b70a8527b70a85 + .octa 0x2e1b21382e1b21382e1b21382e1b2138 + .octa 0x2e1b21382e1b21382e1b21382e1b2138 + .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc + .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc + .octa 0x53380d1353380d1353380d1353380d13 + .octa 0x53380d1353380d1353380d1353380d13 + .octa 
0x650a7354650a7354650a7354650a7354 + .octa 0x650a7354650a7354650a7354650a7354 + .octa 0x766a0abb766a0abb766a0abb766a0abb + .octa 0x766a0abb766a0abb766a0abb766a0abb + .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e + .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e + .octa 0x92722c8592722c8592722c8592722c85 + .octa 0x92722c8592722c8592722c8592722c85 + .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 + .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 + .octa 0xa81a664ba81a664ba81a664ba81a664b + .octa 0xa81a664ba81a664ba81a664ba81a664b + .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 + .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 + .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 + .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 + .octa 0xd192e819d192e819d192e819d192e819 + .octa 0xd192e819d192e819d192e819d192e819 + .octa 0xd6990624d6990624d6990624d6990624 + .octa 0xd6990624d6990624d6990624d6990624 + .octa 0xf40e3585f40e3585f40e3585f40e3585 + .octa 0xf40e3585f40e3585f40e3585f40e3585 + .octa 0x106aa070106aa070106aa070106aa070 + .octa 0x106aa070106aa070106aa070106aa070 + .octa 0x19a4c11619a4c11619a4c11619a4c116 + .octa 0x19a4c11619a4c11619a4c11619a4c116 + .octa 0x1e376c081e376c081e376c081e376c08 + .octa 0x1e376c081e376c081e376c081e376c08 + .octa 0x2748774c2748774c2748774c2748774c + .octa 0x2748774c2748774c2748774c2748774c + .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 + .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 + .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 + .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 + .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a + .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a + .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f + .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f + .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 + .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 + .octa 0x748f82ee748f82ee748f82ee748f82ee + .octa 0x748f82ee748f82ee748f82ee748f82ee + .octa 0x78a5636f78a5636f78a5636f78a5636f + .octa 0x78a5636f78a5636f78a5636f78a5636f + .octa 0x84c8781484c8781484c8781484c87814 + .octa 0x84c8781484c8781484c8781484c87814 + .octa 0x8cc702088cc702088cc702088cc70208 + .octa 0x8cc702088cc702088cc702088cc70208 + .octa 0x90befffa90befffa90befffa90befffa + .octa 0x90befffa90befffa90befffa90befffa + .octa 0xa4506ceba4506ceba4506ceba4506ceb + .octa 0xa4506ceba4506ceba4506ceba4506ceb + .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 + .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 + .octa 0xc67178f2c67178f2c67178f2c67178f2 + .octa 0xc67178f2c67178f2c67178f2c67178f2 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 +PSHUFFLE_BYTE_FLIP_MASK: +.octa 0x0c0d0e0f08090a0b0405060700010203 +.octa 0x0c0d0e0f08090a0b0405060700010203 + +.section .rodata.cst256.K256, "aM", @progbits, 256 +.align 64 +.global K256 +K256: + .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 From patchwork Sun Feb 16 03:07:31 2025 Content-Type: 
text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13976324 X-Patchwork-Delegate: herbert@gondor.apana.org.au Received: from abb.hmeau.com (abb.hmeau.com [144.6.53.87]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CB7BB10A1F for ; Sun, 16 Feb 2025 03:07:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=144.6.53.87 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675256; cv=none; b=ER5mXAo8XGCpQxUSIkkHK21Skp0gSD1VYaVt/wDSvo9I3FnQuGSvUDUNy05cVZ4mzYOmTMAJIeTUmsZr6mJaKH7I2xqaBiOWK5AF4oTvFaiSqjsLt8513frO9KPsiny+yMJcbLbNDoAykhJMstDmC0gZzwEb5AjGQBzVRNDPiRU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675256; c=relaxed/simple; bh=35yDBQpzpN6NuQDIyD47ciJFy71wiMY/6pgQ0NL/268=; h=Date:Message-Id:In-Reply-To:References:From:Subject:To:Cc; b=YU1r8YXNsvUD9pU3pIityiv83wHOOYmXQNoU2LmfI6uv99MTG3JZZGKrXF+vdYs/7KZQ3pBtrHBF+A25ZsqUjA8ff/eW3sVGYflyIcrh/bOPl+rHIU1LZc4dsCtkrw1r9v2sOot2XeEArdcNt/Bb+32Gdi5GfW7ETw5NAxjcVug= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au; spf=pass smtp.mailfrom=gondor.apana.org.au; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b=ZIF93BTV; arc=none smtp.client-ip=144.6.53.87 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b="ZIF93BTV" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=hmeau.com; s=formenos; h=Cc:To:Subject:From:References:In-Reply-To:Message-Id:Date: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=/Gy8ojeyn/pSetWrgiMW7tNsrPDhJ+xUsPdhwiBTYgM=; b=ZIF93BTVB/yehw23wiBlQJ0SKE sX28cEjd4VVnom8X7mgS1YsYUOFY2ikCh3Lb1MdxP95UbUZSmsMxDswoKZ96gZw/Y+blGM2RKGyAJ ZGi8v7m4LuhC4NGp/RB4TCnLwyecUzJaWZduKUDU5Z/bFUBt4QH7ifjKkpWyx5Gdsgq80EqQRIT4G J6x+lYf6pu01I51xp33+HgWsEfX3eEPcHM+mYpmoQqn479jFDo4hpE78wmtfwbltTiQYkTZngsbpw fbqvMEr0s1lUs0+kKrTOo9CpU+AKwqKWPeSgDtwyOvti00usNugRHwigKTjBmllYZKs84UBU5GlEj GlZULOrw==; Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.96 #2 (Debian)) id 1tjUnK-000gZw-1z; Sun, 16 Feb 2025 11:07:32 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Sun, 16 Feb 2025 11:07:31 +0800 Date: Sun, 16 Feb 2025 11:07:31 +0800 Message-Id: In-Reply-To: References: From: Herbert Xu Subject: [v2 PATCH 09/11] crypto: hash - Add sync hash interface To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Introduce a new sync hash interface based on ahash, similar to sync skcipher. It will replace shash for existing users. 
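Roughly, usage is expected to look like the sketch below (illustrative only: "data", "len" and the digest buffer are caller-provided placeholders, and error handling is trimmed; the identifiers are the ones introduced by this patch):

	struct crypto_sync_hash *tfm;
	u8 digest[SHA256_DIGEST_SIZE];
	int err;

	tfm = crypto_alloc_sync_hash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* One-shot digest of a linear buffer via the new helper. */
	err = crypto_sync_hash_digest(tfm, data, len, digest);

	if (!err) {
		/* Or drive the hash with an on-stack request directly. */
		SYNC_HASH_REQUEST_ON_STACK(req, tfm);

		ahash_request_set_callback(req, 0, NULL, NULL);
		ahash_request_set_virt(req, data, digest, len);
		err = crypto_ahash_digest(req);
		ahash_request_zero(req);
	}

	crypto_free_sync_hash(tfm);
	return err;

Because the allocation masks out CRYPTO_ALG_ASYNC, the request always completes synchronously and no completion callback is needed.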
Signed-off-by: Herbert Xu --- crypto/ahash.c | 37 ++++++++++++++++ include/crypto/hash.h | 100 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) diff --git a/crypto/ahash.c b/crypto/ahash.c index 6b19fa6fc628..fafce2e47a78 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -949,6 +949,27 @@ struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type, } EXPORT_SYMBOL_GPL(crypto_alloc_ahash); +struct crypto_sync_hash *crypto_alloc_sync_hash(const char *alg_name, + u32 type, u32 mask) +{ + struct crypto_ahash *tfm; + + /* Only sync algorithms allowed. */ + mask |= CRYPTO_ALG_ASYNC; + type &= ~CRYPTO_ALG_ASYNC; + + tfm = crypto_alloc_ahash(alg_name, type, mask); + + if (!IS_ERR(tfm) && WARN_ON(crypto_ahash_reqsize(tfm) > + MAX_SYNC_HASH_REQSIZE)) { + crypto_free_ahash(tfm); + return ERR_PTR(-EINVAL); + } + + return container_of(tfm, struct crypto_sync_hash, base); +} +EXPORT_SYMBOL_GPL(crypto_alloc_sync_hash); + int crypto_has_ahash(const char *alg_name, u32 type, u32 mask) { return crypto_type_has_alg(alg_name, &crypto_ahash_type, type, mask); @@ -1123,5 +1144,21 @@ void ahash_request_free(struct ahash_request *req) } EXPORT_SYMBOL_GPL(ahash_request_free); +int crypto_sync_hash_digest(struct crypto_sync_hash *tfm, const u8 *data, + unsigned int len, u8 *out) +{ + SYNC_HASH_REQUEST_ON_STACK(req, tfm); + int err; + + ahash_request_set_callback(req, 0, NULL, NULL); + ahash_request_set_virt(req, data, out, len); + err = crypto_ahash_digest(req); + + ahash_request_zero(req); + + return err; +} +EXPORT_SYMBOL_GPL(crypto_shash_tfm_digest); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 2aa83ee0ec98..f6e0c44331a3 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -8,6 +8,7 @@ #ifndef _CRYPTO_HASH_H #define _CRYPTO_HASH_H +#include #include #include #include @@ -162,6 +163,8 @@ struct shash_desc { void *__ctx[] __aligned(ARCH_SLAB_MINALIGN); }; +struct sync_hash_requests; + #define HASH_MAX_DIGESTSIZE 64 /* @@ -169,12 +172,30 @@ struct shash_desc { * containing a 'struct sha3_state'. 
*/ #define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) +#define MAX_SYNC_HASH_REQSIZE HASH_MAX_DESCSIZE #define SHASH_DESC_ON_STACK(shash, ctx) \ char __##shash##_desc[sizeof(struct shash_desc) + HASH_MAX_DESCSIZE] \ __aligned(__alignof__(struct shash_desc)); \ struct shash_desc *shash = (struct shash_desc *)__##shash##_desc +#define SYNC_HASH_REQUEST_ON_STACK(name, _tfm) \ + char __##name##_req[sizeof(struct ahash_request) + \ + MAX_SYNC_HASH_REQSIZE \ + ] CRYPTO_MINALIGN_ATTR; \ + struct ahash_request *name = \ + (((struct ahash_request *)__##name##_req)->base.tfm = \ + crypto_sync_hash_tfm((_tfm)), \ + (void *)__##name##_req) + +#define SYNC_HASH_REQUESTS_ON_STACK(name, _n, _tfm) \ + char __##name##_req[(_n) * ALIGN(sizeof(struct ahash_request) + \ + MAX_SYNC_HASH_REQSIZE, \ + CRYPTO_MINALIGN) \ + ] CRYPTO_MINALIGN_ATTR; \ + struct sync_hash_requests *name = sync_hash_requests_on_stack_init( \ + __##name##_req, sizeof(__##name##_req), (_tfm)) + /** * struct shash_alg - synchronous message digest definition * @init: see struct ahash_alg @@ -241,6 +262,10 @@ struct crypto_shash { struct crypto_tfm base; }; +struct crypto_sync_hash { + struct crypto_ahash base; +}; + /** * DOC: Asynchronous Message Digest API * @@ -273,6 +298,9 @@ static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type, u32 mask); +struct crypto_sync_hash *crypto_alloc_sync_hash(const char *alg_name, + u32 type, u32 mask); + struct crypto_ahash *crypto_clone_ahash(struct crypto_ahash *tfm); static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) @@ -280,6 +308,12 @@ static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) return &tfm->base; } +static inline struct crypto_tfm *crypto_sync_hash_tfm( + struct crypto_sync_hash *tfm) +{ + return crypto_ahash_tfm(&tfm->base); +} + /** * crypto_free_ahash() - zeroize and free the ahash handle * @tfm: cipher handle to be freed @@ -291,6 +325,11 @@ static inline void crypto_free_ahash(struct crypto_ahash *tfm) crypto_destroy_tfm(tfm, crypto_ahash_tfm(tfm)); } +static inline void crypto_free_sync_hash(struct crypto_sync_hash *tfm) +{ + crypto_free_ahash(&tfm->base); +} + /** * crypto_has_ahash() - Search for the availability of an ahash. 
* @alg_name: is the cra_name / name or cra_driver_name / driver name of the @@ -313,6 +352,12 @@ static inline const char *crypto_ahash_driver_name(struct crypto_ahash *tfm) return crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); } +static inline const char *crypto_sync_hash_driver_name( + struct crypto_sync_hash *tfm) +{ + return crypto_ahash_driver_name(&tfm->base); +} + /** * crypto_ahash_blocksize() - obtain block size for cipher * @tfm: cipher handle @@ -327,6 +372,12 @@ static inline unsigned int crypto_ahash_blocksize(struct crypto_ahash *tfm) return crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); } +static inline unsigned int crypto_sync_hash_blocksize( + struct crypto_sync_hash *tfm) +{ + return crypto_ahash_blocksize(&tfm->base); +} + static inline struct hash_alg_common *__crypto_hash_alg_common( struct crypto_alg *alg) { @@ -354,6 +405,12 @@ static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) return crypto_hash_alg_common(tfm)->digestsize; } +static inline unsigned int crypto_sync_hash_digestsize( + struct crypto_sync_hash *tfm) +{ + return crypto_ahash_digestsize(&tfm->base); +} + /** * crypto_ahash_statesize() - obtain size of the ahash state * @tfm: cipher handle @@ -369,6 +426,12 @@ static inline unsigned int crypto_ahash_statesize(struct crypto_ahash *tfm) return tfm->statesize; } +static inline unsigned int crypto_sync_hash_statesize( + struct crypto_sync_hash *tfm) +{ + return crypto_ahash_statesize(&tfm->base); +} + static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) { return crypto_tfm_get_flags(crypto_ahash_tfm(tfm)); @@ -877,6 +940,9 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, unsigned int len, u8 *out); +int crypto_sync_hash_digest(struct crypto_sync_hash *tfm, const u8 *data, + unsigned int len, u8 *out); + /** * crypto_shash_export() - extract operational state for message digest * @desc: reference to the operational state handle whose state is exported @@ -982,6 +1048,13 @@ static inline void shash_desc_zero(struct shash_desc *desc) sizeof(*desc) + crypto_shash_descsize(desc->tfm)); } +static inline void ahash_request_zero(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + + memzero_explicit(req, sizeof(*req) + crypto_ahash_reqsize(tfm)); +} + static inline int ahash_request_err(struct ahash_request *req) { return req->base.err; @@ -992,4 +1065,31 @@ static inline bool ahash_is_async(struct crypto_ahash *tfm) return crypto_tfm_is_async(&tfm->base); } +static inline struct ahash_request *sync_hash_requests( + struct sync_hash_requests *reqs, int i) +{ + unsigned unit = sizeof(struct ahash_request) + MAX_SYNC_HASH_REQSIZE; + unsigned alunit = ALIGN(unit, CRYPTO_MINALIGN); + + return (void *)((char *)reqs + i * alunit); +} + +static inline struct sync_hash_requests *sync_hash_requests_on_stack_init( + char *buf, unsigned len, struct crypto_sync_hash *tfm) +{ + unsigned unit = sizeof(struct ahash_request) + MAX_SYNC_HASH_REQSIZE; + unsigned alunit = ALIGN(unit, CRYPTO_MINALIGN); + struct sync_hash_requests *reqs = (void *)buf; + int n = len / alunit; + int i; + + for (i = 0; i < n; i++) { + struct ahash_request *req = sync_hash_requests(reqs, i); + + req->base.tfm = crypto_sync_hash_tfm(tfm); + } + + return reqs; +} + #endif /* _CRYPTO_HASH_H */ From patchwork Sun Feb 16 03:07:33 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: 
Herbert Xu X-Patchwork-Id: 13976326 X-Patchwork-Delegate: herbert@gondor.apana.org.au Received: from abb.hmeau.com (abb.hmeau.com [144.6.53.87]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6613510A1F for ; Sun, 16 Feb 2025 03:07:37 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=144.6.53.87 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675259; cv=none; b=LPDpBuvy0DhvQ+iRdfRKtIFZZWLZexSrvWQIfwBxAhQwMLGMAsDLphMqlaTsBGL3W3pOg8nRsZSuu46FI3+Y/9hwbBi23vmlEVfhjgAWP5NZcC7OOPKylrG06oflAVb5daPgp6A/HQ7QpF5BO/TE6FWej93NRE3l/BZGYhgq6uQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675259; c=relaxed/simple; bh=3n7dW+Dn06EC4hrLV6q4tdY3D7DDC32kKDRs/inG9Dc=; h=Date:Message-Id:In-Reply-To:References:From:Subject:To:Cc; b=NiilKUWE6OhNv1VIpNMY5Xf1UaXUqmhCNIKwQLZo5ZfxM+fPZ9p8tqKA9wZvF/SoLvpeMbRji9Gjc9LS2pxLPQ9PC7VOu3CI7KrkU8kzez2uLsZjp/6aFPMecnPl61LdFPEh6K2FaRf//fF8gZaYbTvzwrkQvkIXrdcoAvcl69M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au; spf=pass smtp.mailfrom=gondor.apana.org.au; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b=Z8OREJTD; arc=none smtp.client-ip=144.6.53.87 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b="Z8OREJTD" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=hmeau.com; s=formenos; h=Cc:To:Subject:From:References:In-Reply-To:Message-Id:Date: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=AfJ3SA/bRPE6cedgO69rzRNOIPPNNWgDTd0omE6JgLc=; b=Z8OREJTDYVUfKTZNaS9n5vQkyz GCbtht/dIVEDcoOoHla0MGNzDET4NKthaT77gOnTrLbukhIQjgB8gj2NBsU7lERg72wWZBzA1Fu/A w/Zya0eJBzWNBNVhAMnCL4q73t2Yu1oRMtVXjAZDYRgcJ6ClBjTun/vpkgADIcVyJ08pk2YW1O9PB mGo7/s3/RWFuMzVmGMgcSRMgRCzql3ED/Eo1uzX4xuzrZydmfHizv15qS/MJfmOmvIrBasSMnUb51 +DVsErh3GSwT2drO6z/qrCm97lxwzJ9YxNi9w+qwtTBPjys/KfYBLH+zJiUZHmxC81Rc5oyN1slKm YBREdUvQ==; Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.96 #2 (Debian)) id 1tjUnM-000ga9-2r; Sun, 16 Feb 2025 11:07:34 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Sun, 16 Feb 2025 11:07:33 +0800 Date: Sun, 16 Feb 2025 11:07:33 +0800 Message-Id: <513768f4907245e15e5f12bb20bd50762c3cc25b.1739674648.git.herbert@gondor.apana.org.au> In-Reply-To: References: From: Herbert Xu Subject: [v2 PATCH 10/11] fsverity: Use sync hash instead of shash To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Use the sync hash interface instead of shash. 
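The conversion is mostly mechanical. As a rough sketch (identifiers as used in this series, error handling omitted), a one-shot shash digest such as:

	SHASH_DESC_ON_STACK(desc, tfm);

	desc->tfm = tfm;
	err = crypto_shash_digest(desc, data, size, out);

becomes:

	SYNC_HASH_REQUEST_ON_STACK(req, tfm);

	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
	ahash_request_set_virt(req, data, out, size);
	err = crypto_ahash_digest(req);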
Signed-off-by: Herbert Xu --- fs/verity/fsverity_private.h | 2 +- fs/verity/hash_algs.c | 41 +++++++++++++++++++----------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index b3506f56e180..aecc221daf8b 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -20,7 +20,7 @@ /* A hash algorithm supported by fs-verity */ struct fsverity_hash_alg { - struct crypto_shash *tfm; /* hash tfm, allocated on demand */ + struct crypto_sync_hash *tfm; /* hash tfm, allocated on demand */ const char *name; /* crypto API name, e.g. sha256 */ unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */ unsigned int block_size; /* block size in bytes, e.g. 64 for SHA-256 */ diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c index 6b08b1d9a7d7..e088bcfe5ed1 100644 --- a/fs/verity/hash_algs.c +++ b/fs/verity/hash_algs.c @@ -43,7 +43,7 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, unsigned int num) { struct fsverity_hash_alg *alg; - struct crypto_shash *tfm; + struct crypto_sync_hash *tfm; int err; if (num >= ARRAY_SIZE(fsverity_hash_algs) || @@ -62,7 +62,7 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, if (alg->tfm != NULL) goto out_unlock; - tfm = crypto_alloc_shash(alg->name, 0, 0); + tfm = crypto_alloc_sync_hash(alg->name, 0, 0); if (IS_ERR(tfm)) { if (PTR_ERR(tfm) == -ENOENT) { fsverity_warn(inode, @@ -79,20 +79,20 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, } err = -EINVAL; - if (WARN_ON_ONCE(alg->digest_size != crypto_shash_digestsize(tfm))) + if (WARN_ON_ONCE(alg->digest_size != crypto_sync_hash_digestsize(tfm))) goto err_free_tfm; - if (WARN_ON_ONCE(alg->block_size != crypto_shash_blocksize(tfm))) + if (WARN_ON_ONCE(alg->block_size != crypto_sync_hash_blocksize(tfm))) goto err_free_tfm; pr_info("%s using implementation \"%s\"\n", - alg->name, crypto_shash_driver_name(tfm)); + alg->name, crypto_sync_hash_driver_name(tfm)); /* pairs with smp_load_acquire() above */ smp_store_release(&alg->tfm, tfm); goto out_unlock; err_free_tfm: - crypto_free_shash(tfm); + crypto_free_sync_hash(tfm); alg = ERR_PTR(err); out_unlock: mutex_unlock(&fsverity_hash_alg_init_mutex); @@ -112,17 +112,15 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size) { u8 *hashstate = NULL; - SHASH_DESC_ON_STACK(desc, alg->tfm); + SYNC_HASH_REQUEST_ON_STACK(req, alg->tfm); u8 *padded_salt = NULL; size_t padded_salt_size; int err; - desc->tfm = alg->tfm; - if (salt_size == 0) return NULL; - hashstate = kmalloc(crypto_shash_statesize(alg->tfm), GFP_KERNEL); + hashstate = kmalloc(crypto_sync_hash_statesize(alg->tfm), GFP_KERNEL); if (!hashstate) return ERR_PTR(-ENOMEM); @@ -140,15 +138,19 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, goto err_free; } memcpy(padded_salt, salt, salt_size); - err = crypto_shash_init(desc); + + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); + + err = crypto_ahash_init(req); if (err) goto err_free; - err = crypto_shash_update(desc, padded_salt, padded_salt_size); + ahash_request_set_virt(req, padded_salt, NULL, padded_salt_size); + err = crypto_ahash_update(req); if (err) goto err_free; - err = crypto_shash_export(desc, hashstate); + err = crypto_ahash_export(req, hashstate); if (err) goto err_free; out: @@ -176,21 +178,22 @@ const u8 
*fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, int fsverity_hash_block(const struct merkle_tree_params *params, const struct inode *inode, const void *data, u8 *out) { - SHASH_DESC_ON_STACK(desc, params->hash_alg->tfm); + SYNC_HASH_REQUEST_ON_STACK(req, params->hash_alg->tfm); int err; - desc->tfm = params->hash_alg->tfm; + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); + ahash_request_set_virt(req, data, out, params->block_size); if (params->hashstate) { - err = crypto_shash_import(desc, params->hashstate); + err = crypto_ahash_import(req, params->hashstate); if (err) { fsverity_err(inode, "Error %d importing hash state", err); return err; } - err = crypto_shash_finup(desc, data, params->block_size, out); + err = crypto_ahash_finup(req); } else { - err = crypto_shash_digest(desc, data, params->block_size, out); + err = crypto_ahash_digest(req); } if (err) fsverity_err(inode, "Error %d computing block hash", err); @@ -209,7 +212,7 @@ int fsverity_hash_block(const struct merkle_tree_params *params, int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out) { - return crypto_shash_tfm_digest(alg->tfm, data, size, out); + return crypto_sync_hash_digest(alg->tfm, data, size, out); } void __init fsverity_check_hash_algs(void) From patchwork Sun Feb 16 03:07:35 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 13976327 X-Patchwork-Delegate: herbert@gondor.apana.org.au Received: from abb.hmeau.com (abb.hmeau.com [144.6.53.87]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8711018027 for ; Sun, 16 Feb 2025 03:07:39 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=144.6.53.87 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675261; cv=none; b=DYDC7hcmhPncEcRb/WNNHaNFcWBN1ZfYopQi6E32yfjWSiHgb+Y7xcn2I6iCrAD8hhoSlRoi1Fr1LXpnsubNid4VEwdCxRCJKbVpr/Bw/YRifFdDbVO82KtkY3mkCt+5Xa4DH9SpXfdhg/Rvw/W8wmI8fBhOkjtDIO+SZyOMJw0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1739675261; c=relaxed/simple; bh=Rjy/TlZfCvvivByd7aDUZlNoawUqdRePD1fCzBywmIQ=; h=Date:Message-Id:In-Reply-To:References:From:Subject:To:Cc; b=Ox3mOz0dWK07x8q9kdBivBeVXdBkQOB519xkb2rgPgoML9ZCGT1TwiyO3qrcwq5ChLSe6yGv45uox1lAMBgCFBhQ7m3+sDp9eh1iNCUDI5HKr+4VzsBQsRxH4QlB0h3Ezq57dFh18YgBs3Ng3/Z4po2oh3W35txHEtr+xleG//4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=gondor.apana.org.au; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b=aEcSeJwf; arc=none smtp.client-ip=144.6.53.87 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gondor.apana.org.au Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=hmeau.com header.i=@hmeau.com header.b="aEcSeJwf" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=hmeau.com; s=formenos; h=Cc:To:Subject:From:References:In-Reply-To:Message-Id:Date: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: 
Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe: List-Subscribe:List-Post:List-Owner:List-Archive; bh=rvcAqMKpJHp+/RooV/oWQbv8bhWfOMn+9E9F8T+I0nE=; b=aEcSeJwfh7+ksuP0nyge7+J6VP RxOj9wYlmn4763vRR9EnAblPQJKeUMYH9+g9uDZ5v2KfwkVrFG8ifdYhxoIPiLQRxYonJO7XqKGN3 5XxTlVVcs7sZn8yywQB6X1yLhJRciOcu7jtC7PL16+r3G84hWMO8EsiwuQMihJeht56xekaDUWnoM qJCxV3AjBQFT/rQp75y/jbNRROfLf77uATESTb5IiYUuZCPR1LvBhxLnzk8uriOdKJ01sy2b1xITx FarBhmw1ZWm7FQbEgYflK0fhAuOqaj9x1eSCgHdRJNruRha7Vz6Btpkqocedx2UjtNo5TJ/N8TjcC Xwww7qbw==; Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.96 #2 (Debian)) id 1tjUnP-000gaN-0b; Sun, 16 Feb 2025 11:07:36 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Sun, 16 Feb 2025 11:07:35 +0800 Date: Sun, 16 Feb 2025 11:07:35 +0800 Message-Id: In-Reply-To: References: From: Eric Biggers Subject: [v2 PATCH 11/11] fsverity: improve performance by using multibuffer hashing To: Linux Crypto Mailing List Cc: Eric Biggers , Ard Biesheuvel , Megha Dey , Tim Chen Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: When supported by the hash algorithm, use crypto_shash_finup_mb() to interleave the hashing of pairs of data blocks. On some CPUs this nearly doubles hashing performance. The increase in overall throughput of cold-cache fsverity reads that I'm seeing on arm64 and x86_64 is roughly 35% (though this metric is hard to measure as it jumps around a lot). For now this is only done on the verification path, and only for data blocks, not Merkle tree blocks. We could use finup_mb on Merkle tree blocks too, but that is less important as there aren't as many Merkle tree blocks as data blocks, and that would require some additional code restructuring. We could also use finup_mb to accelerate building the Merkle tree, but verification performance is more important. Reviewed-by: Sami Tolvanen Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- fs/verity/fsverity_private.h | 2 + fs/verity/verify.c | 179 +++++++++++++++++++++++++++++------ 2 files changed, 151 insertions(+), 30 deletions(-) diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index aecc221daf8b..3d03fb1e41f0 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -152,6 +152,8 @@ static inline void fsverity_init_signature(void) /* verify.c */ +#define FS_VERITY_MAX_PENDING_DATA_BLOCKS 2 + void __init fsverity_init_workqueue(void); #endif /* _FSVERITY_PRIVATE_H */ diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 4fcad0825a12..15bf0887a827 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -10,6 +10,27 @@ #include #include +struct fsverity_pending_block { + const void *data; + u64 pos; + u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE]; +}; + +struct fsverity_verification_context { + struct inode *inode; + struct fsverity_info *vi; + unsigned long max_ra_pages; + + /* + * This is the queue of data blocks that are pending verification. We + * allow multiple blocks to be queued up in order to support multibuffer + * hashing, i.e. interleaving the hashing of multiple messages. On many + * CPUs this improves performance significantly. 
+ */ + int num_pending; + struct fsverity_pending_block pending_blocks[FS_VERITY_MAX_PENDING_DATA_BLOCKS]; +}; + static struct workqueue_struct *fsverity_read_workqueue; /* @@ -79,7 +100,7 @@ static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage, } /* - * Verify a single data block against the file's Merkle tree. + * Verify the hash of a single data block against the file's Merkle tree. * * In principle, we need to verify the entire path to the root node. However, * for efficiency the filesystem may cache the hash blocks. Therefore we need @@ -90,8 +111,10 @@ static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage, */ static bool verify_data_block(struct inode *inode, struct fsverity_info *vi, - const void *data, u64 data_pos, unsigned long max_ra_pages) + const struct fsverity_pending_block *dblock, + unsigned long max_ra_pages) { + const u64 data_pos = dblock->pos; const struct merkle_tree_params *params = &vi->tree_params; const unsigned int hsize = params->digest_size; int level; @@ -115,8 +138,12 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi, */ u64 hidx = data_pos >> params->log_blocksize; - /* Up to 1 + FS_VERITY_MAX_LEVELS pages may be mapped at once */ - BUILD_BUG_ON(1 + FS_VERITY_MAX_LEVELS > KM_MAX_IDX); + /* + * Up to FS_VERITY_MAX_PENDING_DATA_BLOCKS + FS_VERITY_MAX_LEVELS pages + * may be mapped at once. + */ + BUILD_BUG_ON(FS_VERITY_MAX_PENDING_DATA_BLOCKS + + FS_VERITY_MAX_LEVELS > KM_MAX_IDX); if (unlikely(data_pos >= inode->i_size)) { /* @@ -127,7 +154,7 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi, * any part past EOF should be all zeroes. Therefore, we need * to verify that any data blocks fully past EOF are all zeroes. */ - if (memchr_inv(data, 0, params->block_size)) { + if (memchr_inv(dblock->data, 0, params->block_size)) { fsverity_err(inode, "FILE CORRUPTED! Data past EOF is not zeroed"); return false; @@ -221,10 +248,8 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi, put_page(hpage); } - /* Finally, verify the data block. */ - if (fsverity_hash_block(params, inode, data, real_hash) != 0) - goto error; - if (memcmp(want_hash, real_hash, hsize) != 0) + /* Finally, verify the hash of the data block. */ + if (memcmp(want_hash, dblock->real_hash, hsize) != 0) goto corrupted; return true; @@ -233,7 +258,8 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi, "FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN", data_pos, level - 1, params->hash_alg->name, hsize, want_hash, - params->hash_alg->name, hsize, real_hash); + params->hash_alg->name, hsize, + level == 0 ? 
dblock->real_hash : real_hash); error: for (; level > 0; level--) { kunmap_local(hblocks[level - 1].addr); @@ -242,13 +268,91 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi, return false; } -static bool -verify_data_blocks(struct folio *data_folio, size_t len, size_t offset, - unsigned long max_ra_pages) +static void +fsverity_init_verification_context(struct fsverity_verification_context *ctx, + struct inode *inode, + unsigned long max_ra_pages) { - struct inode *inode = data_folio->mapping->host; - struct fsverity_info *vi = inode->i_verity_info; - const unsigned int block_size = vi->tree_params.block_size; + ctx->inode = inode; + ctx->vi = inode->i_verity_info; + ctx->max_ra_pages = max_ra_pages; + ctx->num_pending = 0; +} + +static void +fsverity_clear_pending_blocks(struct fsverity_verification_context *ctx) +{ + int i; + + for (i = ctx->num_pending - 1; i >= 0; i--) { + kunmap_local(ctx->pending_blocks[i].data); + ctx->pending_blocks[i].data = NULL; + } + ctx->num_pending = 0; +} + +static bool +fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx) +{ + struct inode *inode = ctx->inode; + struct fsverity_info *vi = ctx->vi; + const struct merkle_tree_params *params = &vi->tree_params; + SYNC_HASH_REQUESTS_ON_STACK(reqs, FS_VERITY_MAX_PENDING_DATA_BLOCKS, params->hash_alg->tfm); + struct ahash_request *req; + int i; + int err; + + if (ctx->num_pending == 0) + return true; + + req = sync_hash_requests(reqs, 0); + for (i = 0; i < ctx->num_pending; i++) { + struct ahash_request *reqi = sync_hash_requests(reqs, i); + + ahash_request_set_callback(reqi, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); + ahash_request_set_virt(reqi, ctx->pending_blocks[i].data, + ctx->pending_blocks[i].real_hash, + params->block_size); + if (i) + ahash_request_chain(reqi, req); + if (!params->hashstate) + continue; + + err = crypto_ahash_import(reqi, params->hashstate); + if (err) { + fsverity_err(inode, "Error %d importing hash state", err); + return false; + } + } + + if (params->hashstate) + err = crypto_ahash_finup(req); + else + err = crypto_ahash_digest(req); + if (err) { + fsverity_err(inode, "Error %d computing block hashes", err); + return false; + } + + for (i = 0; i < ctx->num_pending; i++) { + if (!verify_data_block(inode, vi, &ctx->pending_blocks[i], + ctx->max_ra_pages)) + return false; + } + + fsverity_clear_pending_blocks(ctx); + return true; +} + +static bool +fsverity_add_data_blocks(struct fsverity_verification_context *ctx, + struct folio *data_folio, size_t len, size_t offset) +{ + struct fsverity_info *vi = ctx->vi; + const struct merkle_tree_params *params = &vi->tree_params; + const unsigned int block_size = params->block_size; + const int mb_max_msgs = FS_VERITY_MAX_PENDING_DATA_BLOCKS; u64 pos = (u64)data_folio->index << PAGE_SHIFT; if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size))) @@ -257,14 +361,11 @@ verify_data_blocks(struct folio *data_folio, size_t len, size_t offset, folio_test_uptodate(data_folio))) return false; do { - void *data; - bool valid; - - data = kmap_local_folio(data_folio, offset); - valid = verify_data_block(inode, vi, data, pos + offset, - max_ra_pages); - kunmap_local(data); - if (!valid) + ctx->pending_blocks[ctx->num_pending].data = + kmap_local_folio(data_folio, offset); + ctx->pending_blocks[ctx->num_pending].pos = pos + offset; + if (++ctx->num_pending == mb_max_msgs && + !fsverity_verify_pending_blocks(ctx)) return false; offset += block_size; len -= block_size; @@ -286,7 +387,15 @@ 
verify_data_blocks(struct folio *data_folio, size_t len, size_t offset, */ bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset) { - return verify_data_blocks(folio, len, offset, 0); + struct fsverity_verification_context ctx; + + fsverity_init_verification_context(&ctx, folio->mapping->host, 0); + + if (fsverity_add_data_blocks(&ctx, folio, len, offset) && + fsverity_verify_pending_blocks(&ctx)) + return true; + fsverity_clear_pending_blocks(&ctx); + return false; } EXPORT_SYMBOL_GPL(fsverity_verify_blocks); @@ -307,6 +416,8 @@ EXPORT_SYMBOL_GPL(fsverity_verify_blocks); */ void fsverity_verify_bio(struct bio *bio) { + struct inode *inode = bio_first_folio_all(bio)->mapping->host; + struct fsverity_verification_context ctx; struct folio_iter fi; unsigned long max_ra_pages = 0; @@ -323,13 +434,21 @@ void fsverity_verify_bio(struct bio *bio) max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2); } + fsverity_init_verification_context(&ctx, inode, max_ra_pages); + bio_for_each_folio_all(fi, bio) { - if (!verify_data_blocks(fi.folio, fi.length, fi.offset, - max_ra_pages)) { - bio->bi_status = BLK_STS_IOERR; - break; - } + if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length, + fi.offset)) + goto ioerr; } + + if (!fsverity_verify_pending_blocks(&ctx)) + goto ioerr; + return; + +ioerr: + fsverity_clear_pending_blocks(&ctx); + bio->bi_status = BLK_STS_IOERR; } EXPORT_SYMBOL_GPL(fsverity_verify_bio); #endif /* CONFIG_BLOCK */
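The batching done by fsverity_verify_pending_blocks() above reduces to chaining a second on-stack request to the first and issuing a single hash call for both blocks. A minimal sketch of that pattern, assuming two mapped data blocks ("data0"/"data1"), result buffers ("out0"/"out1") and "block_size" as placeholders, no salt (so digest rather than import+finup), and error handling omitted:

	SYNC_HASH_REQUESTS_ON_STACK(reqs, 2, tfm);
	struct ahash_request *req = sync_hash_requests(reqs, 0);
	struct ahash_request *req2 = sync_hash_requests(reqs, 1);
	int err;

	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
	ahash_request_set_virt(req, data0, out0, block_size);

	ahash_request_set_callback(req2, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
	ahash_request_set_virt(req2, data1, out1, block_size);
	ahash_request_chain(req2, req);

	/* One call hashes both blocks; a multibuffer driver may interleave them. */
	err = crypto_ahash_digest(req);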