@@ -115,18 +115,13 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
return srclen;
}
-static int poly1305_simd_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
+/* Core update step; callers in this file wrap it in kernel_fpu_begin/end. */
+static void poly1305_simd_do_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
unsigned int bytes;
- /* kernel_fpu_begin/end is costly, use fallback for small updates */
- if (srclen <= 288 || !crypto_simd_usable())
- return crypto_poly1305_update(desc, src, srclen);
-
- kernel_fpu_begin();
-
if (unlikely(dctx->buflen)) {
bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
memcpy(dctx->buf + dctx->buflen, src, bytes);
@@ -147,12 +140,50 @@ static int poly1305_simd_update(struct shash_desc *desc,
srclen = bytes;
}
- kernel_fpu_end();
-
if (unlikely(srclen)) {
dctx->buflen = srclen;
memcpy(dctx->buf, src, srclen);
}
+}
+
+/* shash .update: only enter the FPU when the input is big enough to pay. */
+static int poly1305_simd_update(struct shash_desc *desc,
+				const u8 *src, unsigned int srclen)
+{
+	/* kernel_fpu_begin/end is costly, so <= 288 bytes go to the fallback */
+	if (srclen > 288 && crypto_simd_usable()) {
+		kernel_fpu_begin();
+		poly1305_simd_do_update(desc, src, srclen);
+		kernel_fpu_end();
+		return 0;
+	}
+	return crypto_poly1305_update(desc, src, srclen);
+}
+
+/* Walk @sg and hash @srclen bytes, entering the FPU once if SIMD pays off.
+ * Returns 0 or the error reported by the scatterlist walk (assumed < 0). */
+static int poly1305_simd_update_from_sg(struct shash_desc *desc,
+					struct scatterlist *sg,
+					unsigned int srclen, int flags)
+{
+	bool do_simd = crypto_simd_usable() && srclen > 288;
+	struct crypto_hash_walk walk;
+	int nbytes;
+
+	if (do_simd) {
+		kernel_fpu_begin();
+		flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; /* FPU held: no sleep */
+	}
+
+	for (nbytes = crypto_shash_walk_sg(desc, sg, srclen, &walk, flags);
+	     nbytes > 0; nbytes = crypto_hash_walk_done(&walk, 0)) {
+		if (do_simd)
+			poly1305_simd_do_update(desc, walk.data, nbytes);
+		else
+			crypto_poly1305_update(desc, walk.data, nbytes);
+	}
+	if (do_simd)
+		kernel_fpu_end();
-	return 0;
+	return nbytes;	/* 0 when the walk completed, else its error */
}
@@ -161,6 +192,7 @@ static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE,
.init = poly1305_simd_init,
.update = poly1305_simd_update,
+ .update_from_sg = poly1305_simd_update_from_sg,
.final = crypto_poly1305_final,
.descsize = sizeof(struct poly1305_simd_desc_ctx),
.base = {
In order to reduce the number of invocations of the RFC7539 template into the Poly1305 driver, implement the new internal .update_from_sg method that allows the driver to amortize the cost of FPU preserve/restore sequences over a larger chunk of input. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> --- arch/x86/crypto/poly1305_glue.c | 54 ++++++++++++++++---- 1 file changed, 43 insertions(+), 11 deletions(-)