[v3,12/31] crypto: arm64/chacha - Add support for chaining

Message ID: E1k0JtB-0006Np-A3@fornost.hmeau.com
State: Changes Requested
Delegated to: Herbert Xu
Series: crypto: skcipher - Add support for no chaining and partial chaining

Commit Message

Herbert Xu July 28, 2020, 7:19 a.m. UTC
As it stands, chacha cannot do chaining.  That is, it has to handle
each request as a whole.  This patch adds support for chaining when
the CRYPTO_TFM_REQ_MORE flag is set.
    
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 arch/arm64/crypto/chacha-neon-glue.c |   43 ++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 15 deletions(-)
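
For context, this is roughly how a caller would drive the chaining: a
minimal sketch, assuming the CRYPTO_TFM_REQ_MORE flag introduced earlier
in this series, with the key, IV and scatterlists already set up and all
error handling elided:

	struct crypto_skcipher *tfm;
	struct skcipher_request *req;

	tfm = crypto_alloc_skcipher("chacha20", 0, 0);
	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	crypto_skcipher_setkey(tfm, key, CHACHA_KEY_SIZE);

	/* First chunk: REQ_MORE tells the driver to carry its cipher
	 * state over to the next request.  len1 is assumed here to be
	 * a multiple of CHACHA_BLOCK_SIZE.
	 */
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MORE, NULL, NULL);
	skcipher_request_set_crypt(req, src1, dst1, len1, iv);
	crypto_skcipher_encrypt(req);

	/* Final chunk: clearing REQ_MORE ends the chain.  The same
	 * request object must be reused, since the carried state
	 * lives in its request context.
	 */
	skcipher_request_set_callback(req, 0, NULL, NULL);
	skcipher_request_set_crypt(req, src2, dst2, len2, iv);
	crypto_skcipher_encrypt(req);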

Comments

Ard Biesheuvel July 29, 2020, 6:16 a.m. UTC | #1
On Tue, 28 Jul 2020 at 10:19, Herbert Xu <herbert@gondor.apana.org.au> wrote:
>
> As it stands, chacha cannot do chaining.  That is, it has to handle
> each request as a whole.  This patch adds support for chaining when
> the CRYPTO_TFM_REQ_MORE flag is set.
>
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Only state[12] needs to be preserved, since it contains the block
counter. Everything else in the state can be derived from the IV.

So by doing the init unconditionally, and overriding state[12] to the
captured value (if it exists), we can get rid of the redundant copy of
state, which also avoids inconsistencies if IV and state are out of
sync.
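
For illustration, a minimal sketch of that suggestion (rctx->counter is
a hypothetical request-context field standing in for the captured
state[12]; this is not the code that was posted):

	chacha_init_generic(state, ctx->key, req->iv);
	if (rctx->init)
		/* Override the block counter with the value captured
		 * from the previous request in the chain.
		 */
		state[12] = rctx->counter;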

Herbert Xu July 29, 2020, 6:28 a.m. UTC | #2
On Wed, Jul 29, 2020 at 09:16:55AM +0300, Ard Biesheuvel wrote:
>
> Only state[12] needs to be preserved, since it contains the block
> counter. Everything else in the state can be derived from the IV.
> 
> So by doing the init unconditionally, and overriding state[12] to the
> captured value (if it exists), we can get rid of the redundant copy of
> state, which also avoids inconsistencies if IV and state are out of
> sync.

Good point.  In fact we could try to put the counter back into
the IV just like CTR.  Let me have a play with this to see what
it would look like.

Thanks,
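
For illustration, writing the counter back into the IV, as the CTR
template does, might amount to no more than this after each chained
request (a sketch, assuming the usual chacha IV layout where bytes 0-3
hold the little-endian block counter):

	/* Propagate the current block counter back into the IV so the
	 * next request can re-derive the whole state from key + IV.
	 */
	put_unaligned_le32(state[12], req->iv);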

Patch

diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index af2bbca38e70f..d82c574ddcc00 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -19,10 +19,8 @@ 
  * (at your option) any later version.
  */
 
-#include <crypto/algapi.h>
 #include <crypto/internal/chacha.h>
 #include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
 #include <linux/jump_label.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -101,16 +99,16 @@  void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
 }
 EXPORT_SYMBOL(chacha_crypt_arch);
 
-static int chacha_neon_stream_xor(struct skcipher_request *req,
-				  const struct chacha_ctx *ctx, const u8 *iv)
+static int chacha_neon_stream_xor(struct skcipher_request *req, int nrounds)
 {
+	struct chacha_reqctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_walk walk;
-	u32 state[16];
+	u32 *state = rctx->state;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, false);
+	rctx->init = req->base.flags & CRYPTO_TFM_REQ_MORE;
 
-	chacha_init_generic(state, ctx->key, iv);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while (walk.nbytes > 0) {
 		unsigned int nbytes = walk.nbytes;
@@ -122,11 +120,11 @@  static int chacha_neon_stream_xor(struct skcipher_request *req,
 		    !crypto_simd_usable()) {
 			chacha_crypt_generic(state, walk.dst.virt.addr,
 					     walk.src.virt.addr, nbytes,
-					     ctx->nrounds);
+					     nrounds);
 		} else {
 			kernel_neon_begin();
 			chacha_doneon(state, walk.dst.virt.addr,
-				      walk.src.virt.addr, nbytes, ctx->nrounds);
+				      walk.src.virt.addr, nbytes, nrounds);
 			kernel_neon_end();
 		}
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
@@ -138,26 +136,38 @@  static int chacha_neon_stream_xor(struct skcipher_request *req,
 static int chacha_neon(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_reqctx *rctx = skcipher_request_ctx(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return chacha_neon_stream_xor(req, ctx, req->iv);
+	if (!rctx->init)
+		chacha_init_generic(rctx->state, ctx->key, req->iv);
+
+	return chacha_neon_stream_xor(req, ctx->nrounds);
 }
 
 static int xchacha_neon(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_reqctx *rctx = skcipher_request_ctx(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct chacha_ctx subctx;
-	u32 state[16];
+	int nrounds = ctx->nrounds;
+	u32 *state = rctx->state;
 	u8 real_iv[16];
+	u32 key[8];
+
+	if (rctx->init)
+		goto skip_init;
 
 	chacha_init_generic(state, ctx->key, req->iv);
-	hchacha_block_arch(state, subctx.key, ctx->nrounds);
-	subctx.nrounds = ctx->nrounds;
+	hchacha_block_arch(state, key, nrounds);
 
 	memcpy(&real_iv[0], req->iv + 24, 8);
 	memcpy(&real_iv[8], req->iv + 16, 8);
-	return chacha_neon_stream_xor(req, &subctx, real_iv);
+
+	chacha_init_generic(state, key, real_iv);
+
+skip_init:
+	return chacha_neon_stream_xor(req, nrounds);
 }
 
 static struct skcipher_alg algs[] = {
@@ -174,6 +184,7 @@  static struct skcipher_alg algs[] = {
 		.ivsize			= CHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
+		.reqsize		= sizeof(struct chacha_reqctx),
 		.setkey			= chacha20_setkey,
 		.encrypt		= chacha_neon,
 		.decrypt		= chacha_neon,
@@ -190,6 +201,7 @@  static struct skcipher_alg algs[] = {
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
+		.reqsize		= sizeof(struct chacha_reqctx),
 		.setkey			= chacha20_setkey,
 		.encrypt		= xchacha_neon,
 		.decrypt		= xchacha_neon,
@@ -206,6 +218,7 @@  static struct skcipher_alg algs[] = {
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
+		.reqsize		= sizeof(struct chacha_reqctx),
 		.setkey			= chacha12_setkey,
 		.encrypt		= xchacha_neon,
 		.decrypt		= xchacha_neon,