crypto: xts - add support for ciphertext stealing

Message ID: 20190809063106.316-1-ard.biesheuvel@linaro.org (mailing list archive)
State: Superseded
Delegated to: Herbert Xu
Series: crypto: xts - add support for ciphertext stealing

Commit Message

Ard Biesheuvel Aug. 9, 2019, 6:31 a.m. UTC
Add support for the missing ciphertext stealing part of the XTS-AES
specification, which permits inputs of any size >= the block size.

Cc: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
This is an alternative approach to Pascal's [0]: instead of instantiating
a separate cipher to deal with the tail, invoke the same ECB skcipher used
for the bulk of the data.

[0] https://lore.kernel.org/linux-crypto/1565245094-8584-1-git-send-email-pvanleeuwen@verimatrix.com/

 crypto/xts.c | 148 +++++++++++++++++---
 1 file changed, 130 insertions(+), 18 deletions(-)
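
For readers unfamiliar with ciphertext stealing, the tail handling added
here is the standard XTS-CTS construction (IEEE P1619 / NIST SP 800-38E).
Below is a minimal standalone C sketch of the encrypt-side steal; the
single-block primitive is supplied by the caller and merely stands in for
one more invocation of the same ecb(aes) skcipher, so the names are
illustrative, not the kernel API:

#include <stdint.h>
#include <string.h>

/* One tweaked block operation: out = E_K(in XOR T) XOR T. */
typedef void (*xts_blk_fn)(uint8_t out[16], const uint8_t in[16],
			   const uint8_t tweak[16]);

/*
 * Encrypt-side ciphertext stealing: c_prev holds C_{m-1}, already
 * produced while encrypting the bulk; p_tail holds the 1..15 trailing
 * plaintext bytes; tweak is T_m, the tweak following the bulk.  On
 * return, c_prev holds the new last full ciphertext block and c_tail
 * the short final block.
 */
static void xts_steal_encrypt(xts_blk_fn blk, uint8_t c_prev[16],
			      uint8_t *c_tail, const uint8_t *p_tail,
			      size_t tail, const uint8_t tweak[16])
{
	uint8_t b[16];

	memcpy(b, p_tail, tail);                    /* P_m                  */
	memcpy(b + tail, c_prev + tail, 16 - tail); /* steal from C_{m-1}   */
	memcpy(c_tail, c_prev, tail);               /* C_m = C_{m-1}[:tail] */
	blk(c_prev, b, tweak);                      /* new C_{m-1}          */
}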

Comments

Milan Broz Aug. 9, 2019, 8:12 a.m. UTC | #1
On 09/08/2019 08:31, Ard Biesheuvel wrote:
> Add support for the missing ciphertext stealing part of the XTS-AES
> specification, which permits inputs of any size >= the block size.
> 
> Cc: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>
> Cc: Ondrej Mosnacek <omosnace@redhat.com>
> Cc: Milan Broz <gmazyland@gmail.com>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> This is an alternative approach to Pascal's [0]: instead of instantiating
> a separate cipher to deal with the tail, invoke the same ECB skcipher used
> for the bulk of the data.
> 
> [0] https://lore.kernel.org/linux-crypto/1565245094-8584-1-git-send-email-pvanleeuwen@verimatrix.com/

I added the 8-byte IV test vectors, and ran the vector tests with AF_ALG.
It seems to work (the results match the OpenSSL and gcrypt XTS implementations).

So whichever patch version gets applied, I am happy with it :-)

If it helps anything, you can add

Tested-by: Milan Broz <gmazyland@gmail.com>

m.
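
(For reference, a test along the lines Milan describes can be driven from
userspace through the AF_ALG socket interface. The sketch below is a
hypothetical minimal harness, not Milan's actual test: it binds an
xts(aes) skcipher, sets a 32-byte key, and encrypts a 17-byte buffer,
which the xts template only accepts once this patch's ciphertext stealing
is in place. Error handling is omitted for brevity.)

#include <linux/if_alg.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "skcipher",
		.salg_name   = "xts(aes)",
	};
	unsigned char key[32] = { 0 };      /* two concatenated AES-128 keys */
	unsigned char pt[17] = "0123456789abcdef"; /* 16 bytes + 1 tail byte */
	unsigned char ct[17];
	char cbuf[CMSG_SPACE(4) +
		  CMSG_SPACE(sizeof(struct af_alg_iv) + 16)] = { 0 };
	struct iovec iov = { .iov_base = pt, .iov_len = sizeof(pt) };
	struct msghdr msg = {
		.msg_control    = cbuf,
		.msg_controllen = sizeof(cbuf),
		.msg_iov        = &iov,
		.msg_iovlen     = 1,
	};
	struct cmsghdr *cmsg;
	struct af_alg_iv *ivp;
	int tfm, req;

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfm, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
	req = accept(tfm, NULL, 0);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_OP;
	cmsg->cmsg_len   = CMSG_LEN(4);
	*(unsigned int *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_IV;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(struct af_alg_iv) + 16);
	ivp = (struct af_alg_iv *)CMSG_DATA(cmsg);
	ivp->ivlen = 16;
	memset(ivp->iv, 0, 16);             /* all-zero sector tweak */

	sendmsg(req, &msg, 0);              /* 17 bytes: hits the CTS path */
	printf("read %zd ciphertext bytes\n", read(req, ct, sizeof(ct)));
	return 0;
}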

> [full patch quoted verbatim in the original message; snipped, see the
> Patch section below]
Pascal Van Leeuwen Aug. 9, 2019, 10:21 a.m. UTC | #2
Ard, 

Nitpicking: your patch does not fix the comment at the top stating that 
sector sizes which are not a multiple of 16 bytes are not supported.

Otherwise, it works fine over here and I like the way you actually
queue up that final cipher call, which largely addresses my performance
concerns w.r.t. hardware acceleration :-)

> -----Original Message-----
> From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Sent: Friday, August 9, 2019 8:31 AM
> To: linux-crypto@vger.kernel.org
> Cc: herbert@gondor.apana.org.au; ebiggers@kernel.org; Ard Biesheuvel
> <ard.biesheuvel@linaro.org>; Pascal Van Leeuwen <pvanleeuwen@verimatrix.com>; Ondrej
> Mosnacek <omosnace@redhat.com>; Milan Broz <gmazyland@gmail.com>
> Subject: [PATCH] crypto: xts - add support for ciphertext stealing
> 
> Add support for the missing ciphertext stealing part of the XTS-AES
> specification, which permits inputs of any size >= the block size.
> 
> Cc: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>
> Cc: Ondrej Mosnacek <omosnace@redhat.com>
> Cc: Milan Broz <gmazyland@gmail.com>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Tested-by: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>

> [full patch quoted verbatim; snipped, see the Patch section below]

Regards,
Pascal van Leeuwen
Silicon IP Architect, Multi-Protocol Engines @ Verimatrix
www.insidesecure.com
Pascal Van Leeuwen Aug. 9, 2019, 3 p.m. UTC | #3
> -----Original Message-----
> From: linux-crypto-owner@vger.kernel.org <linux-crypto-owner@vger.kernel.org> On Behalf Of
> Pascal Van Leeuwen
> Sent: Friday, August 9, 2019 12:22 PM
> To: Ard Biesheuvel <ard.biesheuvel@linaro.org>; linux-crypto@vger.kernel.org
> Cc: herbert@gondor.apana.org.au; ebiggers@kernel.org; Ondrej Mosnacek
> <omosnace@redhat.com>; Milan Broz <gmazyland@gmail.com>
> Subject: RE: [PATCH] crypto: xts - add support for ciphertext stealing
> 
> Ard,
> 
> Nitpicking: your patch does not fix the comment at the top stating that
> sector sizes which are not a multiple of 16 bytes are not supported.
> 
> Otherwise, it works fine over here and I like the way you actually
> queue up that final cipher call, which largely addresses my performance
> concerns w.r.t. hardware acceleration :-)
>
Actually, I just noticed it did NOT work fine: the first CTS vector (5)
was failing. Sorry for missing that little detail before.
Setting cra_blocksize to 1 instead of 16 solves that issue.

Are you still sure cra_blocksize should be set to 16? Because to me, that
doesn't make sense for something that is fundamentally NOT a block cipher.
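
(For context, the field in question is set when the xts template instance
is created; a one-line excerpt from create() in crypto/xts.c, with the
rest of the function elided:)

	/* create(), abridged: the instance advertises a full 16-byte
	 * block size; Pascal's experiment changes this to 1 so the
	 * template presents itself as a stream cipher to the API. */
	inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE;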

>
> > -----Original Message-----
> > From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> > Sent: Friday, August 9, 2019 8:31 AM
> > To: linux-crypto@vger.kernel.org
> > Cc: herbert@gondor.apana.org.au; ebiggers@kernel.org; Ard Biesheuvel
> > <ard.biesheuvel@linaro.org>; Pascal Van Leeuwen <pvanleeuwen@verimatrix.com>; Ondrej
> > Mosnacek <omosnace@redhat.com>; Milan Broz <gmazyland@gmail.com>
> > Subject: [PATCH] crypto: xts - add support for ciphertext stealing
> >
> > Add support for the missing ciphertext stealing part of the XTS-AES
> > specification, which permits inputs of any size >= the block size.
> >
> > Cc: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>
> > Cc: Ondrej Mosnacek <omosnace@redhat.com>
> > Cc: Milan Broz <gmazyland@gmail.com>
> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> 
> Tested-by: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>
> 
Eh ... tested yes ... working ... no ...

> > [full patch quoted verbatim; snipped, see the Patch section below]

Regards,
Pascal van Leeuwen
Silicon IP Architect, Multi-Protocol Engines @ Verimatrix
www.insidesecure.com
Ard Biesheuvel Aug. 9, 2019, 5:13 p.m. UTC | #4
On Fri, 9 Aug 2019 at 18:00, Pascal Van Leeuwen
<pvanleeuwen@verimatrix.com> wrote:
>
> > -----Original Message-----
> > From: linux-crypto-owner@vger.kernel.org <linux-crypto-owner@vger.kernel.org> On Behalf Of
> > Pascal Van Leeuwen
> > Sent: Friday, August 9, 2019 12:22 PM
> > To: Ard Biesheuvel <ard.biesheuvel@linaro.org>; linux-crypto@vger.kernel.org
> > Cc: herbert@gondor.apana.org.au; ebiggers@kernel.org; Ondrej Mosnacek
> > <omosnace@redhat.com>; Milan Broz <gmazyland@gmail.com>
> > Subject: RE: [PATCH] crypto: xts - add support for ciphertext stealing
> >
> > Ard,
> >
> > Nitpicking: your patch does not fix the comment at the top stating that
> > sector sizes which are not a multiple of 16 bytes are not supported.
> >
> > Otherwise, it works fine over here and I like the way you actually
> > queue up that final cipher call, which largely addresses my performance
> > concerns w.r.t. hardware acceleration :-)
> >
> Actually, I just noticed it did NOT work fine: the first CTS vector (5)
> was failing. Sorry for missing that little detail before.
> Setting cra_blocksize to 1 instead of 16 solves that issue.
>
> Are you still sure cra_blocksize should be set to 16? Because to me, that
> doesn't make sense for something that is fundamentally NOT a block cipher.
>

Yes. I spotted an issue in the async handling; I'll send out a v2.
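
(The contract at issue: an asynchronous backend may return -EINPROGRESS
from the bulk ECB call, in which case the tail work has to run in the
completion callback rather than inline. A toy userspace model of that
branching, with hypothetical stubs standing in for the crypto calls:)

#include <errno.h>
#include <stdio.h>

/* bulk_crypt() models crypto_skcipher_encrypt() on the subrequest,
 * cts_tail() models cts_final(); both are illustrative stubs. */
static int bulk_crypt(int async) { return async ? -EINPROGRESS : 0; }
static int cts_tail(void) { return 0; }

static int encrypt_model(int async)
{
	int err = bulk_crypt(async);

	if (err == -EINPROGRESS)
		return err;           /* tail runs later, in the callback */
	return err ?: cts_tail();     /* synchronous path */
}

int main(void)
{
	printf("sync=%d async=%d\n", encrypt_model(0), encrypt_model(1));
	return 0;
}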

> > [remainder of the quoted message snipped; see the Patch section below]

Patch

diff --git a/crypto/xts.c b/crypto/xts.c
index 11211003db7e..fc9edc6eb11e 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -34,6 +34,7 @@ struct xts_instance_ctx {
 
 struct rctx {
 	le128 t;
+	struct scatterlist sg[2];
 	struct skcipher_request subreq;
 };
 
@@ -84,10 +85,11 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
  * mutliple calls to the 'ecb(..)' instance, which usually would be slower than
  * just doing the gf128mul_x_ble() calls again.
  */
-static int xor_tweak(struct skcipher_request *req, bool second_pass)
+static int xor_tweak(struct skcipher_request *req, bool second_pass, bool enc)
 {
 	struct rctx *rctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
 	const int bs = XTS_BLOCK_SIZE;
 	struct skcipher_walk w;
 	le128 t = rctx->t;
@@ -109,6 +111,20 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass)
 		wdst = w.dst.virt.addr;
 
 		do {
+			if (unlikely(cts) &&
+			    w.total - w.nbytes + avail < 2 * XTS_BLOCK_SIZE) {
+				if (!enc) {
+					if (second_pass)
+						rctx->t = t;
+					gf128mul_x_ble(&t, &t);
+				}
+				le128_xor(wdst, &t, wsrc);
+				if (enc && second_pass)
+					gf128mul_x_ble(&rctx->t, &t);
+				skcipher_walk_done(&w, avail - bs);
+				return 0;
+			}
+
 			le128_xor(wdst++, &t, wsrc++);
 			gf128mul_x_ble(&t, &t);
 		} while ((avail -= bs) >= bs);
@@ -119,17 +135,70 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass)
 	return err;
 }
 
-static int xor_tweak_pre(struct skcipher_request *req)
+static int xor_tweak_pre(struct skcipher_request *req, bool enc)
 {
-	return xor_tweak(req, false);
+	return xor_tweak(req, false, enc);
 }
 
-static int xor_tweak_post(struct skcipher_request *req)
+static int xor_tweak_post(struct skcipher_request *req, bool enc)
 {
-	return xor_tweak(req, true);
+	return xor_tweak(req, true, enc);
 }
 
-static void crypt_done(struct crypto_async_request *areq, int err)
+static void cts_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+	le128 b;
+
+	if (!err) {
+		struct rctx *rctx = skcipher_request_ctx(req);
+
+		scatterwalk_map_and_copy(&b, rctx->sg, 0, XTS_BLOCK_SIZE, 0);
+		le128_xor(&b, &rctx->t, &b);
+		scatterwalk_map_and_copy(&b, rctx->sg, 0, XTS_BLOCK_SIZE, 1);
+	}
+
+	skcipher_request_complete(req, err);
+}
+
+static int cts_final(struct skcipher_request *req,
+		     int (*crypt)(struct skcipher_request *req))
+{
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	int offset = req->cryptlen & ~(XTS_BLOCK_SIZE - 1);
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq = &rctx->subreq;
+	int tail = req->cryptlen % XTS_BLOCK_SIZE;
+	struct scatterlist *sg;
+	le128 b[2];
+	int err;
+
+	sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - XTS_BLOCK_SIZE);
+
+	scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 0);
+	memcpy(b + 1, b, tail);
+	scatterwalk_map_and_copy(b, req->src, offset, tail, 0);
+
+	le128_xor(b, &rctx->t, b);
+
+	scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE + tail, 1);
+
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_callback(subreq, req->base.flags, cts_done, req);
+	skcipher_request_set_crypt(subreq, sg, sg, XTS_BLOCK_SIZE, NULL);
+
+	err = crypt(subreq);
+	if (err)
+		return err;
+
+	scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 0);
+	le128_xor(b, &rctx->t, b);
+	scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 1);
+
+	return 0;
+}
+
+static void encrypt_done(struct crypto_async_request *areq, int err)
 {
 	struct skcipher_request *req = areq->data;
 
@@ -137,47 +206,90 @@ static void crypt_done(struct crypto_async_request *areq, int err)
 		struct rctx *rctx = skcipher_request_ctx(req);
 
 		rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-		err = xor_tweak_post(req);
+		err = xor_tweak_post(req, true);
+
+		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
+			err = cts_final(req, crypto_skcipher_encrypt);
+			if (err == -EINPROGRESS)
+				return;
+		}
 	}
 
 	skcipher_request_complete(req, err);
 }
 
-static void init_crypt(struct skcipher_request *req)
+static void decrypt_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+
+	if (!err) {
+		struct rctx *rctx = skcipher_request_ctx(req);
+
+		rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+		err = xor_tweak_post(req, false);
+
+		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
+			err = cts_final(req, crypto_skcipher_decrypt);
+			if (err == -EINPROGRESS)
+				return;
+		}
+	}
+
+	skcipher_request_complete(req, err);
+}
+
+static int init_crypt(struct skcipher_request *req, crypto_completion_t compl)
 {
 	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	struct rctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_request *subreq = &rctx->subreq;
 
+	if (req->cryptlen < XTS_BLOCK_SIZE)
+		return -EINVAL;
+
 	skcipher_request_set_tfm(subreq, ctx->child);
-	skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
+	skcipher_request_set_callback(subreq, req->base.flags, compl, req);
 	skcipher_request_set_crypt(subreq, req->dst, req->dst,
-				   req->cryptlen, NULL);
+				   req->cryptlen & ~(XTS_BLOCK_SIZE - 1), NULL);
 
 	/* calculate first value of T */
 	crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);
+
+	return 0;
 }
 
 static int encrypt(struct skcipher_request *req)
 {
 	struct rctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_request *subreq = &rctx->subreq;
+	int err;
 
-	init_crypt(req);
-	return xor_tweak_pre(req) ?:
-		crypto_skcipher_encrypt(subreq) ?:
-		xor_tweak_post(req);
+	err = init_crypt(req, encrypt_done) ?:
+	      xor_tweak_pre(req, true) ?:
+	      crypto_skcipher_encrypt(subreq) ?:
+	      xor_tweak_post(req, true);
+
+	if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
+		return err;
+
+	return cts_final(req, crypto_skcipher_encrypt);
 }
 
 static int decrypt(struct skcipher_request *req)
 {
 	struct rctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_request *subreq = &rctx->subreq;
+	int err;
+
+	err = init_crypt(req, decrypt_done) ?:
+	      xor_tweak_pre(req, false) ?:
+	      crypto_skcipher_decrypt(subreq) ?:
+	      xor_tweak_post(req, false);
+
+	if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
+		return err;
 
-	init_crypt(req);
-	return xor_tweak_pre(req) ?:
-		crypto_skcipher_decrypt(subreq) ?:
-		xor_tweak_post(req);
+	return cts_final(req, crypto_skcipher_decrypt);
 }
 
 static int init_tfm(struct crypto_skcipher *tfm)
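
A closing note for readers: the comment above xor_tweak() mentions
recomputing the gf128mul_x_ble() tweak chain on the second pass. That
update is a multiply-by-x in GF(2^128) over the block read as a
little-endian 128-bit integer; a freestanding sketch, with the low and
high 64-bit halves passed explicitly instead of the kernel's le128 type:

#include <stdint.h>

/* T <- x * T in GF(2^128), XTS little-endian convention: lo holds
 * bytes 0..7 of the tweak block, hi holds bytes 8..15.  A carry out
 * of bit 127 is reduced with x^128 + x^7 + x^2 + x + 1, i.e. by
 * XORing 0x87 into the low byte. */
static void xts_mul_x(uint64_t *hi, uint64_t *lo)
{
	uint64_t carry = *hi >> 63;

	*hi = (*hi << 1) | (*lo >> 63);
	*lo = (*lo << 1) ^ (carry * 0x87);
}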