diff mbox

[v3b,5/5] crypto: marvell: factor out common import/export functions

Message ID E1Zke3a-00042e-Jx@rmk-PC.arm.linux.org.uk (mailing list archive)
State Accepted
Delegated to: Herbert Xu
Headers show

Commit Message

Russell King Oct. 9, 2015, 8:14 p.m. UTC
As all the import functions and export functions are virtually
identical, factor out their common parts into a generic
mv_cesa_ahash_import() and mv_cesa_ahash_export() respectively.  This
performs the actual import or export, and we pass the data pointers and
length into these functions.

We have to switch a % const operation to do_div() in the common import
function to avoid provoking gcc to use the expensive 64-bit by 64-bit
modulus operation.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
Replacement to patch 5, going a little further, as requested by Boris.

 drivers/crypto/marvell/hash.c | 157 ++++++++++++++----------------------------
 1 file changed, 53 insertions(+), 104 deletions(-)

Comments

Boris BREZILLON Oct. 9, 2015, 8:19 p.m. UTC | #1
On Fri, 09 Oct 2015 21:14:22 +0100
Russell King <rmk+kernel@arm.linux.org.uk> wrote:

> As all the import functions and export functions are virtually
> identical, factor out their common parts into a generic
> mv_cesa_ahash_import() and mv_cesa_ahash_export() respectively.  This
> performs the actual import or export, and we pass the data pointers and
> length into these functions.
> 
> We have to switch a % const operation to do_div() in the common import
> function to avoid provoking gcc to use the expensive 64-bit by 64-bit
> modulus operation.
> 
> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Looks good to me.

Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>

Thanks again for taking care of that.

Regards,

Boris

> ---
> Replacement to patch 5, going a little further, as requested by Boris.
> 
>  drivers/crypto/marvell/hash.c | 157 ++++++++++++++----------------------------
>  1 file changed, 53 insertions(+), 104 deletions(-)
> 
> diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
> index b7c2c05f1a01..a24ceda7acfe 100644
> --- a/drivers/crypto/marvell/hash.c
> +++ b/drivers/crypto/marvell/hash.c
> @@ -795,39 +795,32 @@ static int mv_cesa_ahash_finup(struct ahash_request *req)
>  	return ret;
>  }
>  
> -static int mv_cesa_md5_init(struct ahash_request *req)
> +static int mv_cesa_ahash_export(struct ahash_request *req, void *hash,
> +				u64 *len, void *cache)
>  {
> -	struct mv_cesa_op_ctx tmpl;
> -
> -	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
> -
> -	mv_cesa_ahash_init(req, &tmpl);
> -
> -	return 0;
> -}
> -
> -static int mv_cesa_md5_export(struct ahash_request *req, void *out)
> -{
> -	struct md5_state *out_state = out;
>  	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
>  	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
>  	unsigned int digsize = crypto_ahash_digestsize(ahash);
> +	unsigned int blocksize;
> +
> +	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash));
>  
> -	out_state->byte_count = creq->len;
> -	memcpy(out_state->hash, creq->state, digsize);
> -	memset(out_state->block, 0, sizeof(out_state->block));
> +	*len = creq->len;
> +	memcpy(hash, creq->state, digsize);
> +	memset(cache, 0, blocksize);
>  	if (creq->cache)
> -		memcpy(out_state->block, creq->cache, creq->cache_ptr);
> +		memcpy(cache, creq->cache, creq->cache_ptr);
>  
>  	return 0;
>  }
>  
> -static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
> +static int mv_cesa_ahash_import(struct ahash_request *req, const void *hash,
> +				u64 len, const void *cache)
>  {
> -	const struct md5_state *in_state = in;
>  	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
>  	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
>  	unsigned int digsize = crypto_ahash_digestsize(ahash);
> +	unsigned int blocksize;
>  	unsigned int cache_ptr;
>  	int ret;
>  
> @@ -835,16 +828,17 @@ static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
>  	if (ret)
>  		return ret;
>  
> -	if (in_state->byte_count >= sizeof(in_state->block))
> +	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash));
> +	if (len >= blocksize)
>  		mv_cesa_update_op_cfg(&creq->op_tmpl,
>  				      CESA_SA_DESC_CFG_MID_FRAG,
>  				      CESA_SA_DESC_CFG_FRAG_MSK);
>  
> -	creq->len = in_state->byte_count;
> -	memcpy(creq->state, in_state->hash, digsize);
> +	creq->len = len;
> +	memcpy(creq->state, hash, digsize);
>  	creq->cache_ptr = 0;
>  
> -	cache_ptr = creq->len % sizeof(in_state->block);
> +	cache_ptr = do_div(len, blocksize);
>  	if (!cache_ptr)
>  		return 0;
>  
> @@ -852,12 +846,39 @@ static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
>  	if (ret)
>  		return ret;
>  
> -	memcpy(creq->cache, in_state->block, cache_ptr);
> +	memcpy(creq->cache, cache, cache_ptr);
>  	creq->cache_ptr = cache_ptr;
>  
>  	return 0;
>  }
>  
> +static int mv_cesa_md5_init(struct ahash_request *req)
> +{
> +	struct mv_cesa_op_ctx tmpl;
> +
> +	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
> +
> +	mv_cesa_ahash_init(req, &tmpl);
> +
> +	return 0;
> +}
> +
> +static int mv_cesa_md5_export(struct ahash_request *req, void *out)
> +{
> +	struct md5_state *out_state = out;
> +
> +	return mv_cesa_ahash_export(req, out_state->hash,
> +				    &out_state->byte_count, out_state->block);
> +}
> +
> +static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
> +{
> +	const struct md5_state *in_state = in;
> +
> +	return mv_cesa_ahash_import(req, in_state->hash, in_state->byte_count,
> +				    in_state->block);
> +}
> +
>  static int mv_cesa_md5_digest(struct ahash_request *req)
>  {
>  	int ret;
> @@ -908,53 +929,17 @@ static int mv_cesa_sha1_init(struct ahash_request *req)
>  static int mv_cesa_sha1_export(struct ahash_request *req, void *out)
>  {
>  	struct sha1_state *out_state = out;
> -	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
> -	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
> -	unsigned int digsize = crypto_ahash_digestsize(ahash);
> -
> -	out_state->count = creq->len;
> -	memcpy(out_state->state, creq->state, digsize);
> -	memset(out_state->buffer, 0, sizeof(out_state->buffer));
> -	if (creq->cache)
> -		memcpy(out_state->buffer, creq->cache, creq->cache_ptr);
>  
> -	return 0;
> +	return mv_cesa_ahash_export(req, out_state->state, &out_state->count,
> +				    out_state->buffer);
>  }
>  
>  static int mv_cesa_sha1_import(struct ahash_request *req, const void *in)
>  {
>  	const struct sha1_state *in_state = in;
> -	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
> -	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
> -	unsigned int digsize = crypto_ahash_digestsize(ahash);
> -	unsigned int cache_ptr;
> -	int ret;
> -
> -	ret = crypto_ahash_init(req);
> -	if (ret)
> -		return ret;
> -
> -	if (in_state->count >= SHA1_BLOCK_SIZE)
> -		mv_cesa_update_op_cfg(&creq->op_tmpl,
> -				      CESA_SA_DESC_CFG_MID_FRAG,
> -				      CESA_SA_DESC_CFG_FRAG_MSK);
> -
> -	creq->len = in_state->count;
> -	memcpy(creq->state, in_state->state, digsize);
> -	creq->cache_ptr = 0;
> -
> -	cache_ptr = creq->len % SHA1_BLOCK_SIZE;
> -	if (!cache_ptr)
> -		return 0;
> -
> -	ret = mv_cesa_ahash_alloc_cache(req);
> -	if (ret)
> -		return ret;
> -
> -	memcpy(creq->cache, in_state->buffer, cache_ptr);
> -	creq->cache_ptr = cache_ptr;
>  
> -	return 0;
> +	return mv_cesa_ahash_import(req, in_state->state, in_state->count,
> +				    in_state->buffer);
>  }
>  
>  static int mv_cesa_sha1_digest(struct ahash_request *req)
> @@ -1018,53 +1003,17 @@ static int mv_cesa_sha256_digest(struct ahash_request *req)
>  static int mv_cesa_sha256_export(struct ahash_request *req, void *out)
>  {
>  	struct sha256_state *out_state = out;
> -	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
> -	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
> -	unsigned int ds = crypto_ahash_digestsize(ahash);
>  
> -	out_state->count = creq->len;
> -	memcpy(out_state->state, creq->state, ds);
> -	memset(out_state->buf, 0, sizeof(out_state->buf));
> -	if (creq->cache)
> -		memcpy(out_state->buf, creq->cache, creq->cache_ptr);
> -
> -	return 0;
> +	return mv_cesa_ahash_export(req, out_state->state, &out_state->count,
> +				    out_state->buf);
>  }
>  
>  static int mv_cesa_sha256_import(struct ahash_request *req, const void *in)
>  {
>  	const struct sha256_state *in_state = in;
> -	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
> -	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
> -	unsigned int digsize = crypto_ahash_digestsize(ahash);
> -	unsigned int cache_ptr;
> -	int ret;
> -
> -	ret = crypto_ahash_init(req);
> -	if (ret)
> -		return ret;
> -
> -	if (in_state->count >= SHA256_BLOCK_SIZE)
> -		mv_cesa_update_op_cfg(&creq->op_tmpl,
> -				      CESA_SA_DESC_CFG_MID_FRAG,
> -				      CESA_SA_DESC_CFG_FRAG_MSK);
>  
> -	creq->len = in_state->count;
> -	memcpy(creq->state, in_state->state, digsize);
> -	creq->cache_ptr = 0;
> -
> -	cache_ptr = creq->len % SHA256_BLOCK_SIZE;
> -	if (!cache_ptr)
> -		return 0;
> -
> -	ret = mv_cesa_ahash_alloc_cache(req);
> -	if (ret)
> -		return ret;
> -
> -	memcpy(creq->cache, in_state->buf, cache_ptr);
> -	creq->cache_ptr = cache_ptr;
> -
> -	return 0;
> +	return mv_cesa_ahash_import(req, in_state->state, in_state->count,
> +				    in_state->buf);
>  }
>  
>  struct ahash_alg mv_sha256_alg = {
Arnaud Ebalard Oct. 9, 2015, 10:37 p.m. UTC | #2
Hi Russel,

Russell King <rmk+kernel@arm.linux.org.uk> writes:

> As all the import functions and export functions are virtually
> identical, factor out their common parts into a generic
> mv_cesa_ahash_import() and mv_cesa_ahash_export() respectively.  This
> performs the actual import or export, and we pass the data pointers and
> length into these functions.
>
> We have to switch a % const operation to do_div() in the common import
> function to avoid provoking gcc to use the expensive 64-bit by 64-bit
> modulus operation.
>
> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Thanks for the refactoring and for the fixes. All patches look good to 
me. Out of curiosity, can I ask what perf you get w/ openssh or openssl
using AF_ALG and the CESA?

Cheers,

a+
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux Oct. 9, 2015, 11:51 p.m. UTC | #3
On Sat, Oct 10, 2015 at 12:37:33AM +0200, Arnaud Ebalard wrote:
> Hi Russel,
> 
> Russell King <rmk+kernel@arm.linux.org.uk> writes:
> 
> > As all the import functions and export functions are virtually
> > identical, factor out their common parts into a generic
> > mv_cesa_ahash_import() and mv_cesa_ahash_export() respectively.  This
> > performs the actual import or export, and we pass the data pointers and
> > length into these functions.
> >
> > We have to switch a % const operation to do_div() in the common import
> > function to avoid provoking gcc to use the expensive 64-bit by 64-bit
> > modulus operation.
> >
> > Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
> 
> Thanks for the refactoring and for the fixes. All patches look good to 
> me. Out of curiosity, can I ask what perf you get w/ openssh or openssl
> using AF_ALG and the CESA?

I would do, but it seems this AF_ALG plugin for openssl isn't
actually using it for encryption.  When I try:

	openssl speed -engine af_alg aes-128-cbc

I get results for using openssl's software implementation.  If I do:

	openssl speed -engine af_alg md5

then I get results from using the kernel's MD5.  Hence, I think the
only thing that I think openssh is using it for is the digest stuff,
not the crypto itself.  I can't be certain about that though.

I've tried debugging the af_alg engine plugin, but I'm not getting
very far (I'm not an openssl hacker!)  I see it registering the
function to get the ciphers (via ENGINE_set_ciphers), and I see this
called several times, returning a list of NID_xxx values describing
the methods it supports, which includes aes-128-cbc.  However,
unlike the equivalent digest function, I never see it called
requesting any of the ciphers.  Maybe it's an openssl bug, or a
"feature" preventing hardware crypto?  Maybe something is missing
from its initialisation?  I've no idea yet.  It seems I'm not alone
in this - this report from April 2015 is exactly what I'm seeing:

https://mta.openssl.org/pipermail/openssl-users/2015-April/001124.html

However, I'm coming to the conclusion that AF_ALG with openssl is a
dead project, and the only interface that everyone is using for that
is cryptodev - probably contary to Herbert and/or DaveM's wishes.  For
example, the openwrt guys seem to only support cryptodev, according to
their wiki page on the subject of hardware crypto:

http://wiki.openwrt.org/doc/hardware/cryptographic.hardware.accelerators

Here's the references to code for AF_ALG with openssl I've found so far:

Original af_alg plugin (dead):

http://src.carnivore.it/users/common/af_alg/

3rd party "maintained" af_alg openssl plugin, derived from commit
1851bbb010c38878c83729be844f168192059189 in the above repo but with
no history:

https://github.com/RidgeRun/af-alg-rr

and that doesn't contain any changes to the C code originally committed.
Whether this C code contains changes or not is anyone's guess: there's
no way to refer back to the original repository.

Anyway, here's the digest results:

Software:
The 'numbers' are in 1000s of bytes per second processed.
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5              13948.89k    42477.61k   104619.41k   165140.82k   199273.13k
sha1             13091.91k    36463.89k    75393.88k   103893.33k   117104.50k
sha256           13573.92k    30492.25k    52700.33k    64247.81k    68722.69k

Hardware:
The 'numbers' are in 1000s of bytes per second processed.
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5               3964.55k    13782.11k    43181.71k   180263.38k  1446616.18k
sha1              4609.16k     8922.35k    35422.87k   333575.31k  2122547.20k
sha256           13519.62k    30484.10k    52547.47k    64285.21k    68530.60k

There's actually something suspicious while running these tests:

Doing md5 for 3s on 16 size blocks: 32212 md5's in 0.13s
Doing md5 for 3s on 64 size blocks: 23688 md5's in 0.11s
Doing md5 for 3s on 256 size blocks: 23615 md5's in 0.14s
Doing md5 for 3s on 1024 size blocks: 22885 md5's in 0.13s
Doing md5 for 3s on 8192 size blocks: 15893 md5's in 0.09s
Doing sha1 for 3s on 16 size blocks: 31688 sha1's in 0.11s
Doing sha1 for 3s on 64 size blocks: 23700 sha1's in 0.17s
Doing sha1 for 3s on 256 size blocks: 23523 sha1's in 0.17s
Doing sha1 for 3s on 1024 size blocks: 22803 sha1's in 0.07s
Doing sha1 for 3s on 8192 size blocks: 15546 sha1's in 0.06s
Doing sha256 for 3s on 16 size blocks: 2518030 sha256's in 2.98s
Doing sha256 for 3s on 64 size blocks: 1419416 sha256's in 2.98s
Doing sha256 for 3s on 256 size blocks: 613738 sha256's in 2.99s
Doing sha256 for 3s on 1024 size blocks: 187080 sha256's in 2.98s
Doing sha256 for 3s on 8192 size blocks: 25013 sha256's in 2.99s

from the hardware - note the "in" figures are rediculously low, yet
they do wait 3s for each test.  Also, the sha256 results are close
enough to being the software version.

No ideas on any of this yet... but I'm not about to start digging in
the openssl code to try and work out what it's up to.  As I say, I
think this is AF_ALG with openssl is a dead project.
Arnaud Ebalard Oct. 10, 2015, 10:31 a.m. UTC | #4
Hi Russel,

Russell King - ARM Linux <linux@arm.linux.org.uk> writes:

> On Sat, Oct 10, 2015 at 12:37:33AM +0200, Arnaud Ebalard wrote:
>> Hi Russel,
>> 
>> Russell King <rmk+kernel@arm.linux.org.uk> writes:
>> 
>> > As all the import functions and export functions are virtually
>> > identical, factor out their common parts into a generic
>> > mv_cesa_ahash_import() and mv_cesa_ahash_export() respectively.  This
>> > performs the actual import or export, and we pass the data pointers and
>> > length into these functions.
>> >
>> > We have to switch a % const operation to do_div() in the common import
>> > function to avoid provoking gcc to use the expensive 64-bit by 64-bit
>> > modulus operation.
>> >
>> > Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
>> 
>> Thanks for the refactoring and for the fixes. All patches look good to 
>> me. Out of curiosity, can I ask what perf you get w/ openssh or openssl
>> using AF_ALG and the CESA?
>
> I would do, but it seems this AF_ALG plugin for openssl isn't
> actually using it for encryption.  When I try:
>
> 	openssl speed -engine af_alg aes-128-cbc
>
> I get results for using openssl's software implementation.  If I do:
>
> 	openssl speed -engine af_alg md5
>
> then I get results from using the kernel's MD5.  Hence, I think the
> only thing that I think openssh is using it for is the digest stuff,
> not the crypto itself.  I can't be certain about that though.
>
> I've tried debugging the af_alg engine plugin, but I'm not getting
> very far (I'm not an openssl hacker!)  I see it registering the
> function to get the ciphers (via ENGINE_set_ciphers), and I see this
> called several times, returning a list of NID_xxx values describing
> the methods it supports, which includes aes-128-cbc.  However,
> unlike the equivalent digest function, I never see it called
> requesting any of the ciphers.  Maybe it's an openssl bug, or a
> "feature" preventing hardware crypto?  Maybe something is missing
> from its initialisation?  I've no idea yet.  It seems I'm not alone
> in this - this report from April 2015 is exactly what I'm seeing:
>
> https://mta.openssl.org/pipermail/openssl-users/2015-April/001124.html
>
> However, I'm coming to the conclusion that AF_ALG with openssl is a
> dead project, and the only interface that everyone is using for that
> is cryptodev - probably contary to Herbert and/or DaveM's wishes.  For
> example, the openwrt guys seem to only support cryptodev, according to
> their wiki page on the subject of hardware crypto:
>
> http://wiki.openwrt.org/doc/hardware/cryptographic.hardware.accelerators
>
> Here's the references to code for AF_ALG with openssl I've found so far:
>
> Original af_alg plugin (dead):
>
> http://src.carnivore.it/users/common/af_alg/
>
> 3rd party "maintained" af_alg openssl plugin, derived from commit
> 1851bbb010c38878c83729be844f168192059189 in the above repo but with
> no history:
>
> https://github.com/RidgeRun/af-alg-rr
>
> and that doesn't contain any changes to the C code originally committed.
> Whether this C code contains changes or not is anyone's guess: there's
> no way to refer back to the original repository.
>
> Anyway, here's the digest results:
>
> Software:
> The 'numbers' are in 1000s of bytes per second processed.
> type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
> md5              13948.89k    42477.61k   104619.41k   165140.82k   199273.13k
> sha1             13091.91k    36463.89k    75393.88k   103893.33k   117104.50k
> sha256           13573.92k    30492.25k    52700.33k    64247.81k    68722.69k
>
> Hardware:
> The 'numbers' are in 1000s of bytes per second processed.
> type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
> md5               3964.55k    13782.11k    43181.71k   180263.38k  1446616.18k
> sha1              4609.16k     8922.35k    35422.87k   333575.31k  2122547.20k
> sha256           13519.62k    30484.10k    52547.47k    64285.21k    68530.60k
>
> There's actually something suspicious while running these tests:
>
> Doing md5 for 3s on 16 size blocks: 32212 md5's in 0.13s
> Doing md5 for 3s on 64 size blocks: 23688 md5's in 0.11s
> Doing md5 for 3s on 256 size blocks: 23615 md5's in 0.14s
> Doing md5 for 3s on 1024 size blocks: 22885 md5's in 0.13s
> Doing md5 for 3s on 8192 size blocks: 15893 md5's in 0.09s
> Doing sha1 for 3s on 16 size blocks: 31688 sha1's in 0.11s
> Doing sha1 for 3s on 64 size blocks: 23700 sha1's in 0.17s
> Doing sha1 for 3s on 256 size blocks: 23523 sha1's in 0.17s
> Doing sha1 for 3s on 1024 size blocks: 22803 sha1's in 0.07s
> Doing sha1 for 3s on 8192 size blocks: 15546 sha1's in 0.06s
> Doing sha256 for 3s on 16 size blocks: 2518030 sha256's in 2.98s
> Doing sha256 for 3s on 64 size blocks: 1419416 sha256's in 2.98s
> Doing sha256 for 3s on 256 size blocks: 613738 sha256's in 2.99s
> Doing sha256 for 3s on 1024 size blocks: 187080 sha256's in 2.98s
> Doing sha256 for 3s on 8192 size blocks: 25013 sha256's in 2.99s
>
> from the hardware - note the "in" figures are rediculously low, yet
> they do wait 3s for each test.  Also, the sha256 results are close
> enough to being the software version.
>
> No ideas on any of this yet... but I'm not about to start digging in
> the openssl code to try and work out what it's up to.  As I say, I
> think this is AF_ALG with openssl is a dead project.

Thanks for the time you took to assemble the information in previous 
email. Yesterday, when reading your patches, I ended up on [1], where
Marek (added him to Cc: list) basically has the same kind of conclusion
as yours, i.e. openssl w/ cryptodev is what currently works better even
if AF_ALG is the expected target for kernel to provide access to
hardware engines to userland apps.

I had a lot of performance results at various levels (tcrypt module on
variations of the drivers (tasklet, threaded irq, full polling, etc),
IPsec tunnel and transport mode through to see how it behaves w/ two
mvneta instances also eating CPU cycles for incoming/outgoing packets)
but those where done on an encryption use case. Some are provided
in [2]. In an early (read dirty) polling-based version of the driver,
the CESA on an Armada 370 (mirabox) was verified to be capable of near
100MB/s on buffers of 1500+ bytes for AES CBC encryption. Current
version of the driver is not as good (say half that value) but it
behaves better. A Mirabox can easily route 1500 bytes packets at 100MB/s
between its two interfaces but when you mix both using IPsec in tunnel
mode on one side, you end up w/ perfs between 10 to 15MB/s, IIRC. I
think it's interesting to see where it ends up w/ the engine exposed to
userland consumers (e.g. sth like SSH).

I cannot promise a huge amount of time but I'll try and find some to
play w/ AF_ALG using openssl and CESA in the coming weeks.

Cheers,

a+

[1]: http://events.linuxfoundation.org/sites/events/files/slides/lcj-2014-crypto-user.pdf
[2]: http://lists.infradead.org/pipermail/linux-arm-kernel/2015-April/336599.html
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux Oct. 10, 2015, 11:29 a.m. UTC | #5
On Sat, Oct 10, 2015 at 12:31:29PM +0200, Arnaud Ebalard wrote:
> Hi Russel,
           ^

> Russell King - ARM Linux <linux@arm.linux.org.uk> writes:
> > Software:
> > The 'numbers' are in 1000s of bytes per second processed.
> > type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
> > md5              13948.89k    42477.61k   104619.41k   165140.82k   199273.13k
> > sha1             13091.91k    36463.89k    75393.88k   103893.33k   117104.50k
> > sha256           13573.92k    30492.25k    52700.33k    64247.81k    68722.69k
> >
> > Hardware:
> > The 'numbers' are in 1000s of bytes per second processed.
> > type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
> > md5               3964.55k    13782.11k    43181.71k   180263.38k  1446616.18k
> > sha1              4609.16k     8922.35k    35422.87k   333575.31k  2122547.20k
> > sha256           13519.62k    30484.10k    52547.47k    64285.21k    68530.60k

Okay, the reason for the difference in SHA256 speed is because the
"openssl speed" code *totally* *bypasses* the engine support, whereas
the md5 and sha1 do not.  It even bypasses the normal method used to
get hold of the sha256 implementation (EVP_sha256), and goes straight
to using SHA256() directly in openssl/crypto/sha/sha256.c.  It looks
like the same goes for the AES tests too.

> I had a lot of performance results at various levels (tcrypt module on
> variations of the drivers (tasklet, threaded irq, full polling, etc),
> IPsec tunnel and transport mode through to see how it behaves w/ two
> mvneta instances also eating CPU cycles for incoming/outgoing packets)
> but those where done on an encryption use case. Some are provided
> in [2]. In an early (read dirty) polling-based version of the driver,
> the CESA on an Armada 370 (mirabox) was verified to be capable of near
> 100MB/s on buffers of 1500+ bytes for AES CBC encryption. Current
> version of the driver is not as good (say half that value) but it
> behaves better. A Mirabox can easily route 1500 bytes packets at 100MB/s
> between its two interfaces but when you mix both using IPsec in tunnel
> mode on one side, you end up w/ perfs between 10 to 15MB/s, IIRC. I
> think it's interesting to see where it ends up w/ the engine exposed to
> userland consumers (e.g. sth like SSH).
> 
> I cannot promise a huge amount of time but I'll try and find some to
> play w/ AF_ALG using openssl and CESA in the coming weeks.

I think what we draw from my investigation is that "openssl speed" is
utterly crap - you don't actually know what's being tested there.  Some
things test the engine, others bypass the engine infrastructure totally
and test the openssl software implementation instead.

So, if you think "openssl speed" is a good way to measure the speed of
digests and ciphers that openssl supplies to applications, *think again*.
It doesn't.
Russell King - ARM Linux Oct. 10, 2015, 4:17 p.m. UTC | #6
On Sat, Oct 10, 2015 at 12:29:25PM +0100, Russell King - ARM Linux wrote:
> On Sat, Oct 10, 2015 at 12:31:29PM +0200, Arnaud Ebalard wrote:
> > Russell King - ARM Linux <linux@arm.linux.org.uk> writes:
> > > Software:
> > > The 'numbers' are in 1000s of bytes per second processed.
> > > type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
> > > md5              13948.89k    42477.61k   104619.41k   165140.82k   199273.13k
> > > sha1             13091.91k    36463.89k    75393.88k   103893.33k   117104.50k
> > > sha256           13573.92k    30492.25k    52700.33k    64247.81k    68722.69k
> > >
> > > Hardware:
> > > The 'numbers' are in 1000s of bytes per second processed.
> > > type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
> > > md5               3964.55k    13782.11k    43181.71k   180263.38k  1446616.18k
> > > sha1              4609.16k     8922.35k    35422.87k   333575.31k  2122547.20k
> > > sha256           13519.62k    30484.10k    52547.47k    64285.21k    68530.60k
> 
> Okay, the reason for the difference in SHA256 speed is because the
> "openssl speed" code *totally* *bypasses* the engine support, whereas
> the md5 and sha1 do not.  It even bypasses the normal method used to
> get hold of the sha256 implementation (EVP_sha256), and goes straight
> to using SHA256() directly in openssl/crypto/sha/sha256.c.  It looks
> like the same goes for the AES tests too.

Okay, what's required is openssl speed -evp <cipher|digest> and it can
only do one at a time.  That's really confusing and non-intuitive, given
that some of the non-evp options end up testing the EVP methods.

So, running:

$ openssl speed -evp aes-128-cbc

I get the kernel spitting out a number of these warnings during its run:

DRBG: could not allocate digest TFM handle: hmac(sha512)

I also notice is that 50% of CPU time is spent in the kernel, presumably
because the lack of hardware sha512 means that we end up having to do
sha512 in software in the kernel.

I _also_ notice that despite having the ARM assembly crypto functions
enabled and built-in to the kernel, they don't appear in /proc/crypto -
and this is because of patch 1 in this series, which blocks out any
crypto driver which has a zero statesize (as the ARM crypto functions
appear to have.)  I found this by rebuilding the ARM crypto stuff as
modules, and then trying to insert them:

# modprobe sha512-arm
modprobe: ERROR: could not insert 'sha512_arm': Invalid argument

So, I think it's best if this patch series is *NOT* merged until someone
who knows the kernel crypto code gets to grips with what's supposed to be
done in various parts of the code.  Yes, I know that Herbert suggested
the approach in patch 1, but that _will_ cause regressions like the above
when if it's merged.

Either the ARM crypto code in arch/arm/crypto is buggy, or the approach
in patch 1 is buggy.
Marek Vasut Oct. 10, 2015, 6:07 p.m. UTC | #7
On Saturday, October 10, 2015 at 01:29:25 PM, Russell King - ARM Linux wrote:
> On Sat, Oct 10, 2015 at 12:31:29PM +0200, Arnaud Ebalard wrote:
> > Hi Russel,
> 
>            ^
> 
> > Russell King - ARM Linux <linux@arm.linux.org.uk> writes:
> > > Software:
> > > The 'numbers' are in 1000s of bytes per second processed.
> > > type             16 bytes     64 bytes    256 bytes   1024 bytes   8192
> > > bytes md5              13948.89k    42477.61k   104619.41k  
> > > 165140.82k   199273.13k sha1             13091.91k    36463.89k   
> > > 75393.88k   103893.33k   117104.50k sha256           13573.92k   
> > > 30492.25k    52700.33k    64247.81k    68722.69k
> > > 
> > > Hardware:
> > > The 'numbers' are in 1000s of bytes per second processed.
> > > type             16 bytes     64 bytes    256 bytes   1024 bytes   8192
> > > bytes md5               3964.55k    13782.11k    43181.71k  
> > > 180263.38k  1446616.18k sha1              4609.16k     8922.35k   
> > > 35422.87k   333575.31k  2122547.20k sha256           13519.62k   
> > > 30484.10k    52547.47k    64285.21k    68530.60k
> 
> Okay, the reason for the difference in SHA256 speed is because the
> "openssl speed" code *totally* *bypasses* the engine support, whereas
> the md5 and sha1 do not.  It even bypasses the normal method used to
> get hold of the sha256 implementation (EVP_sha256), and goes straight
> to using SHA256() directly in openssl/crypto/sha/sha256.c.  It looks
> like the same goes for the AES tests too.
> 
> > I had a lot of performance results at various levels (tcrypt module on
> > variations of the drivers (tasklet, threaded irq, full polling, etc),
> > IPsec tunnel and transport mode through to see how it behaves w/ two
> > mvneta instances also eating CPU cycles for incoming/outgoing packets)
> > but those where done on an encryption use case. Some are provided
> > in [2]. In an early (read dirty) polling-based version of the driver,
> > the CESA on an Armada 370 (mirabox) was verified to be capable of near
> > 100MB/s on buffers of 1500+ bytes for AES CBC encryption. Current
> > version of the driver is not as good (say half that value) but it
> > behaves better. A Mirabox can easily route 1500 bytes packets at 100MB/s
> > between its two interfaces but when you mix both using IPsec in tunnel
> > mode on one side, you end up w/ perfs between 10 to 15MB/s, IIRC. I
> > think it's interesting to see where it ends up w/ the engine exposed to
> > userland consumers (e.g. sth like SSH).
> > 
> > I cannot promise a huge amount of time but I'll try and find some to
> > play w/ AF_ALG using openssl and CESA in the coming weeks.
> 
> I think what we draw from my investigation is that "openssl speed" is
> utterly crap - you don't actually know what's being tested there.  Some
> things test the engine, others bypass the engine infrastructure totally
> and test the openssl software implementation instead.
> 
> So, if you think "openssl speed" is a good way to measure the speed of
> digests and ciphers that openssl supplies to applications, *think again*.
> It doesn't.

Add to that the fact that openssl speed does NOT verify the results of the
transformations, so it's not usable for detecting errors during high load.
It's utter crap, just like you said.

Best regards,
Marek Vasut
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Herbert Xu Oct. 11, 2015, 6:55 a.m. UTC | #8
On Sat, Oct 10, 2015 at 05:17:54PM +0100, Russell King - ARM Linux wrote:
>
> I _also_ notice that despite having the ARM assembly crypto functions
> enabled and built-in to the kernel, they don't appear in /proc/crypto -
> and this is because of patch 1 in this series, which blocks out any
> crypto driver which has a zero statesize (as the ARM crypto functions
> appear to have.)  I found this by rebuilding the ARM crypto stuff as
> modules, and then trying to insert them:

They're buggy and unfortunately this wasn't caught during the
review process.  The import/export functions are required and
certainly not optional.

> # modprobe sha512-arm
> modprobe: ERROR: could not insert 'sha512_arm': Invalid argument

This is the correct response and what will happen is that the
software implementation of sha512 will be used rather than the
accelerated sha512_arm.

Once the sha512_arm driver has been fixed then it can be loaded
again.

> So, I think it's best if this patch series is *NOT* merged until someone
> who knows the kernel crypto code gets to grips with what's supposed to be
> done in various parts of the code.  Yes, I know that Herbert suggested
> the approach in patch 1, but that _will_ cause regressions like the above
> when if it's merged.

Considering that the current driver can be used to trigger a kernel
oops from user-space I think preventing the buggy driver from loading
is the right thing to do.

We can then fix them one-by-one.

Cheers,
Herbert Xu Oct. 13, 2015, 1 p.m. UTC | #9
On Sun, Oct 11, 2015 at 02:55:05PM +0800, Herbert Xu wrote:
> On Sat, Oct 10, 2015 at 05:17:54PM +0100, Russell King - ARM Linux wrote:
> >
> > I _also_ notice that despite having the ARM assembly crypto functions
> > enabled and built-in to the kernel, they don't appear in /proc/crypto -
> > and this is because of patch 1 in this series, which blocks out any
> > crypto driver which has a zero statesize (as the ARM crypto functions
> > appear to have.)  I found this by rebuilding the ARM crypto stuff as
> > modules, and then trying to insert them:
> 
> They're buggy and unfortunately this wasn't caught during the
> review process.  The import/export functions are required and
> certainly not optional.

OK it looks like I was confused.  Yes the import and export functions
are required but for shash algorithms we provide a simple default.
This means that implementations can simply leave it NULL and they
will then use the default import/export functions which exports
the shash_desc as the state.

So Russell what I'll do is apply your patch without the hunk
for shash_prepare_alg.  IOW we will block any ahash algorithms
that leave state as zero since ahash does not provide a default
import/export function (it can't because it may involve hardware
state).  shash algorithms on the other hand won't be affected.

Cheers,
Russell King - ARM Linux Oct. 13, 2015, 1:55 p.m. UTC | #10
On Tue, Oct 13, 2015 at 09:00:48PM +0800, Herbert Xu wrote:
> On Sun, Oct 11, 2015 at 02:55:05PM +0800, Herbert Xu wrote:
> > On Sat, Oct 10, 2015 at 05:17:54PM +0100, Russell King - ARM Linux wrote:
> > >
> > > I _also_ notice that despite having the ARM assembly crypto functions
> > > enabled and built-in to the kernel, they don't appear in /proc/crypto -
> > > and this is because of patch 1 in this series, which blocks out any
> > > crypto driver which has a zero statesize (as the ARM crypto functions
> > > appear to have.)  I found this by rebuilding the ARM crypto stuff as
> > > modules, and then trying to insert them:
> > 
> > They're buggy and unfortunately this wasn't caught during the
> > review process.  The import/export functions are required and
> > certainly not optional.
> 
> OK it looks like I was confused.  Yes the import and export functions
> are required but for shash algorithms we provide a simple default.
> This means that implementations can simply leave it NULL and they
> will then use the default import/export functions which exports
> the shash_desc as the state.
> 
> So Russell what I'll do is apply your patch without the hunk
> for shash_prepare_alg.  IOW we will block any ahash algorithms
> that leave state as zero since ahash does not provide a default
> import/export function (it can't because it may involve hardware
> state).  shash algorithms on the other hand won't be affected.

Okay.

I've a larger queue of patches building at the moment for the Marvell
driver - I've found that it's touch-and-go whether it produces the
correct hash result, and previous hashes can affect subsequent hashes
in some circumstances.  I'm currently at another 7 patches, plus a
"big" as-yet uncommitted patch which is virtually a rewrite of the
DMA preparation - diffstat wise, in total it's looking like:

 drivers/crypto/marvell/cesa.h |   8 +-
 drivers/crypto/marvell/hash.c | 258 ++++++++++++++++++++++--------------------
 drivers/crypto/marvell/tdma.c |   7 ++
 3 files changed, 151 insertions(+), 122 deletions(-)

and there's still a few more cases that need solving: zero bytes of
input should not give a zero hash, and an input which is the multiple
of the blocksize causes the hardware to hang - though I'm not yet
sure whether that's a result of my above changes.  At least with my
above changes, I now get stable and correct hash values for sizes
which do not hit any of those remaining bugs.

Given it's size so far, I'm not sure it makes much sense to backport
any of these fixes to stable; I think maybe we should just say "okay,
before these fixes, it's just broken."
Herbert Xu Oct. 13, 2015, 1:57 p.m. UTC | #11
On Tue, Oct 13, 2015 at 02:55:18PM +0100, Russell King - ARM Linux wrote:
>
> Given it's size so far, I'm not sure it makes much sense to backport
> any of these fixes to stable; I think maybe we should just say "okay,
> before these fixes, it's just broken."

OK as long as your first patch goes into stable it should be fine
as it'll essentially disable the marvell driver.

Cheers,
Russell King - ARM Linux Oct. 13, 2015, 1:59 p.m. UTC | #12
On Tue, Oct 13, 2015 at 09:57:22PM +0800, Herbert Xu wrote:
> On Tue, Oct 13, 2015 at 02:55:18PM +0100, Russell King - ARM Linux wrote:
> >
> > Given it's size so far, I'm not sure it makes much sense to backport
> > any of these fixes to stable; I think maybe we should just say "okay,
> > before these fixes, it's just broken."
> 
> OK as long as your first patch goes into stable it should be fine
> as it'll essentially disable the marvell driver.

It will disable the Marvell driver, along with the SW implementations in
arch/arm/crypto/ which don't set .statesize, .import and .export.  I
would not be surprised if it also affects others too.
Herbert Xu Oct. 13, 2015, 2:01 p.m. UTC | #13
On Tue, Oct 13, 2015 at 02:59:47PM +0100, Russell King - ARM Linux wrote:
>
> It will disable the Marvell driver, along with the SW implementations in
> arch/arm/crypto/ which don't set .statesize, .import and .export.  I
> would not be surprised if it also affects others too.

As I said earlier I'll make sure to delete the hunk for
shash_prepare_alg when I apply your patch so only ahash drivers
will be affected.

The ARM stuff is all shash so they are fine and will use the
default import/export functions.

Cheers,
diff mbox

Patch

diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index b7c2c05f1a01..a24ceda7acfe 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -795,39 +795,32 @@  static int mv_cesa_ahash_finup(struct ahash_request *req)
 	return ret;
 }
 
-static int mv_cesa_md5_init(struct ahash_request *req)
+static int mv_cesa_ahash_export(struct ahash_request *req, void *hash,
+				u64 *len, void *cache)
 {
-	struct mv_cesa_op_ctx tmpl;
-
-	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
-
-	mv_cesa_ahash_init(req, &tmpl);
-
-	return 0;
-}
-
-static int mv_cesa_md5_export(struct ahash_request *req, void *out)
-{
-	struct md5_state *out_state = out;
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	unsigned int digsize = crypto_ahash_digestsize(ahash);
+	unsigned int blocksize;
+
+	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash));
 
-	out_state->byte_count = creq->len;
-	memcpy(out_state->hash, creq->state, digsize);
-	memset(out_state->block, 0, sizeof(out_state->block));
+	*len = creq->len;
+	memcpy(hash, creq->state, digsize);
+	memset(cache, 0, blocksize);
 	if (creq->cache)
-		memcpy(out_state->block, creq->cache, creq->cache_ptr);
+		memcpy(cache, creq->cache, creq->cache_ptr);
 
 	return 0;
 }
 
-static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
+static int mv_cesa_ahash_import(struct ahash_request *req, const void *hash,
+				u64 len, const void *cache)
 {
-	const struct md5_state *in_state = in;
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	unsigned int digsize = crypto_ahash_digestsize(ahash);
+	unsigned int blocksize;
 	unsigned int cache_ptr;
 	int ret;
 
@@ -835,16 +828,17 @@  static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
 	if (ret)
 		return ret;
 
-	if (in_state->byte_count >= sizeof(in_state->block))
+	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash));
+	if (len >= blocksize)
 		mv_cesa_update_op_cfg(&creq->op_tmpl,
 				      CESA_SA_DESC_CFG_MID_FRAG,
 				      CESA_SA_DESC_CFG_FRAG_MSK);
 
-	creq->len = in_state->byte_count;
-	memcpy(creq->state, in_state->hash, digsize);
+	creq->len = len;
+	memcpy(creq->state, hash, digsize);
 	creq->cache_ptr = 0;
 
-	cache_ptr = creq->len % sizeof(in_state->block);
+	cache_ptr = do_div(len, blocksize);
 	if (!cache_ptr)
 		return 0;
 
@@ -852,12 +846,39 @@  static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
 	if (ret)
 		return ret;
 
-	memcpy(creq->cache, in_state->block, cache_ptr);
+	memcpy(creq->cache, cache, cache_ptr);
 	creq->cache_ptr = cache_ptr;
 
 	return 0;
 }
 
+static int mv_cesa_md5_init(struct ahash_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
+
+	mv_cesa_ahash_init(req, &tmpl);
+
+	return 0;
+}
+
+static int mv_cesa_md5_export(struct ahash_request *req, void *out)
+{
+	struct md5_state *out_state = out;
+
+	return mv_cesa_ahash_export(req, out_state->hash,
+				    &out_state->byte_count, out_state->block);
+}
+
+static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
+{
+	const struct md5_state *in_state = in;
+
+	return mv_cesa_ahash_import(req, in_state->hash, in_state->byte_count,
+				    in_state->block);
+}
+
 static int mv_cesa_md5_digest(struct ahash_request *req)
 {
 	int ret;
@@ -908,53 +929,17 @@  static int mv_cesa_sha1_init(struct ahash_request *req)
 static int mv_cesa_sha1_export(struct ahash_request *req, void *out)
 {
 	struct sha1_state *out_state = out;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int digsize = crypto_ahash_digestsize(ahash);
-
-	out_state->count = creq->len;
-	memcpy(out_state->state, creq->state, digsize);
-	memset(out_state->buffer, 0, sizeof(out_state->buffer));
-	if (creq->cache)
-		memcpy(out_state->buffer, creq->cache, creq->cache_ptr);
 
-	return 0;
+	return mv_cesa_ahash_export(req, out_state->state, &out_state->count,
+				    out_state->buffer);
 }
 
 static int mv_cesa_sha1_import(struct ahash_request *req, const void *in)
 {
 	const struct sha1_state *in_state = in;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int digsize = crypto_ahash_digestsize(ahash);
-	unsigned int cache_ptr;
-	int ret;
-
-	ret = crypto_ahash_init(req);
-	if (ret)
-		return ret;
-
-	if (in_state->count >= SHA1_BLOCK_SIZE)
-		mv_cesa_update_op_cfg(&creq->op_tmpl,
-				      CESA_SA_DESC_CFG_MID_FRAG,
-				      CESA_SA_DESC_CFG_FRAG_MSK);
-
-	creq->len = in_state->count;
-	memcpy(creq->state, in_state->state, digsize);
-	creq->cache_ptr = 0;
-
-	cache_ptr = creq->len % SHA1_BLOCK_SIZE;
-	if (!cache_ptr)
-		return 0;
-
-	ret = mv_cesa_ahash_alloc_cache(req);
-	if (ret)
-		return ret;
-
-	memcpy(creq->cache, in_state->buffer, cache_ptr);
-	creq->cache_ptr = cache_ptr;
 
-	return 0;
+	return mv_cesa_ahash_import(req, in_state->state, in_state->count,
+				    in_state->buffer);
 }
 
 static int mv_cesa_sha1_digest(struct ahash_request *req)
@@ -1018,53 +1003,17 @@  static int mv_cesa_sha256_digest(struct ahash_request *req)
 static int mv_cesa_sha256_export(struct ahash_request *req, void *out)
 {
 	struct sha256_state *out_state = out;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int ds = crypto_ahash_digestsize(ahash);
 
-	out_state->count = creq->len;
-	memcpy(out_state->state, creq->state, ds);
-	memset(out_state->buf, 0, sizeof(out_state->buf));
-	if (creq->cache)
-		memcpy(out_state->buf, creq->cache, creq->cache_ptr);
-
-	return 0;
+	return mv_cesa_ahash_export(req, out_state->state, &out_state->count,
+				    out_state->buf);
 }
 
 static int mv_cesa_sha256_import(struct ahash_request *req, const void *in)
 {
 	const struct sha256_state *in_state = in;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int digsize = crypto_ahash_digestsize(ahash);
-	unsigned int cache_ptr;
-	int ret;
-
-	ret = crypto_ahash_init(req);
-	if (ret)
-		return ret;
-
-	if (in_state->count >= SHA256_BLOCK_SIZE)
-		mv_cesa_update_op_cfg(&creq->op_tmpl,
-				      CESA_SA_DESC_CFG_MID_FRAG,
-				      CESA_SA_DESC_CFG_FRAG_MSK);
 
-	creq->len = in_state->count;
-	memcpy(creq->state, in_state->state, digsize);
-	creq->cache_ptr = 0;
-
-	cache_ptr = creq->len % SHA256_BLOCK_SIZE;
-	if (!cache_ptr)
-		return 0;
-
-	ret = mv_cesa_ahash_alloc_cache(req);
-	if (ret)
-		return ret;
-
-	memcpy(creq->cache, in_state->buf, cache_ptr);
-	creq->cache_ptr = cache_ptr;
-
-	return 0;
+	return mv_cesa_ahash_import(req, in_state->state, in_state->count,
+				    in_state->buf);
 }
 
 struct ahash_alg mv_sha256_alg = {