diff mbox

crypto: caam - Add support for hashing export and import functions

Message ID 1445037573-29667-1-git-send-email-vicki.milhoan@freescale.com (mailing list archive)
State Changes Requested
Delegated to: Herbert Xu
Headers show

Commit Message

Victoria Milhoan Oct. 16, 2015, 11:19 p.m. UTC
Add support for the export and import functions used by hashing algorithms
by specifying the size of the data saved/restored. The size of the data is
provided during algorithm registration.

Also, modify the structures saved/restored in order to meet the size limit
of 512 bytes imposed by the the Crypto API.

Signed-off-by: Victoria Milhoan <vicki.milhoan@freescale.com>
---
 drivers/crypto/caam/caamhash.c | 82 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 68 insertions(+), 14 deletions(-)

Comments

Russell King - ARM Linux Oct. 17, 2015, 7:39 a.m. UTC | #1
On Fri, Oct 16, 2015 at 04:19:33PM -0700, Victoria Milhoan wrote:
> @@ -319,7 +319,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
>  		have_key = OP_ALG_AAI_HMAC_PRECOMP;
>  
>  	/* ahash_update shared descriptor */
> -	desc = ctx->sh_desc_update;
> +	desc = kmalloc(DESC_HASH_MAX_USED_BYTES, GFP_KERNEL | GFP_DMA);

What if kmalloc() fails?  Should this really oops the kernel?  Ditto
for every other kmalloc you've added below.
 
> @@ -1557,6 +1575,20 @@ static int ahash_export(struct ahash_request *req, void *out)
>  	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
>  	struct caam_hash_state *state = ahash_request_ctx(req);
>  
> +	/*
> +	 * Do not export the data buffers. New buffers are
> +	 * allocated during import.
> +	 */
> +	kfree(state->buf_0);
> +	kfree(state->buf_1);
> +	state->buf_0 = NULL;
> +	state->buf_1 = NULL;
> +
> +	state->current_buf = 0;
> +	state->buf_dma = 0;
> +	state->buflen_0 = 0;
> +	state->buflen_1 = 0;
> +

So what if I export, and then continue using _this_ context later?

> @@ -1605,6 +1641,8 @@ static struct caam_hash_template driver_hash[] = {
>  			.setkey = ahash_setkey,
>  			.halg = {
>  				.digestsize = SHA1_DIGEST_SIZE,
> +				.statesize = sizeof(struct caam_hash_ctx) +
> +					     sizeof(struct caam_hash_state),

Much prefer to have a 'struct caam_hash_export_state' thing rather than
litter the code with the knowledge that these two go together.

>  				},
>  			},
>  		.alg_type = OP_ALG_ALGSEL_SHA1,
> @@ -1626,6 +1664,8 @@ static struct caam_hash_template driver_hash[] = {
>  			.setkey = ahash_setkey,
>  			.halg = {
>  				.digestsize = SHA224_DIGEST_SIZE,
> +				.statesize = sizeof(struct caam_hash_ctx) +
> +					     sizeof(struct caam_hash_state),
>  				},
>  			},
>  		.alg_type = OP_ALG_ALGSEL_SHA224,
> @@ -1647,6 +1687,8 @@ static struct caam_hash_template driver_hash[] = {
>  			.setkey = ahash_setkey,
>  			.halg = {
>  				.digestsize = SHA256_DIGEST_SIZE,
> +				.statesize = sizeof(struct caam_hash_ctx) +
> +					     sizeof(struct caam_hash_state),
>  				},
>  			},
>  		.alg_type = OP_ALG_ALGSEL_SHA256,
> @@ -1668,6 +1710,8 @@ static struct caam_hash_template driver_hash[] = {
>  			.setkey = ahash_setkey,
>  			.halg = {
>  				.digestsize = SHA384_DIGEST_SIZE,
> +				.statesize = sizeof(struct caam_hash_ctx) +
> +					     sizeof(struct caam_hash_state),
>  				},
>  			},
>  		.alg_type = OP_ALG_ALGSEL_SHA384,
> @@ -1689,6 +1733,8 @@ static struct caam_hash_template driver_hash[] = {
>  			.setkey = ahash_setkey,
>  			.halg = {
>  				.digestsize = SHA512_DIGEST_SIZE,
> +				.statesize = sizeof(struct caam_hash_ctx) +
> +					     sizeof(struct caam_hash_state),
>  				},
>  			},
>  		.alg_type = OP_ALG_ALGSEL_SHA512,
> @@ -1710,6 +1756,8 @@ static struct caam_hash_template driver_hash[] = {
>  			.setkey = ahash_setkey,
>  			.halg = {
>  				.digestsize = MD5_DIGEST_SIZE,
> +				.statesize = sizeof(struct caam_hash_ctx) +
> +					     sizeof(struct caam_hash_state),
>  				},
>  			},
>  		.alg_type = OP_ALG_ALGSEL_MD5,
> @@ -1796,6 +1844,12 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm)
>  		dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma,
>  				 desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE);
>  
> +	kfree(ctx->sh_desc_update);
> +	kfree(ctx->sh_desc_update_first);
> +	kfree(ctx->sh_desc_fin);
> +	kfree(ctx->sh_desc_digest);
> +	kfree(ctx->sh_desc_finup);
> +

What happens to these when ahash_import() overwrites all the context
state?  Doesn't this mean we end up with double-kfree()s ?

Also, did you test this DMA debug enabled?
Russell King - ARM Linux Oct. 17, 2015, 9:43 a.m. UTC | #2
On Fri, Oct 16, 2015 at 04:19:33PM -0700, Victoria Milhoan wrote:
> @@ -1569,6 +1601,10 @@ static int ahash_import(struct ahash_request *req, const void *in)
>  	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
>  	struct caam_hash_state *state = ahash_request_ctx(req);
>  
> +	/* Allocate new data buffers to use for this request */
> +	state->buf_0 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> +	state->buf_1 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> +
>  	memcpy(ctx, in, sizeof(struct caam_hash_ctx));
>  	memcpy(state, in + sizeof(struct caam_hash_ctx),
>  	       sizeof(struct caam_hash_state));

This is buggy.  You loose the reference to the two buffers you've just
allocated because this memcpy() overwrites it.
Russell King - ARM Linux Oct. 17, 2015, 10:52 a.m. UTC | #3
On Fri, Oct 16, 2015 at 04:19:33PM -0700, Victoria Milhoan wrote:
> @@ -1557,6 +1575,20 @@ static int ahash_export(struct ahash_request *req, void *out)
>  	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
>  	struct caam_hash_state *state = ahash_request_ctx(req);
>  
> +	/*
> +	 * Do not export the data buffers. New buffers are
> +	 * allocated during import.
> +	 */
> +	kfree(state->buf_0);
> +	kfree(state->buf_1);
> +	state->buf_0 = NULL;
> +	state->buf_1 = NULL;
> +
> +	state->current_buf = 0;
> +	state->buf_dma = 0;
> +	state->buflen_0 = 0;
> +	state->buflen_1 = 0;
> +
>  	memcpy(out, ctx, sizeof(struct caam_hash_ctx));
>  	memcpy(out + sizeof(struct caam_hash_ctx), state,
>  	       sizeof(struct caam_hash_state));
> @@ -1569,6 +1601,10 @@ static int ahash_import(struct ahash_request *req, const void *in)
>  	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
>  	struct caam_hash_state *state = ahash_request_ctx(req);
>  
> +	/* Allocate new data buffers to use for this request */
> +	state->buf_0 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> +	state->buf_1 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> +
>  	memcpy(ctx, in, sizeof(struct caam_hash_ctx));
>  	memcpy(state, in + sizeof(struct caam_hash_ctx),
>  	       sizeof(struct caam_hash_state));

Why are we even saving and restoring the caam_hash_ctx here?

That comes from: crypto_tfm_ctx(crypto_ahash_tfm(tfm)), which, tracing
that through the inline functions comes out as:

	tfm->base.__crt_ctx

tfm comes from: __crypto_ahash_cast(req->base.tfm), where req->base.tfm
is a pointer to struct crypto_ahash's base member.

When a hash is accept()ed, req->base.tfm is copied from the original
socket handle to the new socket handle (see ahash_request_set_tfm(),
called from hash_accept_parent(), which is in turn called from
af_alg_accept() in hash_accept()).  So both the exporting and importing
hashes end up with the same struct caam_hash_ctx.

We can see this if we add debug print:

[156147.994219] ahash_export(ctx=ed087080,state=e2a6e600)
[156147.994257] ahash_import(ctx=ed087080,state=e2a6d600)
[156147.998256] ahash_export(ctx=ed083080,state=e2a6c600)
[156147.998294] ahash_import(ctx=ed083080,state=edb9ee00)
[156147.998659] ahash_export(ctx=ed087080,state=e2a6e600)
[156147.998700] ahash_import(ctx=ed087080,state=e2a6fa00)
[156147.999002] ahash_export(ctx=ed080880,state=e2a6d200)
[156147.999040] ahash_import(ctx=ed080880,state=e2a6d600)

Notice that the state changes between each export and import, but the
context doesn't.

So, I think we can (and should) completely drop the saving and restoring
of context in these functions.

Now, with that change, and with your change to buf_0/buf_1, I see
(before the import/export functions are used) several of these errors:

caam_jr 2101000.jr0: 40000501: DECO: desc idx 5: SGT Length Error. The descriptor is trying to read more data than is contained in the SGT table.

when checking using openssh.  They don't occur when testing with openssl
performing normal hashes (as per my previously posted script) but only
with openssh.
Victoria Milhoan Oct. 17, 2015, 1:57 p.m. UTC | #4
On Sat, 17 Oct 2015 10:43:00 +0100
Russell King - ARM Linux <linux@arm.linux.org.uk> wrote:

> On Fri, Oct 16, 2015 at 04:19:33PM -0700, Victoria Milhoan wrote:
> > @@ -1569,6 +1601,10 @@ static int ahash_import(struct ahash_request *req, const void *in)
> >  	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
> >  	struct caam_hash_state *state = ahash_request_ctx(req);
> >  
> > +	/* Allocate new data buffers to use for this request */
> > +	state->buf_0 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> > +	state->buf_1 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> > +
> >  	memcpy(ctx, in, sizeof(struct caam_hash_ctx));
> >  	memcpy(state, in + sizeof(struct caam_hash_ctx),
> >  	       sizeof(struct caam_hash_state));
> 
> This is buggy.  You loose the reference to the two buffers you've just
> allocated because this memcpy() overwrites it.

Correct - this was apparently the wrong patch I pushed out. The one I'm actively
using has this fixed (this is the only difference). I will make this change in
v2 after reviewing your other comments.

> 
> -- 
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.
Russell King - ARM Linux Oct. 17, 2015, 2:11 p.m. UTC | #5
On Sat, Oct 17, 2015 at 06:57:44AM -0700, Victoria Milhoan wrote:
> Correct - this was apparently the wrong patch I pushed out. The one I'm
> actively using has this fixed (this is the only difference). I will make
> this change in v2 after reviewing your other comments.

Thanks Victoria, but please perform the tests I've suggested in one of
my previous emails - merely testing with openssl's built-in tests aren't
sufficient.  Also, openssl is non-obvious with what it's testing,
especially if you're using "openssl speed".  I'm afraid it's a case of
"you don't know what's being tested unless you've read the openssl
source" :(

However, while trying to debug the DECO stuff, I've stumbled across this:

static inline void append_ptr(u32 *desc, dma_addr_t ptr)
{
        dma_addr_t *offset = (dma_addr_t *)desc_end(desc);

        *offset = ptr;

        (*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ;
}

where I think 'desc' points to memory used for the descriptor, which is
read by hardware, correct?

If that's true, why are we using dma_addr_t here?  dma_addr_t can be
either 32-bit or 64-bit depending on the kernel configuration, and I
suspect if this driver is built with dma_addr_t set to 64-bit, things
will go awry.

Also, what endianness are the hardware descriptors?  Are they always
native CPU endian, or are they fixed as little endian?  If they're always
little endian, is there any reason not to use __le32 and cpu_to_le32()/
le32_to_cpu() as appropriate to ensure that it works on big endian
systems?
Russell King - ARM Linux Oct. 17, 2015, 5:36 p.m. UTC | #6
On Sat, Oct 17, 2015 at 11:52:54AM +0100, Russell King - ARM Linux wrote:
> Now, with that change, and with your change to buf_0/buf_1, I see
> (before the import/export functions are used) several of these errors:
> 
> caam_jr 2101000.jr0: 40000501: DECO: desc idx 5: SGT Length Error. The descriptor is trying to read more data than is contained in the SGT table.
> 
> when checking using openssh.  They don't occur when testing with openssl
> performing normal hashes (as per my previously posted script) but only
> with openssh.

The hardware seems to be quite justified in complaining - I've added code
to dump out the scatter list as well:

jobdesc@879: e4bd7c18: b0861c08 3cd29080 f1400000 34bd7c38  .......<..@.8|.4
jobdesc@879: e4bd7c28: 00000068 f8400000 3ccf3640 00000028  h.....@.@6.<(...
sg@882: e4bd7c38: 00000000 3ccf3640 00000028 00000000
sg@882: e4bd7c48: 00000000 34bd7300 00000006 00000000
sg@882: e4bd7c58: 00000000 7528a070 40000020 00000000

If I'm interpreting this correctly:

	f1400000 34bd7c38 00000068

tells the CAAM to read 0x68 bytes using the scatterlist at 0x34bd7c38.

The scatterlist here contains three entries, which have lengths 0x28,
0x6 and 0x20, which total up to 0x4e bytes, not 0x68.

Now, looking at how the scatterlist is created, I have yet more questions.

1. What's this dma_map_sg_chained() and dma_unmap_sg_chained() stuff?
   What's wrong with just calling dma_map_sg() on the scatterlist, giving
   it the number of entries we want to map?  dma_map_sg() is supposed to
   deal with chained scatterlists, if it doesn't, it's buggy.

2. src_map_to_sec4_sg() fails to check whether dma_map_sg_chained() failed.
   if it failed, we continue anyway, poking invalid addresses into the
   hardware scatterlist, thereby causing memory corruption.

3. __sg_count() - what's going on with this.  This seems to assume that
   scatterlists aren't chained as:

                if (!sg_is_last(sg) && (sg + 1)->length == 0)

   sg + 1 may _not_ be sg_next(sg) - and this will walk off the end of
   the array.

4. Is the try_buf_map_to_sec4_sg() call correct?  Shouldn't it be:

@@ -838,7 +838,7 @@ static int ahash_update_ctx(struct ahash_request *req)
 		state->buf_dma = try_buf_map_to_sec4_sg(jrdev,
 							edesc->sec4_sg + 1,
 							buf, state->buf_dma,
-							*next_buflen, *buflen);
+							*buflen, last_buflen);
 
 		if (src_nents) {
  			src_map_to_sec4_sg(jrdev, req->src, src_nents,

   Indeed, with this change, my ssh test works, and the DECO errors are
   gone.
Russell King - ARM Linux Oct. 17, 2015, 5:38 p.m. UTC | #7
On Fri, Oct 16, 2015 at 04:19:33PM -0700, Victoria Milhoan wrote:
> @@ -1569,6 +1601,10 @@ static int ahash_import(struct ahash_request *req, const void *in)
>  	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
>  	struct caam_hash_state *state = ahash_request_ctx(req);
>  
> +	/* Allocate new data buffers to use for this request */
> +	state->buf_0 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> +	state->buf_1 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> +

I'm really not sure you can do this at all.  What if the previous
digest calculation prior to the accept() cloning the state was for
a non-hash-block aligned size of data.  The above will lose that
state, and produce an incorrect hash result.  Herbert, can you
confirm please?
Herbert Xu Oct. 18, 2015, 12:35 a.m. UTC | #8
On Sat, Oct 17, 2015 at 06:38:23PM +0100, Russell King - ARM Linux wrote:
> On Fri, Oct 16, 2015 at 04:19:33PM -0700, Victoria Milhoan wrote:
> > @@ -1569,6 +1601,10 @@ static int ahash_import(struct ahash_request *req, const void *in)
> >  	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
> >  	struct caam_hash_state *state = ahash_request_ctx(req);
> >  
> > +	/* Allocate new data buffers to use for this request */
> > +	state->buf_0 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> > +	state->buf_1 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
> > +
> 
> I'm really not sure you can do this at all.  What if the previous
> digest calculation prior to the accept() cloning the state was for
> a non-hash-block aligned size of data.  The above will lose that
> state, and produce an incorrect hash result.  Herbert, can you
> confirm please?

Well the patch you're responding to is simply bogus.  It is making
an allocation and then immediately overwriting that pointer value
with memcpy from the imported state.

Thanks,
diff mbox

Patch

diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 9609f66..bb81ab6 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -100,11 +100,11 @@  static struct list_head hash_list;
 /* ahash per-session context */
 struct caam_hash_ctx {
 	struct device *jrdev;
-	u32 sh_desc_update[DESC_HASH_MAX_USED_LEN];
-	u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN];
-	u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN];
-	u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN];
-	u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN];
+	u32 *sh_desc_update;
+	u32 *sh_desc_update_first;
+	u32 *sh_desc_fin;
+	u32 *sh_desc_digest;
+	u32 *sh_desc_finup;
 	dma_addr_t sh_desc_update_dma;
 	dma_addr_t sh_desc_update_first_dma;
 	dma_addr_t sh_desc_fin_dma;
@@ -123,9 +123,9 @@  struct caam_hash_ctx {
 struct caam_hash_state {
 	dma_addr_t buf_dma;
 	dma_addr_t ctx_dma;
-	u8 buf_0[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
+	u8 *buf_0;
 	int buflen_0;
-	u8 buf_1[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
+	u8 *buf_1;
 	int buflen_1;
 	u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned;
 	int (*update)(struct ahash_request *req);
@@ -319,7 +319,7 @@  static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 		have_key = OP_ALG_AAI_HMAC_PRECOMP;
 
 	/* ahash_update shared descriptor */
-	desc = ctx->sh_desc_update;
+	desc = kmalloc(DESC_HASH_MAX_USED_BYTES, GFP_KERNEL | GFP_DMA);
 
 	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
@@ -334,6 +334,7 @@  static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 	/* Load data and write to result or context */
 	ahash_append_load_str(desc, ctx->ctx_len);
 
+	ctx->sh_desc_update = desc;
 	ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
 						 DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) {
@@ -347,11 +348,12 @@  static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 #endif
 
 	/* ahash_update_first shared descriptor */
-	desc = ctx->sh_desc_update_first;
+	desc = kmalloc(DESC_HASH_MAX_USED_BYTES, GFP_KERNEL | GFP_DMA);
 
 	ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INIT,
 			  ctx->ctx_len, ctx);
 
+	ctx->sh_desc_update_first = desc;
 	ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc,
 						       desc_bytes(desc),
 						       DMA_TO_DEVICE);
@@ -366,11 +368,12 @@  static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 #endif
 
 	/* ahash_final shared descriptor */
-	desc = ctx->sh_desc_fin;
+	desc = kmalloc(DESC_HASH_MAX_USED_BYTES, GFP_KERNEL | GFP_DMA);
 
 	ahash_ctx_data_to_out(desc, have_key | ctx->alg_type,
 			      OP_ALG_AS_FINALIZE, digestsize, ctx);
 
+	ctx->sh_desc_fin = desc;
 	ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
 					      DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) {
@@ -384,11 +387,12 @@  static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 #endif
 
 	/* ahash_finup shared descriptor */
-	desc = ctx->sh_desc_finup;
+	desc = kmalloc(DESC_HASH_MAX_USED_BYTES, GFP_KERNEL | GFP_DMA);
 
 	ahash_ctx_data_to_out(desc, have_key | ctx->alg_type,
 			      OP_ALG_AS_FINALIZE, digestsize, ctx);
 
+	ctx->sh_desc_finup = desc;
 	ctx->sh_desc_finup_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
 						DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, ctx->sh_desc_finup_dma)) {
@@ -402,11 +406,12 @@  static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 #endif
 
 	/* ahash_digest shared descriptor */
-	desc = ctx->sh_desc_digest;
+	desc = kmalloc(DESC_HASH_MAX_USED_BYTES, GFP_KERNEL | GFP_DMA);
 
 	ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INITFINAL,
 			  digestsize, ctx);
 
+	ctx->sh_desc_digest = desc;
 	ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc,
 						 desc_bytes(desc),
 						 DMA_TO_DEVICE);
@@ -631,10 +636,10 @@  static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
 	struct ahash_request *req = context;
 	struct ahash_edesc *edesc;
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_state *state = ahash_request_ctx(req);
 	int digestsize = crypto_ahash_digestsize(ahash);
 #ifdef DEBUG
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
 
 	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
@@ -657,6 +662,11 @@  static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
 			       digestsize, 1);
 #endif
 
+	kfree(state->buf_0);
+	kfree(state->buf_1);
+	state->buf_0 = NULL;
+	state->buf_1 = NULL;
+
 	req->base.complete(&req->base, err);
 }
 
@@ -701,10 +711,10 @@  static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
 	struct ahash_request *req = context;
 	struct ahash_edesc *edesc;
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_state *state = ahash_request_ctx(req);
 	int digestsize = crypto_ahash_digestsize(ahash);
 #ifdef DEBUG
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
 
 	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
@@ -727,6 +737,11 @@  static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
 			       digestsize, 1);
 #endif
 
+	kfree(state->buf_0);
+	kfree(state->buf_1);
+	state->buf_0 = NULL;
+	state->buf_1 = NULL;
+
 	req->base.complete(&req->base, err);
 }
 
@@ -1522,6 +1537,9 @@  static int ahash_init(struct ahash_request *req)
 	state->finup = ahash_finup_first;
 	state->final = ahash_final_no_ctx;
 
+	state->buf_0 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
+	state->buf_1 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
+
 	state->current_buf = 0;
 	state->buf_dma = 0;
 	state->buflen_0 = 0;
@@ -1557,6 +1575,20 @@  static int ahash_export(struct ahash_request *req, void *out)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 
+	/*
+	 * Do not export the data buffers. New buffers are
+	 * allocated during import.
+	 */
+	kfree(state->buf_0);
+	kfree(state->buf_1);
+	state->buf_0 = NULL;
+	state->buf_1 = NULL;
+
+	state->current_buf = 0;
+	state->buf_dma = 0;
+	state->buflen_0 = 0;
+	state->buflen_1 = 0;
+
 	memcpy(out, ctx, sizeof(struct caam_hash_ctx));
 	memcpy(out + sizeof(struct caam_hash_ctx), state,
 	       sizeof(struct caam_hash_state));
@@ -1569,6 +1601,10 @@  static int ahash_import(struct ahash_request *req, const void *in)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 
+	/* Allocate new data buffers to use for this request */
+	state->buf_0 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
+	state->buf_1 = kmalloc(CAAM_MAX_HASH_BLOCK_SIZE, GFP_KERNEL | GFP_DMA);
+
 	memcpy(ctx, in, sizeof(struct caam_hash_ctx));
 	memcpy(state, in + sizeof(struct caam_hash_ctx),
 	       sizeof(struct caam_hash_state));
@@ -1605,6 +1641,8 @@  static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA1_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_hash_ctx) +
+					     sizeof(struct caam_hash_state),
 				},
 			},
 		.alg_type = OP_ALG_ALGSEL_SHA1,
@@ -1626,6 +1664,8 @@  static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA224_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_hash_ctx) +
+					     sizeof(struct caam_hash_state),
 				},
 			},
 		.alg_type = OP_ALG_ALGSEL_SHA224,
@@ -1647,6 +1687,8 @@  static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA256_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_hash_ctx) +
+					     sizeof(struct caam_hash_state),
 				},
 			},
 		.alg_type = OP_ALG_ALGSEL_SHA256,
@@ -1668,6 +1710,8 @@  static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA384_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_hash_ctx) +
+					     sizeof(struct caam_hash_state),
 				},
 			},
 		.alg_type = OP_ALG_ALGSEL_SHA384,
@@ -1689,6 +1733,8 @@  static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA512_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_hash_ctx) +
+					     sizeof(struct caam_hash_state),
 				},
 			},
 		.alg_type = OP_ALG_ALGSEL_SHA512,
@@ -1710,6 +1756,8 @@  static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = MD5_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_hash_ctx) +
+					     sizeof(struct caam_hash_state),
 				},
 			},
 		.alg_type = OP_ALG_ALGSEL_MD5,
@@ -1796,6 +1844,12 @@  static void caam_hash_cra_exit(struct crypto_tfm *tfm)
 		dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma,
 				 desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE);
 
+	kfree(ctx->sh_desc_update);
+	kfree(ctx->sh_desc_update_first);
+	kfree(ctx->sh_desc_fin);
+	kfree(ctx->sh_desc_digest);
+	kfree(ctx->sh_desc_finup);
+
 	caam_jr_free(ctx->jrdev);
 }