diff mbox series

[v3,2/4] crypto: talitos - fix hash on SEC1.

Message ID 732ca0ff440bf4cd589d844cfda71d96efd500f5.1560429844.git.christophe.leroy@c-s.fr (mailing list archive)
State Superseded
Delegated to: Herbert Xu
Headers show
Series Additional fixes on Talitos driver | expand

Commit Message

Christophe Leroy June 13, 2019, 12:48 p.m. UTC
On SEC1, hash provides a wrong result when performing hashing in several
steps and the input data SG list has more than one element. This was
detected with CONFIG_CRYPTO_MANAGER_EXTRA_TESTS:

[   44.185947] alg: hash: md5-talitos test failed (wrong result) on test vector 6, cfg="random: may_sleep use_finup src_divs=[<reimport>25.88%@+8063, <flush>24.19%@+9588, 28.63%@+16333, <reimport>4.60%@+6756, 16.70%@+16281] dst_divs=[71.61%@alignmask+16361, 14.36%@+7756, 14.3%@+"
[   44.325122] alg: hash: sha1-talitos test failed (wrong result) on test vector 3, cfg="random: inplace use_final src_divs=[<flush,nosimd>16.56%@+16378, <reimport>52.0%@+16329, 21.42%@alignmask+16380, 10.2%@alignmask+16380] iv_offset=39"
[   44.493500] alg: hash: sha224-talitos test failed (wrong result) on test vector 4, cfg="random: use_final nosimd src_divs=[<reimport>52.27%@+7401, <reimport>17.34%@+16285, <flush>17.71%@+26, 12.68%@+10644] iv_offset=43"
[   44.673262] alg: hash: sha256-talitos test failed (wrong result) on test vector 4, cfg="random: may_sleep use_finup src_divs=[<reimport>60.6%@+12790, 17.86%@+1329, <reimport>12.64%@alignmask+16300, 8.29%@+15, 0.40%@+13506, <reimport>0.51%@+16322, <reimport>0.24%@+16339] dst_divs"

This is due to two issues:
- We have an overlap between the buffer used for copying the input
data (SEC1 doesn't do scatter/gather) and the chained descriptor.
- Data copy is wrong when the previous hash left less than one
blocksize of data to hash, implying a complement of the previous
block with a few bytes from the new request.

Fix it by:
- Moving the second descriptor after the buffer, as moving the buffer
after the descriptor would make it more complex for other cipher
operations (AEAD, ABLKCIPHER)
- Rebuilding a new data SG list without the bytes taken from the new
request to complete the previous one.

Preceding patch ("crypto: talitos - move struct talitos_edesc into
talitos.h") is required for this change to build properly.

Fixes: 37b5e8897eb5 ("crypto: talitos - chain in buffered data for ahash on SEC1")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 drivers/crypto/talitos.c | 63 ++++++++++++++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 23 deletions(-)

Comments

Horia Geanta June 13, 2019, 7:07 p.m. UTC | #1
On 6/13/2019 3:48 PM, Christophe Leroy wrote:
> On SEC1, hash provides wrong result when performing hashing in several
> steps with input data SG list has more than one element. This was
> detected with CONFIG_CRYPTO_MANAGER_EXTRA_TESTS:
> 
> [   44.185947] alg: hash: md5-talitos test failed (wrong result) on test vector 6, cfg="random: may_sleep use_finup src_divs=[<reimport>25.88%@+8063, <flush>24.19%@+9588, 28.63%@+16333, <reimport>4.60%@+6756, 16.70%@+16281] dst_divs=[71.61%@alignmask+16361, 14.36%@+7756, 14.3%@+"
> [   44.325122] alg: hash: sha1-talitos test failed (wrong result) on test vector 3, cfg="random: inplace use_final src_divs=[<flush,nosimd>16.56%@+16378, <reimport>52.0%@+16329, 21.42%@alignmask+16380, 10.2%@alignmask+16380] iv_offset=39"
> [   44.493500] alg: hash: sha224-talitos test failed (wrong result) on test vector 4, cfg="random: use_final nosimd src_divs=[<reimport>52.27%@+7401, <reimport>17.34%@+16285, <flush>17.71%@+26, 12.68%@+10644] iv_offset=43"
> [   44.673262] alg: hash: sha256-talitos test failed (wrong result) on test vector 4, cfg="random: may_sleep use_finup src_divs=[<reimport>60.6%@+12790, 17.86%@+1329, <reimport>12.64%@alignmask+16300, 8.29%@+15, 0.40%@+13506, <reimport>0.51%@+16322, <reimport>0.24%@+16339] dst_divs"
> 
> This is due to two issues:
> - We have an overlap between the buffer used for copying the input
> data (SEC1 doesn't do scatter/gather) and the chained descriptor.
> - Data copy is wrong when the previous hash left less than one
> blocksize of data to hash, implying a complement of the previous
> block with a few bytes from the new request.
> 
I fail to spot these issues.

IIUC in case of SEC1, the variable part of talitos_edesc structure contains
a 2nd "chained" descriptor (talitos_desc struct) followed by an area
dedicated to linearizing the input (in case input is scattered).

Where is the overlap?

> Fix it by:
> - Moving the second descriptor after the buffer, as moving the buffer
> after the descriptor would make it more complex for other cipher
> operations (AEAD, ABLKCIPHER)
> - Rebuiding a new data SG list without the bytes taken from the new
> request to complete the previous one.
> 
> Preceding patch ("crypto: talitos - move struct talitos_edesc into
> talitos.h") as required for this change to build properly.
> 
> Fixes: 37b5e8897eb5 ("crypto: talitos - chain in buffered data for ahash on SEC1")
> Cc: stable@vger.kernel.org
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>  drivers/crypto/talitos.c | 63 ++++++++++++++++++++++++++++++------------------
>  1 file changed, 40 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
> index 5b401aec6c84..4f03baef952b 100644
> --- a/drivers/crypto/talitos.c
> +++ b/drivers/crypto/talitos.c
> @@ -336,15 +336,18 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
>  	tail = priv->chan[ch].tail;
>  	while (priv->chan[ch].fifo[tail].desc) {
>  		__be32 hdr;
> +		struct talitos_edesc *edesc;
>  
>  		request = &priv->chan[ch].fifo[tail];
> +		edesc = container_of(request->desc, struct talitos_edesc, desc);
>  
>  		/* descriptors with their done bits set don't get the error */
>  		rmb();
>  		if (!is_sec1)
>  			hdr = request->desc->hdr;
>  		else if (request->desc->next_desc)
> -			hdr = (request->desc + 1)->hdr1;
> +			hdr = ((struct talitos_desc *)
> +			       (edesc->buf + edesc->dma_len))->hdr1;
>  		else
>  			hdr = request->desc->hdr1;
>  
> @@ -476,8 +479,14 @@ static u32 current_desc_hdr(struct device *dev, int ch)
>  		}
>  	}
>  
> -	if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc)
> -		return (priv->chan[ch].fifo[iter].desc + 1)->hdr;
> +	if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc) {
> +		struct talitos_edesc *edesc;
> +
> +		edesc = container_of(priv->chan[ch].fifo[iter].desc,
> +				     struct talitos_edesc, desc);
> +		return ((struct talitos_desc *)
> +			(edesc->buf + edesc->dma_len))->hdr;
> +	}
>  
>  	return priv->chan[ch].fifo[iter].desc->hdr;
>  }
> @@ -1402,15 +1411,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
>  	edesc->dst_nents = dst_nents;
>  	edesc->iv_dma = iv_dma;
>  	edesc->dma_len = dma_len;
> -	if (dma_len) {
> -		void *addr = &edesc->link_tbl[0];
> -
> -		if (is_sec1 && !dst)
> -			addr += sizeof(struct talitos_desc);
> -		edesc->dma_link_tbl = dma_map_single(dev, addr,
> +	if (dma_len)
> +		edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0],
>  						     edesc->dma_len,
>  						     DMA_BIDIRECTIONAL);
> -	}
> +
>  	return edesc;
>  }
>  
> @@ -1722,14 +1727,16 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
>  	struct talitos_private *priv = dev_get_drvdata(dev);
>  	bool is_sec1 = has_ftr_sec1(priv);
>  	struct talitos_desc *desc = &edesc->desc;
> -	struct talitos_desc *desc2 = desc + 1;
> +	struct talitos_desc *desc2 = (struct talitos_desc *)
> +				     (edesc->buf + edesc->dma_len);
>  
>  	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
>  	if (desc->next_desc &&
>  	    desc->ptr[5].ptr != desc2->ptr[5].ptr)
>  		unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
>  
> -	talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
> +	if (req_ctx->psrc)
> +		talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
>  
>  	/* When using hashctx-in, must unmap it. */
>  	if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1))
> @@ -1796,7 +1803,6 @@ static void talitos_handle_buggy_hash(struct talitos_ctx *ctx,
>  
>  static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>  				struct ahash_request *areq, unsigned int length,
> -				unsigned int offset,
>  				void (*callback) (struct device *dev,
>  						  struct talitos_desc *desc,
>  						  void *context, int error))
> @@ -1835,9 +1841,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>  
>  	sg_count = edesc->src_nents ?: 1;
>  	if (is_sec1 && sg_count > 1)
> -		sg_pcopy_to_buffer(req_ctx->psrc, sg_count,
> -				   edesc->buf + sizeof(struct talitos_desc),
> -				   length, req_ctx->nbuf);
> +		sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
>  	else if (length)
>  		sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
>  				      DMA_TO_DEVICE);
> @@ -1850,7 +1854,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>  				       DMA_TO_DEVICE);
>  	} else {
>  		sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
> -					  &desc->ptr[3], sg_count, offset, 0);
> +					  &desc->ptr[3], sg_count, 0, 0);
>  		if (sg_count > 1)
>  			sync_needed = true;
>  	}
> @@ -1874,7 +1878,8 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>  		talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]);
>  
>  	if (is_sec1 && req_ctx->nbuf && length) {
> -		struct talitos_desc *desc2 = desc + 1;
> +		struct talitos_desc *desc2 = (struct talitos_desc *)
> +					     (edesc->buf + edesc->dma_len);
>  		dma_addr_t next_desc;
>  
>  		memset(desc2, 0, sizeof(*desc2));
> @@ -1895,7 +1900,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>  						      DMA_TO_DEVICE);
>  		copy_talitos_ptr(&desc2->ptr[2], &desc->ptr[2], is_sec1);
>  		sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
> -					  &desc2->ptr[3], sg_count, offset, 0);
> +					  &desc2->ptr[3], sg_count, 0, 0);
>  		if (sg_count > 1)
>  			sync_needed = true;
>  		copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
> @@ -2006,7 +2011,6 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>  	struct device *dev = ctx->dev;
>  	struct talitos_private *priv = dev_get_drvdata(dev);
>  	bool is_sec1 = has_ftr_sec1(priv);
> -	int offset = 0;
>  	u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx];
>  
>  	if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
> @@ -2046,6 +2050,9 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>  			sg_chain(req_ctx->bufsl, 2, areq->src);
>  		req_ctx->psrc = req_ctx->bufsl;
>  	} else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) {
> +		int offset;
> +		struct scatterlist *sg;
> +
>  		if (nbytes_to_hash > blocksize)
>  			offset = blocksize - req_ctx->nbuf;
>  		else
> @@ -2058,7 +2065,18 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>  		sg_copy_to_buffer(areq->src, nents,
>  				  ctx_buf + req_ctx->nbuf, offset);
>  		req_ctx->nbuf += offset;
> -		req_ctx->psrc = areq->src;
> +		for (sg = areq->src; sg && offset >= sg->length;
> +		     offset -= sg->length, sg = sg_next(sg))
> +			;
> +		if (offset) {
> +			sg_init_table(req_ctx->bufsl, 2);
> +			sg_set_buf(req_ctx->bufsl, sg_virt(sg) + offset,
> +				   sg->length - offset);
> +			sg_chain(req_ctx->bufsl, 2, sg_next(sg));
> +			req_ctx->psrc = req_ctx->bufsl;
> +		} else {
> +			req_ctx->psrc = sg;
> +		}
>  	} else
>  		req_ctx->psrc = areq->src;
>  
> @@ -2098,8 +2116,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>  	if (ctx->keylen && (req_ctx->first || req_ctx->last))
>  		edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
>  
> -	return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, offset,
> -				    ahash_done);
> +	return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, ahash_done);
>  }
>  
>  static int ahash_update(struct ahash_request *areq)
>
Christophe Leroy June 14, 2019, 7:57 a.m. UTC | #2
Le 13/06/2019 à 21:07, Horia Geanta a écrit :
> On 6/13/2019 3:48 PM, Christophe Leroy wrote:
>> On SEC1, hash provides wrong result when performing hashing in several
>> steps with input data SG list has more than one element. This was
>> detected with CONFIG_CRYPTO_MANAGER_EXTRA_TESTS:
>>
>> [   44.185947] alg: hash: md5-talitos test failed (wrong result) on test vector 6, cfg="random: may_sleep use_finup src_divs=[<reimport>25.88%@+8063, <flush>24.19%@+9588, 28.63%@+16333, <reimport>4.60%@+6756, 16.70%@+16281] dst_divs=[71.61%@alignmask+16361, 14.36%@+7756, 14.3%@+"
>> [   44.325122] alg: hash: sha1-talitos test failed (wrong result) on test vector 3, cfg="random: inplace use_final src_divs=[<flush,nosimd>16.56%@+16378, <reimport>52.0%@+16329, 21.42%@alignmask+16380, 10.2%@alignmask+16380] iv_offset=39"
>> [   44.493500] alg: hash: sha224-talitos test failed (wrong result) on test vector 4, cfg="random: use_final nosimd src_divs=[<reimport>52.27%@+7401, <reimport>17.34%@+16285, <flush>17.71%@+26, 12.68%@+10644] iv_offset=43"
>> [   44.673262] alg: hash: sha256-talitos test failed (wrong result) on test vector 4, cfg="random: may_sleep use_finup src_divs=[<reimport>60.6%@+12790, 17.86%@+1329, <reimport>12.64%@alignmask+16300, 8.29%@+15, 0.40%@+13506, <reimport>0.51%@+16322, <reimport>0.24%@+16339] dst_divs"
>>
>> This is due to two issues:
>> - We have an overlap between the buffer used for copying the input
>> data (SEC1 doesn't do scatter/gather) and the chained descriptor.
>> - Data copy is wrong when the previous hash left less than one
>> blocksize of data to hash, implying a complement of the previous
>> block with a few bytes from the new request.
>>
> I fail to spot these issues.
> 
> IIUC in case of SEC1, the variable part of talitos_edesc structure contains
> a 2nd "chained" descriptor (talitos_desc struct) followed by an area
> dedicated to linearizing the input (in case input is scattered).
> 
> Where is the overlap?

talitos_sg_map() maps the area starting at edesc->dma_link_tbl, which 
corresponds to the start of the variable part of talitos_edesc 
structure. When we use the second descriptor, the data is after that 
descriptor, but talitos_sg_map() is not aware of it so it maps the 
second descriptor followed by part of the data instead of mapping the data.

Christophe


> 
>> Fix it by:
>> - Moving the second descriptor after the buffer, as moving the buffer
>> after the descriptor would make it more complex for other cipher
>> operations (AEAD, ABLKCIPHER)
>> - Rebuiding a new data SG list without the bytes taken from the new
>> request to complete the previous one.
>>
>> Preceding patch ("crypto: talitos - move struct talitos_edesc into
>> talitos.h") as required for this change to build properly.
>>
>> Fixes: 37b5e8897eb5 ("crypto: talitos - chain in buffered data for ahash on SEC1")
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>>   drivers/crypto/talitos.c | 63 ++++++++++++++++++++++++++++++------------------
>>   1 file changed, 40 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
>> index 5b401aec6c84..4f03baef952b 100644
>> --- a/drivers/crypto/talitos.c
>> +++ b/drivers/crypto/talitos.c
>> @@ -336,15 +336,18 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
>>   	tail = priv->chan[ch].tail;
>>   	while (priv->chan[ch].fifo[tail].desc) {
>>   		__be32 hdr;
>> +		struct talitos_edesc *edesc;
>>   
>>   		request = &priv->chan[ch].fifo[tail];
>> +		edesc = container_of(request->desc, struct talitos_edesc, desc);
>>   
>>   		/* descriptors with their done bits set don't get the error */
>>   		rmb();
>>   		if (!is_sec1)
>>   			hdr = request->desc->hdr;
>>   		else if (request->desc->next_desc)
>> -			hdr = (request->desc + 1)->hdr1;
>> +			hdr = ((struct talitos_desc *)
>> +			       (edesc->buf + edesc->dma_len))->hdr1;
>>   		else
>>   			hdr = request->desc->hdr1;
>>   
>> @@ -476,8 +479,14 @@ static u32 current_desc_hdr(struct device *dev, int ch)
>>   		}
>>   	}
>>   
>> -	if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc)
>> -		return (priv->chan[ch].fifo[iter].desc + 1)->hdr;
>> +	if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc) {
>> +		struct talitos_edesc *edesc;
>> +
>> +		edesc = container_of(priv->chan[ch].fifo[iter].desc,
>> +				     struct talitos_edesc, desc);
>> +		return ((struct talitos_desc *)
>> +			(edesc->buf + edesc->dma_len))->hdr;
>> +	}
>>   
>>   	return priv->chan[ch].fifo[iter].desc->hdr;
>>   }
>> @@ -1402,15 +1411,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
>>   	edesc->dst_nents = dst_nents;
>>   	edesc->iv_dma = iv_dma;
>>   	edesc->dma_len = dma_len;
>> -	if (dma_len) {
>> -		void *addr = &edesc->link_tbl[0];
>> -
>> -		if (is_sec1 && !dst)
>> -			addr += sizeof(struct talitos_desc);
>> -		edesc->dma_link_tbl = dma_map_single(dev, addr,
>> +	if (dma_len)
>> +		edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0],
>>   						     edesc->dma_len,
>>   						     DMA_BIDIRECTIONAL);
>> -	}
>> +
>>   	return edesc;
>>   }
>>   
>> @@ -1722,14 +1727,16 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
>>   	struct talitos_private *priv = dev_get_drvdata(dev);
>>   	bool is_sec1 = has_ftr_sec1(priv);
>>   	struct talitos_desc *desc = &edesc->desc;
>> -	struct talitos_desc *desc2 = desc + 1;
>> +	struct talitos_desc *desc2 = (struct talitos_desc *)
>> +				     (edesc->buf + edesc->dma_len);
>>   
>>   	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
>>   	if (desc->next_desc &&
>>   	    desc->ptr[5].ptr != desc2->ptr[5].ptr)
>>   		unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
>>   
>> -	talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
>> +	if (req_ctx->psrc)
>> +		talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
>>   
>>   	/* When using hashctx-in, must unmap it. */
>>   	if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1))
>> @@ -1796,7 +1803,6 @@ static void talitos_handle_buggy_hash(struct talitos_ctx *ctx,
>>   
>>   static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>>   				struct ahash_request *areq, unsigned int length,
>> -				unsigned int offset,
>>   				void (*callback) (struct device *dev,
>>   						  struct talitos_desc *desc,
>>   						  void *context, int error))
>> @@ -1835,9 +1841,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>>   
>>   	sg_count = edesc->src_nents ?: 1;
>>   	if (is_sec1 && sg_count > 1)
>> -		sg_pcopy_to_buffer(req_ctx->psrc, sg_count,
>> -				   edesc->buf + sizeof(struct talitos_desc),
>> -				   length, req_ctx->nbuf);
>> +		sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
>>   	else if (length)
>>   		sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
>>   				      DMA_TO_DEVICE);
>> @@ -1850,7 +1854,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>>   				       DMA_TO_DEVICE);
>>   	} else {
>>   		sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
>> -					  &desc->ptr[3], sg_count, offset, 0);
>> +					  &desc->ptr[3], sg_count, 0, 0);
>>   		if (sg_count > 1)
>>   			sync_needed = true;
>>   	}
>> @@ -1874,7 +1878,8 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>>   		talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]);
>>   
>>   	if (is_sec1 && req_ctx->nbuf && length) {
>> -		struct talitos_desc *desc2 = desc + 1;
>> +		struct talitos_desc *desc2 = (struct talitos_desc *)
>> +					     (edesc->buf + edesc->dma_len);
>>   		dma_addr_t next_desc;
>>   
>>   		memset(desc2, 0, sizeof(*desc2));
>> @@ -1895,7 +1900,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
>>   						      DMA_TO_DEVICE);
>>   		copy_talitos_ptr(&desc2->ptr[2], &desc->ptr[2], is_sec1);
>>   		sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
>> -					  &desc2->ptr[3], sg_count, offset, 0);
>> +					  &desc2->ptr[3], sg_count, 0, 0);
>>   		if (sg_count > 1)
>>   			sync_needed = true;
>>   		copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
>> @@ -2006,7 +2011,6 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>>   	struct device *dev = ctx->dev;
>>   	struct talitos_private *priv = dev_get_drvdata(dev);
>>   	bool is_sec1 = has_ftr_sec1(priv);
>> -	int offset = 0;
>>   	u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx];
>>   
>>   	if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
>> @@ -2046,6 +2050,9 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>>   			sg_chain(req_ctx->bufsl, 2, areq->src);
>>   		req_ctx->psrc = req_ctx->bufsl;
>>   	} else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) {
>> +		int offset;
>> +		struct scatterlist *sg;
>> +
>>   		if (nbytes_to_hash > blocksize)
>>   			offset = blocksize - req_ctx->nbuf;
>>   		else
>> @@ -2058,7 +2065,18 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>>   		sg_copy_to_buffer(areq->src, nents,
>>   				  ctx_buf + req_ctx->nbuf, offset);
>>   		req_ctx->nbuf += offset;
>> -		req_ctx->psrc = areq->src;
>> +		for (sg = areq->src; sg && offset >= sg->length;
>> +		     offset -= sg->length, sg = sg_next(sg))
>> +			;
>> +		if (offset) {
>> +			sg_init_table(req_ctx->bufsl, 2);
>> +			sg_set_buf(req_ctx->bufsl, sg_virt(sg) + offset,
>> +				   sg->length - offset);
>> +			sg_chain(req_ctx->bufsl, 2, sg_next(sg));
>> +			req_ctx->psrc = req_ctx->bufsl;
>> +		} else {
>> +			req_ctx->psrc = sg;
>> +		}
>>   	} else
>>   		req_ctx->psrc = areq->src;
>>   
>> @@ -2098,8 +2116,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>>   	if (ctx->keylen && (req_ctx->first || req_ctx->last))
>>   		edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
>>   
>> -	return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, offset,
>> -				    ahash_done);
>> +	return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, ahash_done);
>>   }
>>   
>>   static int ahash_update(struct ahash_request *areq)
>>
Horia Geanta June 14, 2019, 11:32 a.m. UTC | #3
On 6/13/2019 3:48 PM, Christophe Leroy wrote:
> @@ -336,15 +336,18 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
>  	tail = priv->chan[ch].tail;
>  	while (priv->chan[ch].fifo[tail].desc) {
>  		__be32 hdr;
> +		struct talitos_edesc *edesc;
>  
>  		request = &priv->chan[ch].fifo[tail];
> +		edesc = container_of(request->desc, struct talitos_edesc, desc);
Not needed for all cases, should be moved to the block that uses it.

>  
>  		/* descriptors with their done bits set don't get the error */
>  		rmb();
>  		if (!is_sec1)
>  			hdr = request->desc->hdr;
>  		else if (request->desc->next_desc)
> -			hdr = (request->desc + 1)->hdr1;
> +			hdr = ((struct talitos_desc *)
> +			       (edesc->buf + edesc->dma_len))->hdr1;
>  		else
>  			hdr = request->desc->hdr1;
>  
[snip]
> @@ -2058,7 +2065,18 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>  		sg_copy_to_buffer(areq->src, nents,
>  				  ctx_buf + req_ctx->nbuf, offset);
>  		req_ctx->nbuf += offset;
> -		req_ctx->psrc = areq->src;
> +		for (sg = areq->src; sg && offset >= sg->length;
> +		     offset -= sg->length, sg = sg_next(sg))
> +			;
> +		if (offset) {
> +			sg_init_table(req_ctx->bufsl, 2);
> +			sg_set_buf(req_ctx->bufsl, sg_virt(sg) + offset,
> +				   sg->length - offset);
> +			sg_chain(req_ctx->bufsl, 2, sg_next(sg));
> +			req_ctx->psrc = req_ctx->bufsl;
Isn't this what scatterwalk_ffwd() does?

Horia
Christophe Leroy June 15, 2019, 8:18 a.m. UTC | #4
Le 14/06/2019 à 13:32, Horia Geanta a écrit :
> On 6/13/2019 3:48 PM, Christophe Leroy wrote:
>> @@ -336,15 +336,18 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
>>   	tail = priv->chan[ch].tail;
>>   	while (priv->chan[ch].fifo[tail].desc) {
>>   		__be32 hdr;
>> +		struct talitos_edesc *edesc;
>>   
>>   		request = &priv->chan[ch].fifo[tail];
>> +		edesc = container_of(request->desc, struct talitos_edesc, desc);
> Not needed for all cases, should be moved to the block that uses it.

Ok.

> 
>>   
>>   		/* descriptors with their done bits set don't get the error */
>>   		rmb();
>>   		if (!is_sec1)
>>   			hdr = request->desc->hdr;
>>   		else if (request->desc->next_desc)
>> -			hdr = (request->desc + 1)->hdr1;
>> +			hdr = ((struct talitos_desc *)
>> +			       (edesc->buf + edesc->dma_len))->hdr1;
>>   		else
>>   			hdr = request->desc->hdr1;
>>   
> [snip]
>> @@ -2058,7 +2065,18 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
>>   		sg_copy_to_buffer(areq->src, nents,
>>   				  ctx_buf + req_ctx->nbuf, offset);
>>   		req_ctx->nbuf += offset;
>> -		req_ctx->psrc = areq->src;
>> +		for (sg = areq->src; sg && offset >= sg->length;
>> +		     offset -= sg->length, sg = sg_next(sg))
>> +			;
>> +		if (offset) {
>> +			sg_init_table(req_ctx->bufsl, 2);
>> +			sg_set_buf(req_ctx->bufsl, sg_virt(sg) + offset,
>> +				   sg->length - offset);
>> +			sg_chain(req_ctx->bufsl, 2, sg_next(sg));
>> +			req_ctx->psrc = req_ctx->bufsl;
> Isn't this what scatterwalk_ffwd() does?

Thanks for pointing this out, I wasn't aware of that function. Looking at 
it, it seems to do the same. Unfortunately, some tests fail with 'wrong 
result' when using it instead.

Comparing the results of scatterwalk_ffwd() with what I get with my open 
coding, I see the following difference:

scatterwalk_ffwd() uses sg_page(sg) + sg->offset + len

while my open codying results in virt_to_page(sg_virt(sg) + len)

When sg->offset + len is greater than PAGE_SIZE, the resulting SG entry 
is different allthough valid in both cases. I think this difference 
results in sg_copy_to_buffer() failing. I'm still investigating. Any idea ?

Christophe

> 
> Horia
>
Christophe Leroy June 15, 2019, 8:37 a.m. UTC | #5
Le 15/06/2019 à 10:18, Christophe Leroy a écrit :
>>> @@ -2058,7 +2065,18 @@ static int ahash_process_req(struct 
>>> ahash_request *areq, unsigned int nbytes)
>>>           sg_copy_to_buffer(areq->src, nents,
>>>                     ctx_buf + req_ctx->nbuf, offset);
>>>           req_ctx->nbuf += offset;
>>> -        req_ctx->psrc = areq->src;
>>> +        for (sg = areq->src; sg && offset >= sg->length;
>>> +             offset -= sg->length, sg = sg_next(sg))
>>> +            ;
>>> +        if (offset) {
>>> +            sg_init_table(req_ctx->bufsl, 2);
>>> +            sg_set_buf(req_ctx->bufsl, sg_virt(sg) + offset,
>>> +                   sg->length - offset);
>>> +            sg_chain(req_ctx->bufsl, 2, sg_next(sg));
>>> +            req_ctx->psrc = req_ctx->bufsl;
>> Isn't this what scatterwalk_ffwd() does?
> 
> Thanks for pointing this, I wasn't aware of that function. Looking at it 
> it seems to do the same. Unfortunately, some tests fails with 'wrong 
> result' when using it instead.
> 
> Comparing the results of scatterwalk_ffwd() with what I get with my open 
> codying, I see the following difference:
> 
> scatterwalk_ffwd() uses sg_page(sg) + sg->offset + len
> 
> while my open codying results in virt_to_page(sg_virt(sg) + len)
> 
When sg->offset + len is greater than PAGE_SIZE, the resulting SG entry 
is different although valid in both cases. I think this difference 
results in sg_copy_to_buffer() failing. I'm still investigating. Any idea?
> 

After adding some dumps, I confirm the suspicion:

My board uses 16k pages.

SG list when not using scatterwalk_ffwd()
[   64.120540] sg c6386794 page c7fc1c60 offset 22 len 213
[   64.120579] sg c6386a48 page c7fc1b80 offset 4 len 2
[   64.120618] sg c6386a5c page c7fc1b00 offset 3 len 17
[   64.120658] sg c6386a70 page c7fc1b40 offset 2 len 10

SG list when using scatterwalk_ffwd()
[   64.120743] sg c6386794 page c7fc1c40 offset 16406 len 213
[   64.120782] sg c6386a48 page c7fc1b80 offset 4 len 2
[   64.120821] sg c6386a5c page c7fc1b00 offset 3 len 17
[   64.120861] sg c6386a70 page c7fc1b40 offset 2 len 10

Content of the SG list:
[   64.120975] 00000000: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.121021] 00000010: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.121067] 00000020: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.121112] 00000030: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.121157] 00000040: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.121202] 00000050: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.121247] 00000060: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 a8 10
[   64.121292] 00000070: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.121337] 00000080: e8 40 98 f0 48 a0 f8 50 28 00 58 b0 08 60 b8 10
[   64.121382] 00000090: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.121427] 000000a0: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.121472] 000000b0: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.121517] 000000c0: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.121557] 000000d0: 68 c0 18 30 c8
[   64.121598] 00000000: 20 78
[   64.121646] 00000000: d0 28 80 f8 30 88 e0 38 90 e8 40 98 f0 48 a0 f8
[   64.121684] 00000010: 50
[   64.121729] 00000000: a8 00 58 b0 08 60 b8 10 68 c0

Content of the buffer after the copy from the list:
[   64.121790] 00000000: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.121836] 00000010: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.121881] 00000020: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.121927] 00000030: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.121972] 00000040: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.122017] 00000050: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.122062] 00000060: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 a8 10
[   64.122107] 00000070: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.122152] 00000080: e8 40 98 f0 48 a0 f8 50 28 00 58 b0 08 60 b8 10
[   64.122197] 00000090: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.122243] 000000a0: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.122288] 000000b0: 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90
[   64.122333] 000000c0: e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 60 b8 10
[   64.122378] 000000d0: 68 c0 18 30 c8 d8 b0 08 60 b8 10 68 c0 18 70 c8
[   64.122424] 000000e0: 20 78 d0 28 80 d8 30 88 e0 38 90 e8 40 98 f0 48
[   64.122462] 000000f0: a0 f8

As you can see, the data following the first block should be
20 78 d0 28 80 f8 30 88 e0 38 90 e8 40 98 f0 48 a0 f8 50 a8 00 58 b0 08 
60 b8 10 68 c0

Instead it is
d8 b0 08 60 b8 10 68 c0 18 70 c8 20 78 d0 28 80 d8 30 88 e0 38 90 e8 40 
98 f0 48 a0 f8

So I guess there is something wrong with sg_copy_to_buffer()

Christophe
diff mbox series

Patch

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 5b401aec6c84..4f03baef952b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -336,15 +336,18 @@  static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
 	tail = priv->chan[ch].tail;
 	while (priv->chan[ch].fifo[tail].desc) {
 		__be32 hdr;
+		struct talitos_edesc *edesc;
 
 		request = &priv->chan[ch].fifo[tail];
+		edesc = container_of(request->desc, struct talitos_edesc, desc);
 
 		/* descriptors with their done bits set don't get the error */
 		rmb();
 		if (!is_sec1)
 			hdr = request->desc->hdr;
 		else if (request->desc->next_desc)
-			hdr = (request->desc + 1)->hdr1;
+			hdr = ((struct talitos_desc *)
+			       (edesc->buf + edesc->dma_len))->hdr1;
 		else
 			hdr = request->desc->hdr1;
 
@@ -476,8 +479,14 @@  static u32 current_desc_hdr(struct device *dev, int ch)
 		}
 	}
 
-	if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc)
-		return (priv->chan[ch].fifo[iter].desc + 1)->hdr;
+	if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc) {
+		struct talitos_edesc *edesc;
+
+		edesc = container_of(priv->chan[ch].fifo[iter].desc,
+				     struct talitos_edesc, desc);
+		return ((struct talitos_desc *)
+			(edesc->buf + edesc->dma_len))->hdr;
+	}
 
 	return priv->chan[ch].fifo[iter].desc->hdr;
 }
@@ -1402,15 +1411,11 @@  static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
 	edesc->dst_nents = dst_nents;
 	edesc->iv_dma = iv_dma;
 	edesc->dma_len = dma_len;
-	if (dma_len) {
-		void *addr = &edesc->link_tbl[0];
-
-		if (is_sec1 && !dst)
-			addr += sizeof(struct talitos_desc);
-		edesc->dma_link_tbl = dma_map_single(dev, addr,
+	if (dma_len)
+		edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0],
 						     edesc->dma_len,
 						     DMA_BIDIRECTIONAL);
-	}
+
 	return edesc;
 }
 
@@ -1722,14 +1727,16 @@  static void common_nonsnoop_hash_unmap(struct device *dev,
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	bool is_sec1 = has_ftr_sec1(priv);
 	struct talitos_desc *desc = &edesc->desc;
-	struct talitos_desc *desc2 = desc + 1;
+	struct talitos_desc *desc2 = (struct talitos_desc *)
+				     (edesc->buf + edesc->dma_len);
 
 	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
 	if (desc->next_desc &&
 	    desc->ptr[5].ptr != desc2->ptr[5].ptr)
 		unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
 
-	talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
+	if (req_ctx->psrc)
+		talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
 
 	/* When using hashctx-in, must unmap it. */
 	if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1))
@@ -1796,7 +1803,6 @@  static void talitos_handle_buggy_hash(struct talitos_ctx *ctx,
 
 static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 				struct ahash_request *areq, unsigned int length,
-				unsigned int offset,
 				void (*callback) (struct device *dev,
 						  struct talitos_desc *desc,
 						  void *context, int error))
@@ -1835,9 +1841,7 @@  static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 
 	sg_count = edesc->src_nents ?: 1;
 	if (is_sec1 && sg_count > 1)
-		sg_pcopy_to_buffer(req_ctx->psrc, sg_count,
-				   edesc->buf + sizeof(struct talitos_desc),
-				   length, req_ctx->nbuf);
+		sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
 	else if (length)
 		sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
 				      DMA_TO_DEVICE);
@@ -1850,7 +1854,7 @@  static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 				       DMA_TO_DEVICE);
 	} else {
 		sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
-					  &desc->ptr[3], sg_count, offset, 0);
+					  &desc->ptr[3], sg_count, 0, 0);
 		if (sg_count > 1)
 			sync_needed = true;
 	}
@@ -1874,7 +1878,8 @@  static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 		talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]);
 
 	if (is_sec1 && req_ctx->nbuf && length) {
-		struct talitos_desc *desc2 = desc + 1;
+		struct talitos_desc *desc2 = (struct talitos_desc *)
+					     (edesc->buf + edesc->dma_len);
 		dma_addr_t next_desc;
 
 		memset(desc2, 0, sizeof(*desc2));
@@ -1895,7 +1900,7 @@  static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 						      DMA_TO_DEVICE);
 		copy_talitos_ptr(&desc2->ptr[2], &desc->ptr[2], is_sec1);
 		sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
-					  &desc2->ptr[3], sg_count, offset, 0);
+					  &desc2->ptr[3], sg_count, 0, 0);
 		if (sg_count > 1)
 			sync_needed = true;
 		copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
@@ -2006,7 +2011,6 @@  static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 	struct device *dev = ctx->dev;
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	bool is_sec1 = has_ftr_sec1(priv);
-	int offset = 0;
 	u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx];
 
 	if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
@@ -2046,6 +2050,9 @@  static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 			sg_chain(req_ctx->bufsl, 2, areq->src);
 		req_ctx->psrc = req_ctx->bufsl;
 	} else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) {
+		int offset;
+		struct scatterlist *sg;
+
 		if (nbytes_to_hash > blocksize)
 			offset = blocksize - req_ctx->nbuf;
 		else
@@ -2058,7 +2065,18 @@  static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 		sg_copy_to_buffer(areq->src, nents,
 				  ctx_buf + req_ctx->nbuf, offset);
 		req_ctx->nbuf += offset;
-		req_ctx->psrc = areq->src;
+		for (sg = areq->src; sg && offset >= sg->length;
+		     offset -= sg->length, sg = sg_next(sg))
+			;
+		if (offset) {
+			sg_init_table(req_ctx->bufsl, 2);
+			sg_set_buf(req_ctx->bufsl, sg_virt(sg) + offset,
+				   sg->length - offset);
+			sg_chain(req_ctx->bufsl, 2, sg_next(sg));
+			req_ctx->psrc = req_ctx->bufsl;
+		} else {
+			req_ctx->psrc = sg;
+		}
 	} else
 		req_ctx->psrc = areq->src;
 
@@ -2098,8 +2116,7 @@  static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 	if (ctx->keylen && (req_ctx->first || req_ctx->last))
 		edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
 
-	return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, offset,
-				    ahash_done);
+	return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, ahash_done);
 }
 
 static int ahash_update(struct ahash_request *areq)