diff mbox series

[v2,01/13] dmaengine: pl330: Remove the burst limit for quirk 'NO-FLUSHP'

Message ID 1591665267-37713-2-git-send-email-sugar.zhang@rock-chips.com (mailing list archive)
State New, archived
Headers show
Series Patches to improve transfer efficiency for Rockchip SoCs. | expand

Commit Message

Sugar Zhang June 9, 2020, 1:14 a.m. UTC
There is no reason to limit the performance on the 'NO-FLUSHP' SoCs,
cuz these platforms are just that the 'FLUSHP' instruction is broken.
so, remove the limit to improve the efficiency.

Signed-off-by: Sugar Zhang <sugar.zhang@rock-chips.com>
---

Changes in v2: None

 drivers/dma/pl330.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

Comments

Vinod Koul June 24, 2020, 7:54 a.m. UTC | #1
On 09-06-20, 09:14, Sugar Zhang wrote:
> There is no reason to limit the performance on the 'NO-FLUSHP' SoCs,
> cuz these platforms are just that the 'FLUSHP' instruction is broken.

Let's not use terms like cuz... 'because' is a perfectly good term :)

It can be rephrased to:
There is no reason to limit the performance on the 'NO-FLUSHP' SoCs
because the 'FLUSHP' instruction is broken on these platforms, so remove the
limit to improve the efficiency.


> so, remove the limit to improve the efficiency.
> 
> Signed-off-by: Sugar Zhang <sugar.zhang@rock-chips.com>
> ---
> 
> Changes in v2: None
> 
>  drivers/dma/pl330.c | 34 ++++++++++++++++++++++------------
>  1 file changed, 22 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
> index 6a158ee..ff0a91f 100644
> --- a/drivers/dma/pl330.c
> +++ b/drivers/dma/pl330.c
> @@ -1183,9 +1183,6 @@ static inline int _ldst_peripheral(struct pl330_dmac *pl330,
>  {
>  	int off = 0;
>  
> -	if (pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
> -		cond = BURST;
> -
>  	/*
>  	 * do FLUSHP at beginning to clear any stale dma requests before the
>  	 * first WFP.
> @@ -1231,8 +1228,9 @@ static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
>  }
>  
>  /*
> - * transfer dregs with single transfers to peripheral, or a reduced size burst
> - * for mem-to-mem.
> + * only the unaligned bursts transfers have the dregs.
> + * transfer dregs with a reduced size burst to peripheral,
> + * or a reduced size burst for mem-to-mem.

This is not related to the broken flush and should be a separate patch
explaining why these changes were made.

>   */
>  static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
>  		const struct _xfer_spec *pxs, int transfer_length)
> @@ -1247,8 +1245,23 @@ static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
>  	case DMA_MEM_TO_DEV:
>  		/* fall through */
>  	case DMA_DEV_TO_MEM:
> -		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs,
> -			transfer_length, SINGLE);
> +		/*
> +		 * dregs_len = (total bytes - BURST_TO_BYTE(bursts, ccr)) /
> +		 *             BRST_SIZE(ccr)
> +		 * the dregs len must be smaller than burst len,
> +		 * so, for higher efficiency, we can modify CCR
> +		 * to use a reduced size burst len for the dregs.
> +		 */
> +		dregs_ccr = pxs->ccr;
> +		dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) |
> +			(0xf << CC_DSTBRSTLEN_SHFT));
> +		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
> +			CC_SRCBRSTLEN_SHFT);
> +		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
> +			CC_DSTBRSTLEN_SHFT);
> +		off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr);
> +		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, 1,
> +					BURST);
>  		break;
>  
>  	case DMA_MEM_TO_MEM:
> @@ -2221,9 +2234,7 @@ static bool pl330_prep_slave_fifo(struct dma_pl330_chan *pch,
>  
>  static int fixup_burst_len(int max_burst_len, int quirks)
>  {
> -	if (quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
> -		return 1;
> -	else if (max_burst_len > PL330_MAX_BURST)
> +	if (max_burst_len > PL330_MAX_BURST)
>  		return PL330_MAX_BURST;
>  	else if (max_burst_len < 1)
>  		return 1;
> @@ -3128,8 +3139,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
>  	pd->dst_addr_widths = PL330_DMA_BUSWIDTHS;
>  	pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
>  	pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
> -	pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ?
> -			 1 : PL330_MAX_BURST);
> +	pd->max_burst = PL330_MAX_BURST;
>  
>  	ret = dma_async_device_register(pd);
>  	if (ret) {
> -- 
> 2.7.4
> 
>
Sugar Zhang June 29, 2020, 1:38 a.m. UTC | #2
On 2020/6/24 15:54, Vinod Koul wrote:
> On 09-06-20, 09:14, Sugar Zhang wrote:
>> There is no reason to limit the performance on the 'NO-FLUSHP' SoCs,
>> cuz these platforms are just that the 'FLUSHP' instruction is broken.
> Let's not use terms like cuz... 'because' is a perfectly good term :)
>
> It can be rephrased to:
> There is no reason to limit the performance on the 'NO-FLUSHP' SoCs
> because the 'FLUSHP' instruction is broken on these platforms, so remove the
> limit to improve the efficiency.
Thanks, I will send a v3 including these.
>
>> so, remove the limit to improve the efficiency.
>>
>> Signed-off-by: Sugar Zhang <sugar.zhang@rock-chips.com>
>> ---
>>
>> Changes in v2: None
>>
>>   drivers/dma/pl330.c | 34 ++++++++++++++++++++++------------
>>   1 file changed, 22 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
>> index 6a158ee..ff0a91f 100644
>> --- a/drivers/dma/pl330.c
>> +++ b/drivers/dma/pl330.c
>> @@ -1183,9 +1183,6 @@ static inline int _ldst_peripheral(struct pl330_dmac *pl330,
>>   {
>>   	int off = 0;
>>   
>> -	if (pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
>> -		cond = BURST;
>> -
>>   	/*
>>   	 * do FLUSHP at beginning to clear any stale dma requests before the
>>   	 * first WFP.
>> @@ -1231,8 +1228,9 @@ static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
>>   }
>>   
>>   /*
>> - * transfer dregs with single transfers to peripheral, or a reduced size burst
>> - * for mem-to-mem.
>> + * only the unaligned bursts transfers have the dregs.
>> + * transfer dregs with a reduced size burst to peripheral,
>> + * or a reduced size burst for mem-to-mem.
> This is not related to the broken flush and should be a separate patch
> explaining why these changes were made.
ok, I will split this patch in v3.
>>    */
>>   static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
>>   		const struct _xfer_spec *pxs, int transfer_length)
>> @@ -1247,8 +1245,23 @@ static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
>>   	case DMA_MEM_TO_DEV:
>>   		/* fall through */
>>   	case DMA_DEV_TO_MEM:
>> -		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs,
>> -			transfer_length, SINGLE);
>> +		/*
>> +		 * dregs_len = (total bytes - BURST_TO_BYTE(bursts, ccr)) /
>> +		 *             BRST_SIZE(ccr)
>> +		 * the dregs len must be smaller than burst len,
>> +		 * so, for higher efficiency, we can modify CCR
>> +		 * to use a reduced size burst len for the dregs.
>> +		 */
>> +		dregs_ccr = pxs->ccr;
>> +		dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) |
>> +			(0xf << CC_DSTBRSTLEN_SHFT));
>> +		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
>> +			CC_SRCBRSTLEN_SHFT);
>> +		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
>> +			CC_DSTBRSTLEN_SHFT);
>> +		off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr);
>> +		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, 1,
>> +					BURST);
>>   		break;
>>   
>>   	case DMA_MEM_TO_MEM:
>> @@ -2221,9 +2234,7 @@ static bool pl330_prep_slave_fifo(struct dma_pl330_chan *pch,
>>   
>>   static int fixup_burst_len(int max_burst_len, int quirks)
>>   {
>> -	if (quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
>> -		return 1;
>> -	else if (max_burst_len > PL330_MAX_BURST)
>> +	if (max_burst_len > PL330_MAX_BURST)
>>   		return PL330_MAX_BURST;
>>   	else if (max_burst_len < 1)
>>   		return 1;
>> @@ -3128,8 +3139,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
>>   	pd->dst_addr_widths = PL330_DMA_BUSWIDTHS;
>>   	pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
>>   	pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
>> -	pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ?
>> -			 1 : PL330_MAX_BURST);
>> +	pd->max_burst = PL330_MAX_BURST;
>>   
>>   	ret = dma_async_device_register(pd);
>>   	if (ret) {
>> -- 
>> 2.7.4
>>
>>
diff mbox series

Patch

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 6a158ee..ff0a91f 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1183,9 +1183,6 @@  static inline int _ldst_peripheral(struct pl330_dmac *pl330,
 {
 	int off = 0;
 
-	if (pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
-		cond = BURST;
-
 	/*
 	 * do FLUSHP at beginning to clear any stale dma requests before the
 	 * first WFP.
@@ -1231,8 +1228,9 @@  static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
 }
 
 /*
- * transfer dregs with single transfers to peripheral, or a reduced size burst
- * for mem-to-mem.
+ * only the unaligned bursts transfers have the dregs.
+ * transfer dregs with a reduced size burst to peripheral,
+ * or a reduced size burst for mem-to-mem.
  */
 static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
 		const struct _xfer_spec *pxs, int transfer_length)
@@ -1247,8 +1245,23 @@  static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
 	case DMA_MEM_TO_DEV:
 		/* fall through */
 	case DMA_DEV_TO_MEM:
-		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs,
-			transfer_length, SINGLE);
+		/*
+		 * dregs_len = (total bytes - BURST_TO_BYTE(bursts, ccr)) /
+		 *             BRST_SIZE(ccr)
+		 * the dregs len must be smaller than burst len,
+		 * so, for higher efficiency, we can modify CCR
+		 * to use a reduced size burst len for the dregs.
+		 */
+		dregs_ccr = pxs->ccr;
+		dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) |
+			(0xf << CC_DSTBRSTLEN_SHFT));
+		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+			CC_SRCBRSTLEN_SHFT);
+		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+			CC_DSTBRSTLEN_SHFT);
+		off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr);
+		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, 1,
+					BURST);
 		break;
 
 	case DMA_MEM_TO_MEM:
@@ -2221,9 +2234,7 @@  static bool pl330_prep_slave_fifo(struct dma_pl330_chan *pch,
 
 static int fixup_burst_len(int max_burst_len, int quirks)
 {
-	if (quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
-		return 1;
-	else if (max_burst_len > PL330_MAX_BURST)
+	if (max_burst_len > PL330_MAX_BURST)
 		return PL330_MAX_BURST;
 	else if (max_burst_len < 1)
 		return 1;
@@ -3128,8 +3139,7 @@  pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	pd->dst_addr_widths = PL330_DMA_BUSWIDTHS;
 	pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
 	pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
-	pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ?
-			 1 : PL330_MAX_BURST);
+	pd->max_burst = PL330_MAX_BURST;
 
 	ret = dma_async_device_register(pd);
 	if (ret) {