diff mbox series

[v3] dmaengine: qcom: gpi: Add GPI immediate DMA support for SPI protocol

Message ID 20241204122059.24239-1-quic_jseerapu@quicinc.com (mailing list archive)
State Changes Requested
Headers show
Series [v3] dmaengine: qcom: gpi: Add GPI immediate DMA support for SPI protocol | expand

Commit Message

Jyothi Kumar Seerapu Dec. 4, 2024, 12:20 p.m. UTC
The DMA TRE (Transfer Ring Element) buffer contains the DMA
buffer address. Accessing data from this address can cause
significant delays in SPI transfers, which can be mitigated to
some extent by utilizing immediate DMA support.

QCOM GPI DMA hardware supports an immediate DMA feature for data
up to 8 bytes, storing the data directly in the DMA TRE buffer
instead of the DMA buffer address. This enhancement enables faster
SPI data transfers.

This optimization reduces the average transfer time from 25 us to
16 us for a single SPI transfer of 8 bytes length, with a clock
frequency of 50 MHz.

Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
---

v2-> v3:
   - Control over when to enable immediate DMA support is moved from the
     SPI driver to the GPI driver.
   - Optimizations are done in GPI driver related to immediate dma changes.
   - Removed the immediate dma supported changes in qcom-gpi-dma.h file
     and handled in GPI driver. 

   Link to v2: 
	https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
	https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/ 

v1 -> v2:
   - Separated the patches to dmaengine and spi subsystems
   - Removed the changes which are not required for this feature from
     qcom-gpi-dma.h file.
   - Removed the type conversions used in gpi_create_spi_tre.
  
   Link to v1:
	https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/ 

 drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

Comments

Dmitry Baryshkov Dec. 4, 2024, 12:45 p.m. UTC | #1
On Wed, Dec 04, 2024 at 05:50:59PM +0530, Jyothi Kumar Seerapu wrote:
> The DMA TRE(Transfer ring element) buffer contains the DMA
> buffer address. Accessing data from this address can cause
> significant delays in SPI transfers, which can be mitigated to
> some extent by utilizing immediate DMA support.
> 
> QCOM GPI DMA hardware supports an immediate DMA feature for data
> up to 8 bytes, storing the data directly in the DMA TRE buffer
> instead of the DMA buffer address. This enhancement enables faster
> SPI data transfers.
> 
> This optimization reduces the average transfer time from 25 us to
> 16 us for a single SPI transfer of 8 bytes length, with a clock
> frequency of 50 MHz.
> 
> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
> ---
> 
> v2-> v3:
>    - When to enable Immediate DMA support, control is moved to GPI driver
>      from SPI driver. 
>    - Optimizations are done in GPI driver related to immediate dma changes.
>    - Removed the immediate dma supported changes in qcom-gpi-dma.h file
>      and handled in GPI driver. 
> 
>    Link to v2: 
> 	https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
> 	https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/ 
> 
> v1 -> v2:
>    - Separated the patches to dmaengine and spi subsystems
>    - Removed the changes which are not required for this feature from
>      qcom-gpi-dma.h file.
>    - Removed the type conversions used in gpi_create_spi_tre.
>   
>    Link to v1:
> 	https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/ 
> 
>  drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
>  1 file changed, 27 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
> index 52a7c8f2498f..35451d5a81f7 100644
> --- a/drivers/dma/qcom/gpi.c
> +++ b/drivers/dma/qcom/gpi.c
> @@ -27,6 +27,7 @@
>  #define TRE_FLAGS_IEOT		BIT(9)
>  #define TRE_FLAGS_BEI		BIT(10)
>  #define TRE_FLAGS_LINK		BIT(11)
> +#define TRE_FLAGS_IMMEDIATE_DMA	BIT(16)
>  #define TRE_FLAGS_TYPE		GENMASK(23, 16)
>  
>  /* SPI CONFIG0 WD0 */
> @@ -64,6 +65,7 @@
>  
>  /* DMA TRE */
>  #define TRE_DMA_LEN		GENMASK(23, 0)
> +#define TRE_DMA_IMMEDIATE_LEN	GENMASK(3, 0)
>  
>  /* Register offsets from gpi-top */
>  #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)	(0x20000 + (0x4000 * (n)) + (0x80 * (k)))
> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>  	dma_addr_t address;
>  	struct gpi_tre *tre;
>  	unsigned int i;
> +	int len;
> +	u8 immediate_dma;
>  
>  	/* first create config tre if applicable */
>  	if (direction == DMA_MEM_TO_DEV && spi->set_config) {
> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>  	tre_idx++;
>  
>  	address = sg_dma_address(sgl);
> -	tre->dword[0] = lower_32_bits(address);
> -	tre->dword[1] = upper_32_bits(address);
> +	len = sg_dma_len(sgl);
>  
> -	tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
> +	immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * sizeof(tre->dword[0]);

inline this condition, remove extra brackets and split the line after &&.

> +
> +	/* Support Immediate dma for write transfers for data length up to 8 bytes */
> +	if (immediate_dma) {
> +		/*
> +		 * For Immediate dma, data length may not always be length of 8 bytes,
> +		 * it can be length less than 8, hence initialize both dword's with 0
> +		 */
> +		tre->dword[0] = 0;
> +		tre->dword[1] = 0;
> +		memcpy(&tre->dword[0], sg_virt(sgl), len);
> +
> +		tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
> +	} else {
> +		tre->dword[0] = lower_32_bits(address);
> +		tre->dword[1] = upper_32_bits(address);
> +
> +		tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
> +	}
>  
>  	tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
> -	if (direction == DMA_MEM_TO_DEV)
> -		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
> +	tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);
> +	tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
> +					 TRE_FLAGS_IEOT);
>  
>  	for (i = 0; i < tre_idx; i++)
>  		dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
> -- 
> 2.17.1
>
Neil Armstrong Dec. 4, 2024, 12:49 p.m. UTC | #2
Hi,

On 04/12/2024 13:20, Jyothi Kumar Seerapu wrote:
> The DMA TRE(Transfer ring element) buffer contains the DMA
> buffer address. Accessing data from this address can cause
> significant delays in SPI transfers, which can be mitigated to
> some extent by utilizing immediate DMA support.
> 
> QCOM GPI DMA hardware supports an immediate DMA feature for data
> up to 8 bytes, storing the data directly in the DMA TRE buffer
> instead of the DMA buffer address. This enhancement enables faster
> SPI data transfers.
> 
> This optimization reduces the average transfer time from 25 us to
> 16 us for a single SPI transfer of 8 bytes length, with a clock
> frequency of 50 MHz.
> 
> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
> ---
> 
> v2-> v3:
>     - When to enable Immediate DMA support, control is moved to GPI driver
>       from SPI driver.
>     - Optimizations are done in GPI driver related to immediate dma changes.
>     - Removed the immediate dma supported changes in qcom-gpi-dma.h file
>       and handled in GPI driver.
> 
>     Link to v2:
> 	https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
> 	https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/
> 
> v1 -> v2:
>     - Separated the patches to dmaengine and spi subsystems
>     - Removed the changes which are not required for this feature from
>       qcom-gpi-dma.h file.
>     - Removed the type conversions used in gpi_create_spi_tre.
>    
>     Link to v1:
> 	https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/
> 
>   drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
>   1 file changed, 27 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
> index 52a7c8f2498f..35451d5a81f7 100644
> --- a/drivers/dma/qcom/gpi.c
> +++ b/drivers/dma/qcom/gpi.c
> @@ -27,6 +27,7 @@
>   #define TRE_FLAGS_IEOT		BIT(9)
>   #define TRE_FLAGS_BEI		BIT(10)
>   #define TRE_FLAGS_LINK		BIT(11)
> +#define TRE_FLAGS_IMMEDIATE_DMA	BIT(16)
>   #define TRE_FLAGS_TYPE		GENMASK(23, 16)
>   
>   /* SPI CONFIG0 WD0 */
> @@ -64,6 +65,7 @@
>   
>   /* DMA TRE */
>   #define TRE_DMA_LEN		GENMASK(23, 0)
> +#define TRE_DMA_IMMEDIATE_LEN	GENMASK(3, 0)
>   
>   /* Register offsets from gpi-top */
>   #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)	(0x20000 + (0x4000 * (n)) + (0x80 * (k)))
> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>   	dma_addr_t address;
>   	struct gpi_tre *tre;
>   	unsigned int i;
> +	int len;
> +	u8 immediate_dma;

Should be bool

>   
>   	/* first create config tre if applicable */
>   	if (direction == DMA_MEM_TO_DEV && spi->set_config) {
> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>   	tre_idx++;
>   
>   	address = sg_dma_address(sgl);
> -	tre->dword[0] = lower_32_bits(address);
> -	tre->dword[1] = upper_32_bits(address);
> +	len = sg_dma_len(sgl);
>   
> -	tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
> +	immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * sizeof(tre->dword[0]);

I would have added () around 2 * sizeof(tre->dword[0])

> +
> +	/* Support Immediate dma for write transfers for data length up to 8 bytes */
> +	if (immediate_dma) {
> +		/*
> +		 * For Immediate dma, data length may not always be length of 8 bytes,
> +		 * it can be length less than 8, hence initialize both dword's with 0
> +		 */
> +		tre->dword[0] = 0;
> +		tre->dword[1] = 0;
> +		memcpy(&tre->dword[0], sg_virt(sgl), len);
> +
> +		tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
> +	} else {
> +		tre->dword[0] = lower_32_bits(address);
> +		tre->dword[1] = upper_32_bits(address);
> +
> +		tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
> +	}
>   
>   	tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
> -	if (direction == DMA_MEM_TO_DEV)
> -		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
> +	tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);

And you can drop !! if it's a bool

> +	tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
> +					 TRE_FLAGS_IEOT);

I think you can drop !! here as well, the check will return a bool

>   
>   	for (i = 0; i < tre_idx; i++)
>   		dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],

Otherwise I like the simplification :-)

Thanks,
Neil
Jyothi Kumar Seerapu Dec. 4, 2024, 1:25 p.m. UTC | #3
On 12/4/2024 6:15 PM, Dmitry Baryshkov wrote:
> On Wed, Dec 04, 2024 at 05:50:59PM +0530, Jyothi Kumar Seerapu wrote:
>> The DMA TRE(Transfer ring element) buffer contains the DMA
>> buffer address. Accessing data from this address can cause
>> significant delays in SPI transfers, which can be mitigated to
>> some extent by utilizing immediate DMA support.
>>
>> QCOM GPI DMA hardware supports an immediate DMA feature for data
>> up to 8 bytes, storing the data directly in the DMA TRE buffer
>> instead of the DMA buffer address. This enhancement enables faster
>> SPI data transfers.
>>
>> This optimization reduces the average transfer time from 25 us to
>> 16 us for a single SPI transfer of 8 bytes length, with a clock
>> frequency of 50 MHz.
>>
>> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
>> ---
>>
>> v2-> v3:
>>     - When to enable Immediate DMA support, control is moved to GPI driver
>>       from SPI driver.
>>     - Optimizations are done in GPI driver related to immediate dma changes.
>>     - Removed the immediate dma supported changes in qcom-gpi-dma.h file
>>       and handled in GPI driver.
>>
>>     Link to v2:
>> 	https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
>> 	https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/
>>
>> v1 -> v2:
>>     - Separated the patches to dmaengine and spi subsystems
>>     - Removed the changes which are not required for this feature from
>>       qcom-gpi-dma.h file.
>>     - Removed the type conversions used in gpi_create_spi_tre.
>>    
>>     Link to v1:
>> 	https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/
>>
>>   drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
>>   1 file changed, 27 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
>> index 52a7c8f2498f..35451d5a81f7 100644
>> --- a/drivers/dma/qcom/gpi.c
>> +++ b/drivers/dma/qcom/gpi.c
>> @@ -27,6 +27,7 @@
>>   #define TRE_FLAGS_IEOT		BIT(9)
>>   #define TRE_FLAGS_BEI		BIT(10)
>>   #define TRE_FLAGS_LINK		BIT(11)
>> +#define TRE_FLAGS_IMMEDIATE_DMA	BIT(16)
>>   #define TRE_FLAGS_TYPE		GENMASK(23, 16)
>>   
>>   /* SPI CONFIG0 WD0 */
>> @@ -64,6 +65,7 @@
>>   
>>   /* DMA TRE */
>>   #define TRE_DMA_LEN		GENMASK(23, 0)
>> +#define TRE_DMA_IMMEDIATE_LEN	GENMASK(3, 0)
>>   
>>   /* Register offsets from gpi-top */
>>   #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)	(0x20000 + (0x4000 * (n)) + (0x80 * (k)))
>> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>>   	dma_addr_t address;
>>   	struct gpi_tre *tre;
>>   	unsigned int i;
>> +	int len;
>> +	u8 immediate_dma;
>>   
>>   	/* first create config tre if applicable */
>>   	if (direction == DMA_MEM_TO_DEV && spi->set_config) {
>> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>>   	tre_idx++;
>>   
>>   	address = sg_dma_address(sgl);
>> -	tre->dword[0] = lower_32_bits(address);
>> -	tre->dword[1] = upper_32_bits(address);
>> +	len = sg_dma_len(sgl);
>>   
>> -	tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
>> +	immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * sizeof(tre->dword[0]);
> 
> inline this condition, remove extra brackets and split the line after &&.
Hi Dmitry Baryshkov, thanks for the review.
Sure, I will make the changes mentioned below. Please let me know otherwise.

immediate_dma = direction == DMA_MEM_TO_DEV &&
                 len <= 2 * sizeof(tre->dword[0]);
>
>> +
>> +	/* Support Immediate dma for write transfers for data length up to 8 bytes */
>> +	if (immediate_dma) {
>> +		/*
>> +		 * For Immediate dma, data length may not always be length of 8 bytes,
>> +		 * it can be length less than 8, hence initialize both dword's with 0
>> +		 */
>> +		tre->dword[0] = 0;
>> +		tre->dword[1] = 0;
>> +		memcpy(&tre->dword[0], sg_virt(sgl), len);
>> +
>> +		tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
>> +	} else {
>> +		tre->dword[0] = lower_32_bits(address);
>> +		tre->dword[1] = upper_32_bits(address);
>> +
>> +		tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
>> +	}
>>   
>>   	tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
>> -	if (direction == DMA_MEM_TO_DEV)
>> -		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
>> +	tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);
>> +	tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
>> +					 TRE_FLAGS_IEOT);
>>   
>>   	for (i = 0; i < tre_idx; i++)
>>   		dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
>> -- 
>> 2.17.1
>>
>
Jyothi Kumar Seerapu Dec. 4, 2024, 1:29 p.m. UTC | #4
On 12/4/2024 6:19 PM, neil.armstrong@linaro.org wrote:
> Hi,
> 
> On 04/12/2024 13:20, Jyothi Kumar Seerapu wrote:
>> The DMA TRE(Transfer ring element) buffer contains the DMA
>> buffer address. Accessing data from this address can cause
>> significant delays in SPI transfers, which can be mitigated to
>> some extent by utilizing immediate DMA support.
>>
>> QCOM GPI DMA hardware supports an immediate DMA feature for data
>> up to 8 bytes, storing the data directly in the DMA TRE buffer
>> instead of the DMA buffer address. This enhancement enables faster
>> SPI data transfers.
>>
>> This optimization reduces the average transfer time from 25 us to
>> 16 us for a single SPI transfer of 8 bytes length, with a clock
>> frequency of 50 MHz.
>>
>> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
>> ---
>>
>> v2-> v3:
>>     - When to enable Immediate DMA support, control is moved to GPI 
>> driver
>>       from SPI driver.
>>     - Optimizations are done in GPI driver related to immediate dma 
>> changes.
>>     - Removed the immediate dma supported changes in qcom-gpi-dma.h file
>>       and handled in GPI driver.
>>
>>     Link to v2:
>>     https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
>>     https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/
>>
>> v1 -> v2:
>>     - Separated the patches to dmaengine and spi subsystems
>>     - Removed the changes which are not required for this feature from
>>       qcom-gpi-dma.h file.
>>     - Removed the type conversions used in gpi_create_spi_tre.
>>     Link to v1:
>>     https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/
>>
>>   drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
>>   1 file changed, 27 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
>> index 52a7c8f2498f..35451d5a81f7 100644
>> --- a/drivers/dma/qcom/gpi.c
>> +++ b/drivers/dma/qcom/gpi.c
>> @@ -27,6 +27,7 @@
>>   #define TRE_FLAGS_IEOT        BIT(9)
>>   #define TRE_FLAGS_BEI        BIT(10)
>>   #define TRE_FLAGS_LINK        BIT(11)
>> +#define TRE_FLAGS_IMMEDIATE_DMA    BIT(16)
>>   #define TRE_FLAGS_TYPE        GENMASK(23, 16)
>>   /* SPI CONFIG0 WD0 */
>> @@ -64,6 +65,7 @@
>>   /* DMA TRE */
>>   #define TRE_DMA_LEN        GENMASK(23, 0)
>> +#define TRE_DMA_IMMEDIATE_LEN    GENMASK(3, 0)
>>   /* Register offsets from gpi-top */
>>   #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)    (0x20000 + (0x4000 * (n)) 
>> + (0x80 * (k)))
>> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan 
>> *chan, struct gpi_desc *desc,
>>       dma_addr_t address;
>>       struct gpi_tre *tre;
>>       unsigned int i;
>> +    int len;
>> +    u8 immediate_dma;
> 
> Should be bool
Hi Neil, Thanks for the review.
Sure, will change it to bool.
> 
>>       /* first create config tre if applicable */
>>       if (direction == DMA_MEM_TO_DEV && spi->set_config) {
>> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan 
>> *chan, struct gpi_desc *desc,
>>       tre_idx++;
>>       address = sg_dma_address(sgl);
>> -    tre->dword[0] = lower_32_bits(address);
>> -    tre->dword[1] = upper_32_bits(address);
>> +    len = sg_dma_len(sgl);
>> -    tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
>> +    immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * 
>> sizeof(tre->dword[0]);
> 
> I would have added () around 2 * sizeof(tre->dword[0])

Is the condition below fine, or should I add brackets?

immediate_dma = direction == DMA_MEM_TO_DEV &&
                 len <= 2 * sizeof(tre->dword[0]);
> 
>> +
>> +    /* Support Immediate dma for write transfers for data length up 
>> to 8 bytes */
>> +    if (immediate_dma) {
>> +        /*
>> +         * For Immediate dma, data length may not always be length of 
>> 8 bytes,
>> +         * it can be length less than 8, hence initialize both 
>> dword's with 0
>> +         */
>> +        tre->dword[0] = 0;
>> +        tre->dword[1] = 0;
>> +        memcpy(&tre->dword[0], sg_virt(sgl), len);
>> +
>> +        tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
>> +    } else {
>> +        tre->dword[0] = lower_32_bits(address);
>> +        tre->dword[1] = upper_32_bits(address);
>> +
>> +        tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
>> +    }
>>       tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
>> -    if (direction == DMA_MEM_TO_DEV)
>> -        tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
>> +    tre->dword[3] |= u32_encode_bits(!!immediate_dma, 
>> TRE_FLAGS_IMMEDIATE_DMA);
> 
> And you can drop !! if it's a bool
Sure thanks, i will drop !! in V4.
> 
>> +    tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
>> +                     TRE_FLAGS_IEOT);
> 
> I think you can drop !! here as well, the check will return a bool
Sure thanks, i will drop !! in V4.
> 
>>       for (i = 0; i < tre_idx; i++)
>>           dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
> 
> Otherwise I like the simplification :-)
Thanks, i will incorporate the changes in V4.
> 
> Thanks,
> Neil
Neil Armstrong Dec. 4, 2024, 1:31 p.m. UTC | #5
Hi,

On 04/12/2024 14:29, Jyothi Kumar Seerapu wrote:
> 
> 
> On 12/4/2024 6:19 PM, neil.armstrong@linaro.org wrote:
>> Hi,
>>
>> On 04/12/2024 13:20, Jyothi Kumar Seerapu wrote:
>>> The DMA TRE(Transfer ring element) buffer contains the DMA
>>> buffer address. Accessing data from this address can cause
>>> significant delays in SPI transfers, which can be mitigated to
>>> some extent by utilizing immediate DMA support.
>>>
>>> QCOM GPI DMA hardware supports an immediate DMA feature for data
>>> up to 8 bytes, storing the data directly in the DMA TRE buffer
>>> instead of the DMA buffer address. This enhancement enables faster
>>> SPI data transfers.
>>>
>>> This optimization reduces the average transfer time from 25 us to
>>> 16 us for a single SPI transfer of 8 bytes length, with a clock
>>> frequency of 50 MHz.
>>>
>>> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
>>> ---
>>>
>>> v2-> v3:
>>>     - When to enable Immediate DMA support, control is moved to GPI driver
>>>       from SPI driver.
>>>     - Optimizations are done in GPI driver related to immediate dma changes.
>>>     - Removed the immediate dma supported changes in qcom-gpi-dma.h file
>>>       and handled in GPI driver.
>>>
>>>     Link to v2:
>>>     https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
>>>     https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/
>>>
>>> v1 -> v2:
>>>     - Separated the patches to dmaengine and spi subsystems
>>>     - Removed the changes which are not required for this feature from
>>>       qcom-gpi-dma.h file.
>>>     - Removed the type conversions used in gpi_create_spi_tre.
>>>     Link to v1:
>>>     https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/
>>>
>>>   drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
>>>   1 file changed, 27 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
>>> index 52a7c8f2498f..35451d5a81f7 100644
>>> --- a/drivers/dma/qcom/gpi.c
>>> +++ b/drivers/dma/qcom/gpi.c
>>> @@ -27,6 +27,7 @@
>>>   #define TRE_FLAGS_IEOT        BIT(9)
>>>   #define TRE_FLAGS_BEI        BIT(10)
>>>   #define TRE_FLAGS_LINK        BIT(11)
>>> +#define TRE_FLAGS_IMMEDIATE_DMA    BIT(16)
>>>   #define TRE_FLAGS_TYPE        GENMASK(23, 16)
>>>   /* SPI CONFIG0 WD0 */
>>> @@ -64,6 +65,7 @@
>>>   /* DMA TRE */
>>>   #define TRE_DMA_LEN        GENMASK(23, 0)
>>> +#define TRE_DMA_IMMEDIATE_LEN    GENMASK(3, 0)
>>>   /* Register offsets from gpi-top */
>>>   #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)    (0x20000 + (0x4000 * (n)) + (0x80 * (k)))
>>> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>>>       dma_addr_t address;
>>>       struct gpi_tre *tre;
>>>       unsigned int i;
>>> +    int len;
>>> +    u8 immediate_dma;
>>
>> Should be bool
> Hi Neil, Thanks for the review.
> Sure, will change it to bool.
>>
>>>       /* first create config tre if applicable */
>>>       if (direction == DMA_MEM_TO_DEV && spi->set_config) {
>>> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>>>       tre_idx++;
>>>       address = sg_dma_address(sgl);
>>> -    tre->dword[0] = lower_32_bits(address);
>>> -    tre->dword[1] = upper_32_bits(address);
>>> +    len = sg_dma_len(sgl);
>>> -    tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
>>> +    immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * sizeof(tre->dword[0]);
>>
>> I would have added () around 2 * sizeof(tre->dword[0])
> 
> Below condition is fine or should i add brackets ?
> 
> immediate_dma = direction == DMA_MEM_TO_DEV &&
>                  len <= 2 * sizeof(tre->dword[0]);

This is what Dmitry requested, it's fine for me.

Neil

>>
>>> +
>>> +    /* Support Immediate dma for write transfers for data length up to 8 bytes */
>>> +    if (immediate_dma) {
>>> +        /*
>>> +         * For Immediate dma, data length may not always be length of 8 bytes,
>>> +         * it can be length less than 8, hence initialize both dword's with 0
>>> +         */
>>> +        tre->dword[0] = 0;
>>> +        tre->dword[1] = 0;
>>> +        memcpy(&tre->dword[0], sg_virt(sgl), len);
>>> +
>>> +        tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
>>> +    } else {
>>> +        tre->dword[0] = lower_32_bits(address);
>>> +        tre->dword[1] = upper_32_bits(address);
>>> +
>>> +        tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
>>> +    }
>>>       tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
>>> -    if (direction == DMA_MEM_TO_DEV)
>>> -        tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
>>> +    tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);
>>
>> And you can drop !! if it's a bool
> Sure thanks, i will drop !! in V4.
>>
>>> +    tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
>>> +                     TRE_FLAGS_IEOT);
>>
>> I think you can drop !! here as well, the check will return a bool
> Sure thanks, i will drop !! in V4.
>>
>>>       for (i = 0; i < tre_idx; i++)
>>>           dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
>>
>> Otherwise I like the simplification :-)
> Thanks, i will incorporate the changes in V4.
>>
>> Thanks,
>> Neil
Dmitry Baryshkov Dec. 4, 2024, 1:39 p.m. UTC | #6
On Wed, 4 Dec 2024 at 15:25, Jyothi Kumar Seerapu
<quic_jseerapu@quicinc.com> wrote:
>
>
>
> On 12/4/2024 6:15 PM, Dmitry Baryshkov wrote:
> > On Wed, Dec 04, 2024 at 05:50:59PM +0530, Jyothi Kumar Seerapu wrote:
> >> The DMA TRE(Transfer ring element) buffer contains the DMA
> >> buffer address. Accessing data from this address can cause
> >> significant delays in SPI transfers, which can be mitigated to
> >> some extent by utilizing immediate DMA support.
> >>
> >> QCOM GPI DMA hardware supports an immediate DMA feature for data
> >> up to 8 bytes, storing the data directly in the DMA TRE buffer
> >> instead of the DMA buffer address. This enhancement enables faster
> >> SPI data transfers.
> >>
> >> This optimization reduces the average transfer time from 25 us to
> >> 16 us for a single SPI transfer of 8 bytes length, with a clock
> >> frequency of 50 MHz.
> >>
> >> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
> >> ---
> >>
> >> v2-> v3:
> >>     - When to enable Immediate DMA support, control is moved to GPI driver
> >>       from SPI driver.
> >>     - Optimizations are done in GPI driver related to immediate dma changes.
> >>     - Removed the immediate dma supported changes in qcom-gpi-dma.h file
> >>       and handled in GPI driver.
> >>
> >>     Link to v2:
> >>      https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
> >>      https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/
> >>
> >> v1 -> v2:
> >>     - Separated the patches to dmaengine and spi subsystems
> >>     - Removed the changes which are not required for this feature from
> >>       qcom-gpi-dma.h file.
> >>     - Removed the type conversions used in gpi_create_spi_tre.
> >>
> >>     Link to v1:
> >>      https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/
> >>
> >>   drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
> >>   1 file changed, 27 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
> >> index 52a7c8f2498f..35451d5a81f7 100644
> >> --- a/drivers/dma/qcom/gpi.c
> >> +++ b/drivers/dma/qcom/gpi.c
> >> @@ -27,6 +27,7 @@
> >>   #define TRE_FLAGS_IEOT             BIT(9)
> >>   #define TRE_FLAGS_BEI              BIT(10)
> >>   #define TRE_FLAGS_LINK             BIT(11)
> >> +#define TRE_FLAGS_IMMEDIATE_DMA     BIT(16)
> >>   #define TRE_FLAGS_TYPE             GENMASK(23, 16)
> >>
> >>   /* SPI CONFIG0 WD0 */
> >> @@ -64,6 +65,7 @@
> >>
> >>   /* DMA TRE */
> >>   #define TRE_DMA_LEN                GENMASK(23, 0)
> >> +#define TRE_DMA_IMMEDIATE_LEN       GENMASK(3, 0)
> >>
> >>   /* Register offsets from gpi-top */
> >>   #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)     (0x20000 + (0x4000 * (n)) + (0x80 * (k)))
> >> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
> >>      dma_addr_t address;
> >>      struct gpi_tre *tre;
> >>      unsigned int i;
> >> +    int len;
> >> +    u8 immediate_dma;
> >>
> >>      /* first create config tre if applicable */
> >>      if (direction == DMA_MEM_TO_DEV && spi->set_config) {
> >> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
> >>      tre_idx++;
> >>
> >>      address = sg_dma_address(sgl);
> >> -    tre->dword[0] = lower_32_bits(address);
> >> -    tre->dword[1] = upper_32_bits(address);
> >> +    len = sg_dma_len(sgl);
> >>
> >> -    tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
> >> +    immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * sizeof(tre->dword[0]);
> >
> > inline this condition, remove extra brackets and split the line after &&.
> Hi Dmitry Baryshkov, thanks for the review.
> Sure, i will make the changes mentioned below. Please let me know otherwise.
>
> immediate_dma = direction == DMA_MEM_TO_DEV &&
>                  len <= 2 * sizeof(tre->dword[0]);>

I was suggesting to _inline_ this condition rather than having a
separate variable for it.

> >> +
> >> +    /* Support Immediate dma for write transfers for data length up to 8 bytes */
> >> +    if (immediate_dma) {
> >> +            /*
> >> +             * For Immediate dma, data length may not always be length of 8 bytes,
> >> +             * it can be length less than 8, hence initialize both dword's with 0
> >> +             */
> >> +            tre->dword[0] = 0;
> >> +            tre->dword[1] = 0;
> >> +            memcpy(&tre->dword[0], sg_virt(sgl), len);
> >> +
> >> +            tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
> >> +    } else {
> >> +            tre->dword[0] = lower_32_bits(address);
> >> +            tre->dword[1] = upper_32_bits(address);
> >> +
> >> +            tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
> >> +    }
> >>
> >>      tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
> >> -    if (direction == DMA_MEM_TO_DEV)
> >> -            tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
> >> +    tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);
> >> +    tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
> >> +                                     TRE_FLAGS_IEOT);
> >>
> >>      for (i = 0; i < tre_idx; i++)
> >>              dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
> >> --
> >> 2.17.1
> >>
> >
Jyothi Kumar Seerapu Dec. 4, 2024, 1:54 p.m. UTC | #7
On 12/4/2024 7:09 PM, Dmitry Baryshkov wrote:
> On Wed, 4 Dec 2024 at 15:25, Jyothi Kumar Seerapu
> <quic_jseerapu@quicinc.com> wrote:
>>
>>
>>
>> On 12/4/2024 6:15 PM, Dmitry Baryshkov wrote:
>>> On Wed, Dec 04, 2024 at 05:50:59PM +0530, Jyothi Kumar Seerapu wrote:
>>>> The DMA TRE(Transfer ring element) buffer contains the DMA
>>>> buffer address. Accessing data from this address can cause
>>>> significant delays in SPI transfers, which can be mitigated to
>>>> some extent by utilizing immediate DMA support.
>>>>
>>>> QCOM GPI DMA hardware supports an immediate DMA feature for data
>>>> up to 8 bytes, storing the data directly in the DMA TRE buffer
>>>> instead of the DMA buffer address. This enhancement enables faster
>>>> SPI data transfers.
>>>>
>>>> This optimization reduces the average transfer time from 25 us to
>>>> 16 us for a single SPI transfer of 8 bytes length, with a clock
>>>> frequency of 50 MHz.
>>>>
>>>> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
>>>> ---
>>>>
>>>> v2-> v3:
>>>>      - When to enable Immediate DMA support, control is moved to GPI driver
>>>>        from SPI driver.
>>>>      - Optimizations are done in GPI driver related to immediate dma changes.
>>>>      - Removed the immediate dma supported changes in qcom-gpi-dma.h file
>>>>        and handled in GPI driver.
>>>>
>>>>      Link to v2:
>>>>       https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
>>>>       https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/
>>>>
>>>> v1 -> v2:
>>>>      - Separated the patches to dmaengine and spi subsystems
>>>>      - Removed the changes which are not required for this feature from
>>>>        qcom-gpi-dma.h file.
>>>>      - Removed the type conversions used in gpi_create_spi_tre.
>>>>
>>>>      Link to v1:
>>>>       https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/
>>>>
>>>>    drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
>>>>    1 file changed, 27 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
>>>> index 52a7c8f2498f..35451d5a81f7 100644
>>>> --- a/drivers/dma/qcom/gpi.c
>>>> +++ b/drivers/dma/qcom/gpi.c
>>>> @@ -27,6 +27,7 @@
>>>>    #define TRE_FLAGS_IEOT             BIT(9)
>>>>    #define TRE_FLAGS_BEI              BIT(10)
>>>>    #define TRE_FLAGS_LINK             BIT(11)
>>>> +#define TRE_FLAGS_IMMEDIATE_DMA     BIT(16)
>>>>    #define TRE_FLAGS_TYPE             GENMASK(23, 16)
>>>>
>>>>    /* SPI CONFIG0 WD0 */
>>>> @@ -64,6 +65,7 @@
>>>>
>>>>    /* DMA TRE */
>>>>    #define TRE_DMA_LEN                GENMASK(23, 0)
>>>> +#define TRE_DMA_IMMEDIATE_LEN       GENMASK(3, 0)
>>>>
>>>>    /* Register offsets from gpi-top */
>>>>    #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)     (0x20000 + (0x4000 * (n)) + (0x80 * (k)))
>>>> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>>>>       dma_addr_t address;
>>>>       struct gpi_tre *tre;
>>>>       unsigned int i;
>>>> +    int len;
>>>> +    u8 immediate_dma;
>>>>
>>>>       /* first create config tre if applicable */
>>>>       if (direction == DMA_MEM_TO_DEV && spi->set_config) {
>>>> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
>>>>       tre_idx++;
>>>>
>>>>       address = sg_dma_address(sgl);
>>>> -    tre->dword[0] = lower_32_bits(address);
>>>> -    tre->dword[1] = upper_32_bits(address);
>>>> +    len = sg_dma_len(sgl);
>>>>
>>>> -    tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
>>>> +    immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * sizeof(tre->dword[0]);
>>>
>>> inline this condition, remove extra brackets and split the line after &&.
>> Hi Dmitry Baryshkov, thanks for the review.
>> Sure, I will make the changes mentioned below. Please let me know otherwise.
>>
>> immediate_dma = direction == DMA_MEM_TO_DEV &&
>>                   len <= 2 * sizeof(tre->dword[0]);
> 
> I was suggesting to _inline_ this condition rather than having a
> separate variable for it.

I can directly use the condition as follows:
if (direction == DMA_MEM_TO_DEV && len <= 2 * sizeof(tre->dword[0]))

However, this condition also needs to account for the 
"TRE_FLAGS_IMMEDIATE_DMA" update. Therefore, I introduced a separate 
variable.

tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);

Please let me know if it's acceptable to mention the entire condition in 
both places instead of using a separate variable.


> 
>>>> +
>>>> +    /* Support Immediate dma for write transfers for data length up to 8 bytes */
>>>> +    if (immediate_dma) {
>>>> +            /*
>>>> +             * For Immediate dma, data length may not always be length of 8 bytes,
>>>> +             * it can be length less than 8, hence initialize both dword's with 0
>>>> +             */
>>>> +            tre->dword[0] = 0;
>>>> +            tre->dword[1] = 0;
>>>> +            memcpy(&tre->dword[0], sg_virt(sgl), len);
>>>> +
>>>> +            tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
>>>> +    } else {
>>>> +            tre->dword[0] = lower_32_bits(address);
>>>> +            tre->dword[1] = upper_32_bits(address);
>>>> +
>>>> +            tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
>>>> +    }
>>>>
>>>>       tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
>>>> -    if (direction == DMA_MEM_TO_DEV)
>>>> -            tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
>>>> +    tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);
>>>> +    tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
>>>> +                                     TRE_FLAGS_IEOT);
>>>>
>>>>       for (i = 0; i < tre_idx; i++)
>>>>               dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
>>>> --
>>>> 2.17.1
>>>>
>>>
> 
> 
>
Dmitry Baryshkov Dec. 4, 2024, 10:23 p.m. UTC | #8
On Wed, 4 Dec 2024 at 15:54, Jyothi Kumar Seerapu
<quic_jseerapu@quicinc.com> wrote:
>
>
>
> On 12/4/2024 7:09 PM, Dmitry Baryshkov wrote:
> > On Wed, 4 Dec 2024 at 15:25, Jyothi Kumar Seerapu
> > <quic_jseerapu@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 12/4/2024 6:15 PM, Dmitry Baryshkov wrote:
> >>> On Wed, Dec 04, 2024 at 05:50:59PM +0530, Jyothi Kumar Seerapu wrote:
> >>>> The DMA TRE(Transfer ring element) buffer contains the DMA
> >>>> buffer address. Accessing data from this address can cause
> >>>> significant delays in SPI transfers, which can be mitigated to
> >>>> some extent by utilizing immediate DMA support.
> >>>>
> >>>> QCOM GPI DMA hardware supports an immediate DMA feature for data
> >>>> up to 8 bytes, storing the data directly in the DMA TRE buffer
> >>>> instead of the DMA buffer address. This enhancement enables faster
> >>>> SPI data transfers.
> >>>>
> >>>> This optimization reduces the average transfer time from 25 us to
> >>>> 16 us for a single SPI transfer of 8 bytes length, with a clock
> >>>> frequency of 50 MHz.
> >>>>
> >>>> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
> >>>> ---
> >>>>
> >>>> v2-> v3:
> >>>>      - When to enable Immediate DMA support, control is moved to GPI driver
> >>>>        from SPI driver.
> >>>>      - Optimizations are done in GPI driver related to immediate dma changes.
> >>>>      - Removed the immediate dma supported changes in qcom-gpi-dma.h file
> >>>>        and handled in GPI driver.
> >>>>
> >>>>      Link to v2:
> >>>>       https://lore.kernel.org/all/20241128133351.24593-2-quic_jseerapu@quicinc.com/
> >>>>       https://lore.kernel.org/all/20241128133351.24593-3-quic_jseerapu@quicinc.com/
> >>>>
> >>>> v1 -> v2:
> >>>>      - Separated the patches to dmaengine and spi subsystems
> >>>>      - Removed the changes which are not required for this feature from
> >>>>        qcom-gpi-dma.h file.
> >>>>      - Removed the type conversions used in gpi_create_spi_tre.
> >>>>
> >>>>      Link to v1:
> >>>>       https://lore.kernel.org/lkml/20241121115201.2191-2-quic_jseerapu@quicinc.com/
> >>>>
> >>>>    drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++-----
> >>>>    1 file changed, 27 insertions(+), 5 deletions(-)
> >>>>
> >>>> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
> >>>> index 52a7c8f2498f..35451d5a81f7 100644
> >>>> --- a/drivers/dma/qcom/gpi.c
> >>>> +++ b/drivers/dma/qcom/gpi.c
> >>>> @@ -27,6 +27,7 @@
> >>>>    #define TRE_FLAGS_IEOT             BIT(9)
> >>>>    #define TRE_FLAGS_BEI              BIT(10)
> >>>>    #define TRE_FLAGS_LINK             BIT(11)
> >>>> +#define TRE_FLAGS_IMMEDIATE_DMA     BIT(16)
> >>>>    #define TRE_FLAGS_TYPE             GENMASK(23, 16)
> >>>>
> >>>>    /* SPI CONFIG0 WD0 */
> >>>> @@ -64,6 +65,7 @@
> >>>>
> >>>>    /* DMA TRE */
> >>>>    #define TRE_DMA_LEN                GENMASK(23, 0)
> >>>> +#define TRE_DMA_IMMEDIATE_LEN       GENMASK(3, 0)
> >>>>
> >>>>    /* Register offsets from gpi-top */
> >>>>    #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)     (0x20000 + (0x4000 * (n)) + (0x80 * (k)))
> >>>> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
> >>>>       dma_addr_t address;
> >>>>       struct gpi_tre *tre;
> >>>>       unsigned int i;
> >>>> +    int len;
> >>>> +    u8 immediate_dma;
> >>>>
> >>>>       /* first create config tre if applicable */
> >>>>       if (direction == DMA_MEM_TO_DEV && spi->set_config) {
> >>>> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
> >>>>       tre_idx++;
> >>>>
> >>>>       address = sg_dma_address(sgl);
> >>>> -    tre->dword[0] = lower_32_bits(address);
> >>>> -    tre->dword[1] = upper_32_bits(address);
> >>>> +    len = sg_dma_len(sgl);
> >>>>
> >>>> -    tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
> >>>> +    immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * sizeof(tre->dword[0]);
> >>>
> >>> inline this condition, remove extra brackets and split the line after &&.
> >> Hi Dmitry Baryshkov, thanks for the review.
> >> Sure, I will make the changes mentioned below. Please let me know otherwise.
> >>
> >> immediate_dma = direction == DMA_MEM_TO_DEV &&
> >>                   len <= 2 * sizeof(tre->dword[0]);
> >
> > I was suggesting to _inline_ this condition rather than having a
> > separate variable for it.
>
> I can directly use the condition as follows:
> if (direction == DMA_MEM_TO_DEV && len <= 2 * sizeof(tre->dword[0]))
>
> However, this condition also needs to account for the
> "TRE_FLAGS_IMMEDIATE_DMA" update. Therefore, I introduced a separate
> variable.
>
> tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);
>
> Please let me know if it's acceptable to mention the entire condition in
> both places instead of using a separate variable.

Move the flag setting under the if() too.

>
>
> >
> >>>> +
> >>>> +    /* Support Immediate dma for write transfers for data length up to 8 bytes */
> >>>> +    if (immediate_dma) {
> >>>> +            /*
> >>>> +             * For Immediate dma, data length may not always be length of 8 bytes,
> >>>> +             * it can be length less than 8, hence initialize both dword's with 0
> >>>> +             */
> >>>> +            tre->dword[0] = 0;
> >>>> +            tre->dword[1] = 0;
> >>>> +            memcpy(&tre->dword[0], sg_virt(sgl), len);
> >>>> +
> >>>> +            tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
> >>>> +    } else {
> >>>> +            tre->dword[0] = lower_32_bits(address);
> >>>> +            tre->dword[1] = upper_32_bits(address);
> >>>> +
> >>>> +            tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
> >>>> +    }
> >>>>
> >>>>       tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
> >>>> -    if (direction == DMA_MEM_TO_DEV)
> >>>> -            tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
> >>>> +    tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);
> >>>> +    tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
> >>>> +                                     TRE_FLAGS_IEOT);
> >>>>
> >>>>       for (i = 0; i < tre_idx; i++)
> >>>>               dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
> >>>> --
> >>>> 2.17.1
> >>>>
> >>>
> >
> >
> >
diff mbox series

Patch

diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index 52a7c8f2498f..35451d5a81f7 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -27,6 +27,7 @@ 
 #define TRE_FLAGS_IEOT		BIT(9)
 #define TRE_FLAGS_BEI		BIT(10)
 #define TRE_FLAGS_LINK		BIT(11)
+#define TRE_FLAGS_IMMEDIATE_DMA	BIT(16)
 #define TRE_FLAGS_TYPE		GENMASK(23, 16)
 
 /* SPI CONFIG0 WD0 */
@@ -64,6 +65,7 @@ 
 
 /* DMA TRE */
 #define TRE_DMA_LEN		GENMASK(23, 0)
+#define TRE_DMA_IMMEDIATE_LEN	GENMASK(3, 0)
 
 /* Register offsets from gpi-top */
 #define GPII_n_CH_k_CNTXT_0_OFFS(n, k)	(0x20000 + (0x4000 * (n)) + (0x80 * (k)))
@@ -1711,6 +1713,8 @@  static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
 	dma_addr_t address;
 	struct gpi_tre *tre;
 	unsigned int i;
+	int len;
+	u8 immediate_dma;
 
 	/* first create config tre if applicable */
 	if (direction == DMA_MEM_TO_DEV && spi->set_config) {
@@ -1763,14 +1767,32 @@  static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
 	tre_idx++;
 
 	address = sg_dma_address(sgl);
-	tre->dword[0] = lower_32_bits(address);
-	tre->dword[1] = upper_32_bits(address);
+	len = sg_dma_len(sgl);
 
-	tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
+	immediate_dma = (direction == DMA_MEM_TO_DEV) && len <= 2 * sizeof(tre->dword[0]);
+
+	/* Support Immediate dma for write transfers for data length up to 8 bytes */
+	if (immediate_dma) {
+		/*
+		 * For immediate DMA the data may be shorter than 8 bytes, so zero
+		 * both dwords first; bytes beyond len then remain 0 after the copy.
+		 */
+		tre->dword[0] = 0;
+		tre->dword[1] = 0;
+		memcpy(&tre->dword[0], sg_virt(sgl), len);
+
+		tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN);
+	} else {
+		tre->dword[0] = lower_32_bits(address);
+		tre->dword[1] = upper_32_bits(address);
+
+		tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN);
+	}
 
 	tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
-	if (direction == DMA_MEM_TO_DEV)
-		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
+	tre->dword[3] |= u32_encode_bits(!!immediate_dma, TRE_FLAGS_IMMEDIATE_DMA);
+	tre->dword[3] |= u32_encode_bits(!!(direction == DMA_MEM_TO_DEV),
+					 TRE_FLAGS_IEOT);
 
 	for (i = 0; i < tre_idx; i++)
 		dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],