Message ID | 20241128133351.24593-2-quic_jseerapu@quicinc.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add GPI immediate DMA support for SPI | expand |
On Thu, Nov 28, 2024 at 07:03:50PM +0530, Jyothi Kumar Seerapu wrote: > The DMA TRE(Transfer ring element) buffer contains the DMA > buffer address. Accessing data from this address can cause > significant delays in SPI transfers, which can be mitigated to > some extent by utilizing immediate DMA support. > > QCOM GPI DMA hardware supports an immediate DMA feature for data > up to 8 bytes, storing the data directly in the DMA TRE buffer > instead of the DMA buffer address. This enhancement enables faster > SPI data transfers. > > This optimization reduces the average transfer time from 25 us to > 16 us for a single SPI transfer of 8 bytes length, with a clock > frequency of 50 MHz. > > Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com> > --- > v1 -> v2: > - Separated the patches to dmaengine and spi subsystems > - Removed the changes which are not required for this feature from > qcom-gpi-dma.h file. > - Removed the type conversions used in gpi_create_spi_tre. > > drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++----- > include/linux/dma/qcom-gpi-dma.h | 6 ++++++ > 2 files changed, 33 insertions(+), 5 deletions(-) > > diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c > index 52a7c8f2498f..4c5df696ddd8 100644 > --- a/drivers/dma/qcom/gpi.c > +++ b/drivers/dma/qcom/gpi.c > @@ -27,6 +27,7 @@ > #define TRE_FLAGS_IEOT BIT(9) > #define TRE_FLAGS_BEI BIT(10) > #define TRE_FLAGS_LINK BIT(11) > +#define TRE_FLAGS_IMMEDIATE_DMA BIT(16) > #define TRE_FLAGS_TYPE GENMASK(23, 16) > > /* SPI CONFIG0 WD0 */ > @@ -64,6 +65,7 @@ > > /* DMA TRE */ > #define TRE_DMA_LEN GENMASK(23, 0) > +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) > > /* Register offsets from gpi-top */ > #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) > @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, > dma_addr_t address; > struct gpi_tre *tre; > unsigned int i; > + u8 *buf; > + int len = 0; > > /* first 
create config tre if applicable */ > if (direction == DMA_MEM_TO_DEV && spi->set_config) { > @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, > tre_idx++; > > address = sg_dma_address(sgl); > - tre->dword[0] = lower_32_bits(address); > - tre->dword[1] = upper_32_bits(address); > + len = sg_dma_len(sgl); > > - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); > + /* Support Immediate dma for write transfers for data length up to 8 bytes */ > + if ((spi->flags & QCOM_GPI_IMMEDIATE_DMA) && direction == DMA_MEM_TO_DEV) { Please defer applying the patch until the discussion on v1 comes to a conclusion. > + buf = sg_virt(sgl); > > - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > - if (direction == DMA_MEM_TO_DEV) > + /* memcpy may not always be length of 8, hence pre-fill both dword's with 0 */ > + tre->dword[0] = 0; > + tre->dword[1] = 0; > + memcpy(&tre->dword[0], buf, len); > + > + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); > + > + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); > + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IMMEDIATE_DMA); > + } else { > + tre->dword[0] = lower_32_bits(address); > + tre->dword[1] = upper_32_bits(address); > + > + tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN); > + > + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > + if (direction == DMA_MEM_TO_DEV) > + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); > + } > > for (i = 0; i < tre_idx; i++) > dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], > diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h > index 6680dd1a43c6..84598848d53a 100644 > --- a/include/linux/dma/qcom-gpi-dma.h > +++ b/include/linux/dma/qcom-gpi-dma.h > @@ -15,6 +15,10 @@ enum spi_transfer_cmd { > SPI_DUPLEX, > }; > > +#define QCOM_GPI_IMMEDIATE_DMA BIT(1) > + > +#define
QCOM_GPI_IMMEDIATE_DMA_LEN 8 > + > /** > * struct gpi_spi_config - spi config for peripheral > * > @@ -30,6 +34,7 @@ enum spi_transfer_cmd { > * @cs: chip select toggle > * @set_config: set peripheral config > * @rx_len: receive length for buffer > + * @flags: true for immediate dma support > */ > struct gpi_spi_config { > u8 set_config; > @@ -44,6 +49,7 @@ struct gpi_spi_config { > u32 clk_src; > enum spi_transfer_cmd cmd; > u32 rx_len; > + u8 flags; > }; > > enum i2c_op { > -- > 2.17.1 >
On Thu, Nov 28, 2024 at 07:03:50PM +0530, Jyothi Kumar Seerapu wrote: > The DMA TRE(Transfer ring element) buffer contains the DMA > buffer address. Accessing data from this address can cause > significant delays in SPI transfers, which can be mitigated to > some extent by utilizing immediate DMA support. > > QCOM GPI DMA hardware supports an immediate DMA feature for data > up to 8 bytes, storing the data directly in the DMA TRE buffer > instead of the DMA buffer address. This enhancement enables faster > SPI data transfers. > > This optimization reduces the average transfer time from 25 us to > 16 us for a single SPI transfer of 8 bytes length, with a clock > frequency of 50 MHz. > > Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com> > --- > v1 -> v2: > - Separated the patches to dmaengine and spi subsystems > - Removed the changes which are not required for this feature from > qcom-gpi-dma.h file. > - Removed the type conversions used in gpi_create_spi_tre. > > drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++----- > include/linux/dma/qcom-gpi-dma.h | 6 ++++++ > 2 files changed, 33 insertions(+), 5 deletions(-) > > diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c > index 52a7c8f2498f..4c5df696ddd8 100644 > --- a/drivers/dma/qcom/gpi.c > +++ b/drivers/dma/qcom/gpi.c > @@ -27,6 +27,7 @@ > #define TRE_FLAGS_IEOT BIT(9) > #define TRE_FLAGS_BEI BIT(10) > #define TRE_FLAGS_LINK BIT(11) > +#define TRE_FLAGS_IMMEDIATE_DMA BIT(16) > #define TRE_FLAGS_TYPE GENMASK(23, 16) > > /* SPI CONFIG0 WD0 */ > @@ -64,6 +65,7 @@ > > /* DMA TRE */ > #define TRE_DMA_LEN GENMASK(23, 0) > +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) > > /* Register offsets from gpi-top */ > #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) > @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, > dma_addr_t address; > struct gpi_tre *tre; > unsigned int i; > + u8 *buf; > + int len = 0; First use of 
"len" is an assignment, so you shouldn't zero-initialize it here. > > /* first create config tre if applicable */ > if (direction == DMA_MEM_TO_DEV && spi->set_config) { > @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, > tre_idx++; > > address = sg_dma_address(sgl); > - tre->dword[0] = lower_32_bits(address); > - tre->dword[1] = upper_32_bits(address); > + len = sg_dma_len(sgl); > > - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); > + /* Support Immediate dma for write transfers for data length up to 8 bytes */ And what happens if the developer writing the SPI driver forgets to read this comment and sets QCOM_GPI_IMMEDIATE_DMA for a 9 byte transfer? > + if ((spi->flags & QCOM_GPI_IMMEDIATE_DMA) && direction == DMA_MEM_TO_DEV) { Why is this flag introduced? If I understand the next patch, all DMA_MEM_TO_DEV transfers of <= QCOM_GPI_IMMEDIATE_DMA_LEN can use the immediate mode, so why not move the condition here? Also ordering[1]. if (direction == DMA_MEM_TO_DEV && len <= 2 * sizeof(tre->dword[0])) [1] Compare "all transfers of length 8 or less, which are mem to device" vs "all transfers which are mem to device, with a length of 8 or less". The bigger "selection criteria" is the direction, then that's fine tuned by the length query. > + buf = sg_virt(sgl); It's a question of style, but I think you could just put the sg_virt() directly in the memcpy() call and avoid the extra variable. > > - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > - if (direction == DMA_MEM_TO_DEV) > + /* memcpy may not always be length of 8, hence pre-fill both dword's with 0 */ The implementation of memcpy() is always more than 8 bytes, it's buf that might be less than 8 bytes ;) Also you're not "pre-filling", you're "zero-initializing", or just "initialize". That said, does it matter? Will the QUP read beyond the TRE_DMA_IMMEDIATE_LEN bytes? 
If so, please put _that_ in the comment ("QUP reads beyond the provided len, so additional content needs to be cleared", or similar) > + tre->dword[0] = 0; > + tre->dword[1] = 0; > + memcpy(&tre->dword[0], buf, len); > + > + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); Does the format of tre->dword[2] really change when TRE_FLAGS_IMMEDIATE_DMA is set, or is TRE_DMA_IMMEDIATE_LEN just a mask to highlight that len can't be more than 4 bits? It seems like you could drop TRE_DMA_IMMEDIATE_LEN and just use TRE_DMA_LEN here? (But it should match what the hardware programming guide states) Perhaps you could reduce the scope of this if/else then as well, as the assignment of dword[2] and dword[3] is mostly the same with and without immediate mode (just the one bit to enable it) > + > + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); > + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IMMEDIATE_DMA); > + } else { > + tre->dword[0] = lower_32_bits(address); > + tre->dword[1] = upper_32_bits(address); > + > + tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN); > + > + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > + if (direction == DMA_MEM_TO_DEV) > + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); > + } > > for (i = 0; i < tre_idx; i++) > dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], > diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h > index 6680dd1a43c6..84598848d53a 100644 > --- a/include/linux/dma/qcom-gpi-dma.h > +++ b/include/linux/dma/qcom-gpi-dma.h > @@ -15,6 +15,10 @@ enum spi_transfer_cmd { > SPI_DUPLEX, > }; > > +#define QCOM_GPI_IMMEDIATE_DMA BIT(1) > + > +#define QCOM_GPI_IMMEDIATE_DMA_LEN 8 > + > /** > * struct gpi_spi_config - spi config for peripheral > * > @@ -30,6 +34,7 @@ enum spi_transfer_cmd { > * @cs: chip select toggle > * @set_config: set peripheral config > * @rx_len: receive length for buffer >
+ * @flags: true for immediate dma support Per above I think you can remove this flag, but "true for immediate DMA support" doesn't match what you have written in the code. (Also in general u8 shouldn't be "true") Regards, Bjorn > */ > struct gpi_spi_config { > u8 set_config; > @@ -44,6 +49,7 @@ struct gpi_spi_config { > u32 clk_src; > enum spi_transfer_cmd cmd; > u32 rx_len; > + u8 flags; > }; > > enum i2c_op { > -- > 2.17.1 > >
diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 52a7c8f2498f..4c5df696ddd8 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -27,6 +27,7 @@ #define TRE_FLAGS_IEOT BIT(9) #define TRE_FLAGS_BEI BIT(10) #define TRE_FLAGS_LINK BIT(11) +#define TRE_FLAGS_IMMEDIATE_DMA BIT(16) #define TRE_FLAGS_TYPE GENMASK(23, 16) /* SPI CONFIG0 WD0 */ @@ -64,6 +65,7 @@ /* DMA TRE */ #define TRE_DMA_LEN GENMASK(23, 0) +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) /* Register offsets from gpi-top */ #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, dma_addr_t address; struct gpi_tre *tre; unsigned int i; + u8 *buf; + int len = 0; /* first create config tre if applicable */ if (direction == DMA_MEM_TO_DEV && spi->set_config) { @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, tre_idx++; address = sg_dma_address(sgl); - tre->dword[0] = lower_32_bits(address); - tre->dword[1] = upper_32_bits(address); + len = sg_dma_len(sgl); - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); + /* Support Immediate dma for write transfers for data length up to 8 bytes */ + if ((spi->flags & QCOM_GPI_IMMEDIATE_DMA) && direction == DMA_MEM_TO_DEV) { + buf = sg_virt(sgl); - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); - if (direction == DMA_MEM_TO_DEV) + /* memcpy may not always be length of 8, hence pre-fill both dword's with 0 */ + tre->dword[0] = 0; + tre->dword[1] = 0; + memcpy(&tre->dword[0], buf, len); + + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IMMEDIATE_DMA); + } else { + tre->dword[0] = lower_32_bits(address); + tre->dword[1] = upper_32_bits(address); + + tre->dword[2] = 
u32_encode_bits(len, TRE_DMA_LEN); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); + if (direction == DMA_MEM_TO_DEV) + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); + } for (i = 0; i < tre_idx; i++) dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h index 6680dd1a43c6..84598848d53a 100644 --- a/include/linux/dma/qcom-gpi-dma.h +++ b/include/linux/dma/qcom-gpi-dma.h @@ -15,6 +15,10 @@ enum spi_transfer_cmd { SPI_DUPLEX, }; +#define QCOM_GPI_IMMEDIATE_DMA BIT(1) + +#define QCOM_GPI_IMMEDIATE_DMA_LEN 8 + /** * struct gpi_spi_config - spi config for peripheral * @@ -30,6 +34,7 @@ enum spi_transfer_cmd { * @cs: chip select toggle * @set_config: set peripheral config * @rx_len: receive length for buffer + * @flags: true for immediate dma support */ struct gpi_spi_config { u8 set_config; @@ -44,6 +49,7 @@ struct gpi_spi_config { u32 clk_src; enum spi_transfer_cmd cmd; u32 rx_len; + u8 flags; }; enum i2c_op {
The DMA TRE(Transfer ring element) buffer contains the DMA buffer address. Accessing data from this address can cause significant delays in SPI transfers, which can be mitigated to some extent by utilizing immediate DMA support. QCOM GPI DMA hardware supports an immediate DMA feature for data up to 8 bytes, storing the data directly in the DMA TRE buffer instead of the DMA buffer address. This enhancement enables faster SPI data transfers. This optimization reduces the average transfer time from 25 us to 16 us for a single SPI transfer of 8 bytes length, with a clock frequency of 50 MHz. Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com> --- v1 -> v2: - Separated the patches to dmaengine and spi subsystems - Removed the changes which are not required for this feature from qcom-gpi-dma.h file. - Removed the type conversions used in gpi_create_spi_tre. drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++----- include/linux/dma/qcom-gpi-dma.h | 6 ++++++ 2 files changed, 33 insertions(+), 5 deletions(-)