
[4/7] spi: spi-geni-qcom: Add support for GPI dma

Message ID: 20210111151651.1616813-5-vkoul@kernel.org (mailing list archive)
State: New, archived
Series: Add and enable GPI DMA users

Commit Message

Vinod Koul Jan. 11, 2021, 3:16 p.m. UTC
We can use GPI DMA for devices where it is enabled by firmware. Add
support for this mode.

Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/spi/spi-geni-qcom.c | 395 +++++++++++++++++++++++++++++++++++-
 1 file changed, 384 insertions(+), 11 deletions(-)

Comments

Mark Brown Jan. 11, 2021, 4:35 p.m. UTC | #1
On Mon, Jan 11, 2021 at 08:46:48PM +0530, Vinod Koul wrote:

> +static int get_xfer_mode(struct spi_master *spi)
> +{
> +	struct spi_geni_master *mas = spi_master_get_devdata(spi);
> +	struct geni_se *se = &mas->se;
> +	int mode = GENI_SE_FIFO;

Why not use the core DMA mapping support?
Bjorn Andersson Jan. 11, 2021, 5:19 p.m. UTC | #2
On Mon 11 Jan 09:16 CST 2021, Vinod Koul wrote:

> We can use GPI DMA for devices where it is enabled by firmware. Add
> support for this mode.
> 
> Signed-off-by: Vinod Koul <vkoul@kernel.org>
> ---
>  drivers/spi/spi-geni-qcom.c | 395 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 384 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
> index 512e925d5ea4..5bb0e2192734 100644
> --- a/drivers/spi/spi-geni-qcom.c
> +++ b/drivers/spi/spi-geni-qcom.c
> @@ -2,6 +2,8 @@
>  // Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
>  
>  #include <linux/clk.h>
> +#include <linux/dmaengine.h>
> +#include <linux/dma-mapping.h>
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/log2.h>
> @@ -10,6 +12,7 @@
>  #include <linux/pm_opp.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/qcom-geni-se.h>
> +#include <linux/dma/qcom-gpi-dma.h>
>  #include <linux/spi/spi.h>
>  #include <linux/spinlock.h>
>  
> @@ -63,6 +66,35 @@
>  #define TIMESTAMP_AFTER		BIT(3)
>  #define POST_CMD_DELAY		BIT(4)
>  
> +#define GSI_LOOPBACK_EN		(BIT(0))
> +#define GSI_CS_TOGGLE		(BIT(3))
> +#define GSI_CPHA		(BIT(4))
> +#define GSI_CPOL		(BIT(5))
> +
> +#define MAX_TX_SG		(3)
> +#define NUM_SPI_XFER		(8)
> +#define SPI_XFER_TIMEOUT_MS	(250)
> +
> +struct gsi_desc_cb {
> +	struct spi_geni_master *mas;
> +	struct spi_transfer *xfer;
> +};
> +
> +struct spi_geni_gsi {
> +	dma_cookie_t tx_cookie;
> +	dma_cookie_t rx_cookie;
> +	struct dma_async_tx_descriptor *tx_desc;
> +	struct dma_async_tx_descriptor *rx_desc;
> +	struct gsi_desc_cb desc_cb;
> +};
> +
> +enum spi_m_cmd_opcode {
> +	CMD_NONE,
> +	CMD_XFER,
> +	CMD_CS,
> +	CMD_CANCEL,
> +};
> +
>  struct spi_geni_master {
>  	struct geni_se se;
>  	struct device *dev;
> @@ -79,10 +111,21 @@ struct spi_geni_master {
>  	struct completion cs_done;
>  	struct completion cancel_done;
>  	struct completion abort_done;
> +	struct completion xfer_done;
>  	unsigned int oversampling;
>  	spinlock_t lock;
>  	int irq;
>  	bool cs_flag;
> +	struct spi_geni_gsi *gsi;
> +	struct dma_chan *tx;
> +	struct dma_chan *rx;
> +	struct completion tx_cb;
> +	struct completion rx_cb;
> +	bool qn_err;
> +	int cur_xfer_mode;
> +	int num_tx_eot;
> +	int num_rx_eot;
> +	int num_xfers;
>  };
>  
>  static int get_spi_clk_cfg(unsigned int speed_hz,
> @@ -274,31 +317,269 @@ static int setup_fifo_params(struct spi_device *spi_slv,
>  	return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz);
>  }
>  
> +static int get_xfer_mode(struct spi_master *spi)
> +{
> +	struct spi_geni_master *mas = spi_master_get_devdata(spi);
> +	struct geni_se *se = &mas->se;
> +	int mode = GENI_SE_FIFO;

No need to initialize mode, it's overwritten in all code paths.

> +	int fifo_disable;
> +	bool dma_chan_valid;
> +
> +	fifo_disable = readl(se->base + GENI_IF_DISABLE_RO) & FIFO_IF_DISABLE;
> +	dma_chan_valid = !(IS_ERR_OR_NULL(mas->tx) || IS_ERR_OR_NULL(mas->rx));
> +
> +	/*
> +	 * If FIFO Interface is disabled and there are no DMA channels then we
> +	 * can't do this transfer.
> +	 * If FIFO interface is disabled, we can do GSI only,
> +	 * else pick FIFO mode.
> +	 */
> +	if (fifo_disable && !dma_chan_valid) {
> +		dev_err(mas->dev, "Fifo and dma mode disabled!! can't xfer\n");
> +		mode = -EINVAL;
> +	} else if (fifo_disable) {
> +		mode = GENI_GPI_DMA;
> +	} else {
> +		mode = GENI_SE_FIFO;
> +	}
> +
> +	return mode;

Maybe make this the tail:

	if (!dma_chan_valid && fifo_disable)
		return -EINVAL;

	return fifo_disable ? GENI_GPI_DMA : GENI_SE_FIFO;

At least to me that's more obvious.

> +}
> +
> +static void
> +spi_gsi_callback_result(void *cb, const struct dmaengine_result *result, bool tx)
> +{
> +	struct gsi_desc_cb *gsi = cb;
> +
> +	if (result->result != DMA_TRANS_NOERROR) {
> +		dev_err(gsi->mas->dev, "%s: DMA %s txn failed\n", __func__, tx ? "tx" : "rx");

"GSI DMA %s failed\n" is just as unique, without the need for __func__.

> +		return;
> +	}
> +
> +	if (!result->residue) {
> +		dev_dbg(gsi->mas->dev, "%s\n", __func__);

You have @tx, so how about including that to make the debug print
slightly more useful?
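
Something like this, perhaps (untested sketch, message text made up):

	dev_dbg(gsi->mas->dev, "DMA %s xfer done\n", tx ? "tx" : "rx");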

> +		if (tx)
> +			complete(&gsi->mas->tx_cb);
> +		else
> +			complete(&gsi->mas->rx_cb);
> +	} else {
> +		dev_err(gsi->mas->dev, "DMA xfer has pending: %d\n", result->residue);
> +	}
> +}
> +
> +static void
> +spi_gsi_rx_callback_result(void *cb, const struct dmaengine_result *result)
> +{
> +	spi_gsi_callback_result(cb, result, false);
> +}
> +
> +static void
> +spi_gsi_tx_callback_result(void *cb, const struct dmaengine_result *result)
> +{
> +	spi_gsi_callback_result(cb, result, true);
> +}
> +
> +static int setup_gsi_xfer(struct spi_transfer *xfer, struct spi_geni_master *mas,
> +			  struct spi_device *spi_slv, struct spi_master *spi)
> +{
> +	int ret = 0;
> +	unsigned long flags = DMA_PREP_INTERRUPT | DMA_CTRL_ACK;
> +	struct spi_geni_gsi *gsi;
> +	struct dma_slave_config config;
> +	struct gpi_spi_config peripheral;
> +
> +	memset(&config, 0, sizeof(config));

Assign {} during the declaration and you don't need to start with a
memset.
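
I.e. (sketch):

	struct dma_slave_config config = {};
	struct gpi_spi_config peripheral = {};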

> +	memset(&peripheral, 0, sizeof(peripheral));
> +	config.peripheral_config = &peripheral;
> +	config.peripheral_size = sizeof(peripheral);
> +
> +	if (xfer->bits_per_word != mas->cur_bits_per_word ||
> +	    xfer->speed_hz != mas->cur_speed_hz) {
> +		mas->cur_bits_per_word = xfer->bits_per_word;
> +		mas->cur_speed_hz = xfer->speed_hz;
> +		peripheral.set_config = true;
> +	}
> +
> +	if (!(mas->cur_bits_per_word % MIN_WORD_LEN)) {
> +		peripheral.rx_len = ((xfer->len << 3) / mas->cur_bits_per_word);
> +	} else {
> +		int bytes_per_word = (mas->cur_bits_per_word / BITS_PER_BYTE) + 1;
> +
> +		peripheral.rx_len = (xfer->len / bytes_per_word);
> +	}
> +
> +	if (xfer->tx_buf && xfer->rx_buf) {
> +		peripheral.cmd = SPI_DUPLEX;
> +	} else if (xfer->tx_buf) {
> +		peripheral.cmd = SPI_TX;
> +		peripheral.rx_len = 0;
> +	} else if (xfer->rx_buf) {
> +		peripheral.cmd = SPI_RX;
> +	}
> +
> +	peripheral.cs = spi_slv->chip_select;
> +
> +	if (spi_slv->mode & SPI_LOOP)
> +		peripheral.loopback_en = true;
> +	if (spi_slv->mode & SPI_CPOL)
> +		peripheral.clock_pol_high = true;
> +	if (spi_slv->mode & SPI_CPHA)
> +		peripheral.data_pol_high = true;
> +	peripheral.pack_en = true;
> +	peripheral.word_len = xfer->bits_per_word - MIN_WORD_LEN;
> +	ret = get_spi_clk_cfg(mas->cur_speed_hz, mas,
> +			      &peripheral.clk_src, &peripheral.clk_div);
> +	if (ret) {
> +		dev_err(mas->dev, "%s:Err setting clks:%d\n", __func__, ret);

Please avoid the __func__.

> +		return ret;
> +	}
> +
> +	if (!xfer->cs_change) {
> +		if (!list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))
> +			peripheral.fragmentation = FRAGMENTATION;
> +	}
> +
> +	gsi = &mas->gsi[mas->num_xfers];
> +	gsi->desc_cb.mas = mas;
> +	gsi->desc_cb.xfer = xfer;
> +	if (peripheral.cmd & SPI_RX) {
> +		dmaengine_slave_config(mas->rx, &config);
> +		gsi->rx_desc = dmaengine_prep_slave_single(mas->rx, xfer->rx_dma,
> +							   xfer->len, DMA_DEV_TO_MEM, flags);
> +		if (IS_ERR_OR_NULL(gsi->rx_desc)) {

Is this API really returning IS_ERR() or NULL on failure?

Looking at the GPI driver it seems to exclusively return NULL on failure
(for things that in most other subsystems would be -EINVAL).
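
So a plain NULL check should be sufficient here (untested sketch):

	if (!gsi->rx_desc) {
		dev_err(mas->dev, "Err setting up rx desc\n");
		return -EIO;
	}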

> +			dev_err(mas->dev, "Err setting up rx desc\n");
> +			return -EIO;
> +		}
> +		gsi->rx_desc->callback_result = spi_gsi_rx_callback_result;
> +		gsi->rx_desc->callback_param = &gsi->desc_cb;
> +		mas->num_rx_eot++;
> +	}
> +
> +	if (peripheral.cmd & SPI_TX_ONLY)
> +		mas->num_tx_eot++;
> +
> +	dmaengine_slave_config(mas->tx, &config);
> +	gsi->tx_desc = dmaengine_prep_slave_single(mas->tx, xfer->tx_dma,
> +						   xfer->len, DMA_MEM_TO_DEV, flags);
> +
> +	if (IS_ERR_OR_NULL(gsi->tx_desc)) {

Is there anything that will clean up rx_desc when this happens?

> +		dev_err(mas->dev, "Err setting up tx desc\n");
> +		return -EIO;
> +	}
> +	gsi->tx_desc->callback_result = spi_gsi_tx_callback_result;
> +	gsi->tx_desc->callback_param = &gsi->desc_cb;
> +	if (peripheral.cmd & SPI_RX)
> +		gsi->rx_cookie = dmaengine_submit(gsi->rx_desc);
> +	gsi->tx_cookie = dmaengine_submit(gsi->tx_desc);
> +	if (peripheral.cmd & SPI_RX)
> +		dma_async_issue_pending(mas->rx);
> +	dma_async_issue_pending(mas->tx);
> +	mas->num_xfers++;
> +	return ret;

ret can only be 0 here.

> +}
> +
> +static int spi_geni_map_buf(struct spi_geni_master *mas, struct spi_message *msg)
> +{
> +	struct spi_transfer *xfer;
> +	struct device *gsi_dev = mas->dev->parent;
> +
> +	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
> +		if (xfer->rx_buf) {
> +			xfer->rx_dma = dma_map_single(gsi_dev, xfer->rx_buf,
> +						      xfer->len, DMA_FROM_DEVICE);
> +			if (dma_mapping_error(mas->dev, xfer->rx_dma)) {
> +				dev_err(mas->dev, "Err mapping buf\n");

You need to unmap rx_dma/tx_dma for all previous entries in
&msg->transfers.
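
E.g. with an unwind loop at the end of the function (untested sketch;
in the tx error case the current entry's rx mapping would still need
to be unmapped before jumping here):

err:
	list_for_each_entry_continue_reverse(xfer, &msg->transfers, transfer_list) {
		if (xfer->rx_buf)
			dma_unmap_single(gsi_dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE);
		if (xfer->tx_buf)
			dma_unmap_single(gsi_dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
	}
	return -ENOMEM;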

> +				return -ENOMEM;
> +			}
> +		}
> +
> +		if (xfer->tx_buf) {
> +			xfer->tx_dma = dma_map_single(gsi_dev, (void *)xfer->tx_buf,
> +						      xfer->len, DMA_TO_DEVICE);
> +			if (dma_mapping_error(gsi_dev, xfer->tx_dma)) {
> +				dev_err(mas->dev, "Err mapping buf\n");
> +				dma_unmap_single(gsi_dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE);

This should only be done if xfer->rx_buf != NULL, right?

> +				return -ENOMEM;
> +			}
> +		}
> +	};
> +
> +	return 0;
> +}
> +
> +static void spi_geni_unmap_buf(struct spi_geni_master *mas, struct spi_message *msg)
> +{
> +	struct spi_transfer *xfer;
> +	struct device *gsi_dev = mas->dev;
> +
> +	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
> +		if (xfer->rx_buf)
> +			dma_unmap_single(gsi_dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE);
> +		if (xfer->tx_buf)
> +			dma_unmap_single(gsi_dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
> +	};
> +}
> +
>  static int spi_geni_prepare_message(struct spi_master *spi,
>  					struct spi_message *spi_msg)
>  {
>  	int ret;
>  	struct spi_geni_master *mas = spi_master_get_devdata(spi);
> +	struct geni_se *se = &mas->se;
> +
> +	mas->cur_xfer_mode = get_xfer_mode(spi);
> +
> +	if (mas->cur_xfer_mode == GENI_SE_FIFO) {
> +		geni_se_select_mode(se, GENI_SE_FIFO);
> +		reinit_completion(&mas->xfer_done);
> +		ret = setup_fifo_params(spi_msg->spi, spi);
> +		if (ret)
> +			dev_err(mas->dev, "Couldn't select mode %d\n", ret);

Afaict all error paths of setup_fifo_params() will have printed an error
message when we get here.

So

	switch (mas->cur_xfer_mode) {
	case GENI_SE_FIFO:
		...
		return setup_fifo_params();
	case GENI_GPI_DMA:
		...
		return spi_geni_map_buf();
	}

	return -EINVAL;

Seems cleaner to me.

> +
> +	} else if (mas->cur_xfer_mode == GENI_GPI_DMA) {
> +		mas->num_tx_eot = 0;
> +		mas->num_rx_eot = 0;
> +		mas->num_xfers = 0;
> +		reinit_completion(&mas->tx_cb);
> +		reinit_completion(&mas->rx_cb);
> +		memset(mas->gsi, 0, (sizeof(struct spi_geni_gsi) * NUM_SPI_XFER));
> +		geni_se_select_mode(se, GENI_GPI_DMA);
> +		ret = spi_geni_map_buf(mas, spi_msg);
> +
> +	} else {
> +		dev_err(mas->dev, "%s: Couldn't select mode %d", __func__, mas->cur_xfer_mode);
> +		ret = -EINVAL;
> +	}
>  
> -	ret = setup_fifo_params(spi_msg->spi, spi);
> -	if (ret)
> -		dev_err(mas->dev, "Couldn't select mode %d\n", ret);
>  	return ret;
>  }
>  
> +static int spi_geni_unprepare_message(struct spi_master *spi_mas, struct spi_message *spi_msg)
> +{
> +	struct spi_geni_master *mas = spi_master_get_devdata(spi_mas);
> +
> +	mas->cur_speed_hz = 0;
> +	mas->cur_bits_per_word = 0;
> +	if (mas->cur_xfer_mode == GENI_GPI_DMA)
> +		spi_geni_unmap_buf(mas, spi_msg);
> +	return 0;
> +}
> +
>  static int spi_geni_init(struct spi_geni_master *mas)
>  {
>  	struct geni_se *se = &mas->se;
>  	unsigned int proto, major, minor, ver;
>  	u32 spi_tx_cfg;
> +	size_t gsi_sz;
> +	int ret = 0;
>  
>  	pm_runtime_get_sync(mas->dev);
>  
>  	proto = geni_se_read_proto(se);
>  	if (proto != GENI_SE_SPI) {
>  		dev_err(mas->dev, "Invalid proto %d\n", proto);
> -		pm_runtime_put(mas->dev);
> -		return -ENXIO;
> +		ret = -ENXIO;
> +		goto out_pm;
>  	}
>  	mas->tx_fifo_depth = geni_se_get_tx_fifo_depth(se);
>  
> @@ -328,8 +609,34 @@ static int spi_geni_init(struct spi_geni_master *mas)
>  	spi_tx_cfg &= ~CS_TOGGLE;
>  	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
>  
> +	mas->tx = dma_request_slave_channel(mas->dev, "tx");
> +	if (IS_ERR_OR_NULL(mas->tx)) {
> +		dev_err(mas->dev, "Failed to get tx DMA ch %ld", PTR_ERR(mas->tx));

I first wrote a rant about breaking backwards compatibility, but then at
last realized that this hunk doesn't actually modify "ret", so all this
error handling - and error printing - might still result in a successful
exit.

I also don't think it's accurate to have all errors treated the same
way, e.g. EPROBE_DEFER shouldn't be treated the same way as "no dma
property specified".

> +		ret = PTR_ERR(mas->tx);
> +		goto out_pm;
> +	} else {
> +		mas->rx = dma_request_slave_channel(mas->dev, "rx");
> +		if (IS_ERR_OR_NULL(mas->rx)) {
> +			dev_err(mas->dev, "Failed to get rx DMA ch %ld", PTR_ERR(mas->rx));
> +			dma_release_channel(mas->tx);
> +			ret = PTR_ERR(mas->rx);

Per the use of IS_ERR_OR_NULL() ret might become 0 here.

> +			goto out_pm;
> +		}
> +
> +		gsi_sz = sizeof(struct spi_geni_gsi) * NUM_SPI_XFER;
> +		mas->gsi = devm_kzalloc(mas->dev, gsi_sz, GFP_KERNEL);
> +		if (IS_ERR_OR_NULL(mas->gsi)) {

We got both rx and tx, but then this allocation failed; is that a
reason for returning success without dma operational? (I'm not sure
what the right thing to do is.)

But checking for allocation failure isn't done with IS_ERR().

> +			dma_release_channel(mas->tx);

If you spin this hunk off into its own function then you can use the
idiomatic goto cleanup scheme and not have to repeat yourself here.
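
E.g. (untested sketch, helper name made up; dma_request_chan() used
instead of the deprecated dma_request_slave_channel()):

static int spi_geni_grab_gpi_chan(struct spi_geni_master *mas)
{
	int ret;

	mas->tx = dma_request_chan(mas->dev, "tx");
	if (IS_ERR(mas->tx))
		return PTR_ERR(mas->tx);

	mas->rx = dma_request_chan(mas->dev, "rx");
	if (IS_ERR(mas->rx)) {
		ret = PTR_ERR(mas->rx);
		goto err_rx;
	}

	mas->gsi = devm_kcalloc(mas->dev, NUM_SPI_XFER, sizeof(*mas->gsi),
				GFP_KERNEL);
	if (!mas->gsi) {
		ret = -ENOMEM;
		goto err_mem;
	}

	return 0;

err_mem:
	dma_release_channel(mas->rx);
	mas->rx = NULL;
err_rx:
	dma_release_channel(mas->tx);
	mas->tx = NULL;
	return ret;
}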

> +			dma_release_channel(mas->rx);
> +			mas->tx = NULL;
> +			mas->rx = NULL;
> +			goto out_pm;
> +		}
> +	}
> +
> +out_pm:
>  	pm_runtime_put(mas->dev);
> -	return 0;
> +	return ret;
>  }
>  
>  static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
> @@ -420,6 +727,7 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
>  {
>  	u32 m_cmd = 0;
>  	u32 len;
> +	u32 m_param = 0;
>  	struct geni_se *se = &mas->se;
>  	int ret;
>  
> @@ -457,6 +765,11 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
>  		len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
>  	len &= TRANS_LEN_MSK;
>  
> +	if (!xfer->cs_change) {
> +		if (!list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))
> +			m_param |= FRAGMENTATION;
> +	}
> +
>  	mas->cur_xfer = xfer;
>  	if (xfer->tx_buf) {
>  		m_cmd |= SPI_TX_ONLY;
> @@ -475,7 +788,7 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
>  	 * interrupt could come in at any time now.
>  	 */
>  	spin_lock_irq(&mas->lock);
> -	geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
> +	geni_se_setup_m_cmd(se, m_cmd, m_param);
>  
>  	/*
>  	 * TX_WATERMARK_REG should be set after SPI configuration and
> @@ -494,13 +807,52 @@ static int spi_geni_transfer_one(struct spi_master *spi,
>  				struct spi_transfer *xfer)
>  {
>  	struct spi_geni_master *mas = spi_master_get_devdata(spi);
> +	unsigned long timeout, jiffies;
> +	int ret = 0i, i;

0i? Is that a sign of you using vim?

>  
>  	/* Terminate and return success for 0 byte length transfer */
>  	if (!xfer->len)
> -		return 0;
> +		return ret;
> +
> +	if (mas->cur_xfer_mode == GENI_SE_FIFO) {
> +		setup_fifo_xfer(xfer, mas, slv->mode, spi);
> +	} else {
> +		setup_gsi_xfer(xfer, mas, slv, spi);
> +		if (mas->num_xfers >= NUM_SPI_XFER ||
> +		    (list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))) {
> +			for (i = 0 ; i < mas->num_tx_eot; i++) {
> +				jiffies = msecs_to_jiffies(SPI_XFER_TIMEOUT_MS);
> +				timeout = wait_for_completion_timeout(&mas->tx_cb, jiffies);
> +				if (timeout <= 0) {
> +					dev_err(mas->dev, "Tx[%d] timeout%lu\n", i, timeout);
> +					ret = -ETIMEDOUT;
> +					goto err_gsi_geni_transfer_one;
> +				}
> +				spi_finalize_current_transfer(spi);
> +			}
> +			for (i = 0 ; i < mas->num_rx_eot; i++) {
> +				jiffies = msecs_to_jiffies(SPI_XFER_TIMEOUT_MS);
> +				timeout = wait_for_completion_timeout(&mas->tx_cb, jiffies);
> +				if (timeout <= 0) {
> +					dev_err(mas->dev, "Rx[%d] timeout%lu\n", i, timeout);
> +					ret = -ETIMEDOUT;
> +					goto err_gsi_geni_transfer_one;
> +				}
> +				spi_finalize_current_transfer(spi);
> +			}
> +			if (mas->qn_err) {
> +				ret = -EIO;
> +				mas->qn_err = false;
> +				goto err_gsi_geni_transfer_one;
> +			}
> +		}
> +	}
>  
> -	setup_fifo_xfer(xfer, mas, slv->mode, spi);
> -	return 1;
> +	return ret;

All assignments to "ret" are followed by a goto
err_gsi_geni_transfer_one, so the only time you actually get here is
with ret has been untouched...in which case it's now 0, rather than the
1 it was previously.

> +
> +err_gsi_geni_transfer_one:
> +	dmaengine_terminate_all(mas->tx);
> +	return ret;
>  }
>  
>  static irqreturn_t geni_spi_isr(int irq, void *data)
> @@ -595,6 +947,15 @@ static int spi_geni_probe(struct platform_device *pdev)
>  	if (irq < 0)
>  		return irq;
>  
> +	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
> +	if (ret) {
> +		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
> +		if (ret) {
> +			dev_err(&pdev->dev, "could not set DMA mask\n");
> +			return ret;
> +		}
> +	}
> +
>  	base = devm_platform_ioremap_resource(pdev, 0);
>  	if (IS_ERR(base))
>  		return PTR_ERR(base);
> @@ -632,15 +993,18 @@ static int spi_geni_probe(struct platform_device *pdev)
>  	spi->num_chipselect = 4;
>  	spi->max_speed_hz = 50000000;
>  	spi->prepare_message = spi_geni_prepare_message;
> +	spi->unprepare_message = spi_geni_unprepare_message;
>  	spi->transfer_one = spi_geni_transfer_one;
>  	spi->auto_runtime_pm = true;
>  	spi->handle_err = handle_fifo_timeout;
> -	spi->set_cs = spi_geni_set_cs;
>  	spi->use_gpio_descriptors = true;
>  
>  	init_completion(&mas->cs_done);
>  	init_completion(&mas->cancel_done);
>  	init_completion(&mas->abort_done);
> +	init_completion(&mas->xfer_done);
> +	init_completion(&mas->tx_cb);
> +	init_completion(&mas->rx_cb);
>  	spin_lock_init(&mas->lock);
>  	pm_runtime_use_autosuspend(&pdev->dev);
>  	pm_runtime_set_autosuspend_delay(&pdev->dev, 250);
> @@ -661,6 +1025,15 @@ static int spi_geni_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto spi_geni_probe_runtime_disable;
>  
> +	/*
> +	 * query the mode supported and set_cs for fifo mode only
> +	 * for dma (gsi) mode, the gsi will set cs based on params passed in
> +	 * TRE
> +	 */

Is there no SE_DMA mode for SPI? (I know it's not implemented right now)
If we get there, this would be cur_xfer_mode != GENI_GPI_DMA?

Regards,
Bjorn

> +	mas->cur_xfer_mode = get_xfer_mode(spi);
> +	if (mas->cur_xfer_mode == GENI_SE_FIFO)
> +		spi->set_cs = spi_geni_set_cs;
> +
>  	ret = request_irq(mas->irq, geni_spi_isr, 0, dev_name(dev), spi);
>  	if (ret)
>  		goto spi_geni_probe_runtime_disable;
> -- 
> 2.26.2
>
Lukas Wunner Jan. 12, 2021, 7:31 a.m. UTC | #3
On Mon, Jan 11, 2021 at 08:46:48PM +0530, Vinod Koul wrote:
> @@ -328,8 +609,34 @@ static int spi_geni_init(struct spi_geni_master *mas)
>  	spi_tx_cfg &= ~CS_TOGGLE;
>  	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
>  
> +	mas->tx = dma_request_slave_channel(mas->dev, "tx");
> +	if (IS_ERR_OR_NULL(mas->tx)) {
> +		dev_err(mas->dev, "Failed to get tx DMA ch %ld", PTR_ERR(mas->tx));
> +		ret = PTR_ERR(mas->tx);
> +		goto out_pm;
> +	} else {
> +		mas->rx = dma_request_slave_channel(mas->dev, "rx");
> +		if (IS_ERR_OR_NULL(mas->rx)) {
> +			dev_err(mas->dev, "Failed to get rx DMA ch %ld", PTR_ERR(mas->rx));
> +			dma_release_channel(mas->tx);
> +			ret = PTR_ERR(mas->rx);
> +			goto out_pm;
> +		}

These channels need to be released in spi_geni_remove().

Also, you may want to fall back to PIO mode if channel allocation fails.
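
I.e. something like this in spi_geni_remove() (sketch, assuming the
channel pointers stay NULL when the channels were never acquired):

	if (mas->rx)
		dma_release_channel(mas->rx);
	if (mas->tx)
		dma_release_channel(mas->tx);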

Thanks,

Lukas
Chen, Rong A Jan. 12, 2021, 11:02 a.m. UTC | #4
Hi Vinod,

I love your patch! Perhaps something to improve:

[auto build test WARNING on v5.11-rc2]
[cannot apply to robh/for-next spi/for-next wsa/i2c/for-next linus/master v5.11-rc3 next-20210111]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Vinod-Koul/Add-and-enable-GPI-DMA-users/20210111-232052
base:    e71ba9452f0b5b2e8dc8aa5445198cd9214a6a62
config: mips-randconfig-s031-20210111 (attached as .config)
compiler: mipsel-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-208-g46a52ca4-dirty
        # https://github.com/0day-ci/linux/commit/bcc874d4e76c4129ee33077e2828a311b2edcb96
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Vinod-Koul/Add-and-enable-GPI-DMA-users/20210111-232052
        git checkout bcc874d4e76c4129ee33077e2828a311b2edcb96
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <rong.a.chen@intel.com>


"sparse warnings: (new ones prefixed by >>)"
   command-line: note: in included file:
   builtin:1:9: sparse: sparse: preprocessor token __ATOMIC_ACQUIRE redefined
   builtin:0:0: sparse: this was the original definition
   builtin:1:9: sparse: sparse: preprocessor token __ATOMIC_SEQ_CST redefined
   builtin:0:0: sparse: this was the original definition
   builtin:1:9: sparse: sparse: preprocessor token __ATOMIC_ACQ_REL redefined
   builtin:0:0: sparse: this was the original definition
   builtin:1:9: sparse: sparse: preprocessor token __ATOMIC_RELEASE redefined
   builtin:0:0: sparse: this was the original definition
>> drivers/spi/spi-geni-qcom.c:811:19: sparse: sparse: constant 0i is not a valid number

vim +811 drivers/spi/spi-geni-qcom.c

561de45f72bd5f9b Girish Mahadevan 2018-10-03  804  
561de45f72bd5f9b Girish Mahadevan 2018-10-03  805  static int spi_geni_transfer_one(struct spi_master *spi,
561de45f72bd5f9b Girish Mahadevan 2018-10-03  806  				struct spi_device *slv,
561de45f72bd5f9b Girish Mahadevan 2018-10-03  807  				struct spi_transfer *xfer)
561de45f72bd5f9b Girish Mahadevan 2018-10-03  808  {
561de45f72bd5f9b Girish Mahadevan 2018-10-03  809  	struct spi_geni_master *mas = spi_master_get_devdata(spi);
bcc874d4e76c4129 Vinod Koul       2021-01-11  810  	unsigned long timeout, jiffies;
bcc874d4e76c4129 Vinod Koul       2021-01-11 @811  	int ret = 0i, i;
561de45f72bd5f9b Girish Mahadevan 2018-10-03  812  
561de45f72bd5f9b Girish Mahadevan 2018-10-03  813  	/* Terminate and return success for 0 byte length transfer */
561de45f72bd5f9b Girish Mahadevan 2018-10-03  814  	if (!xfer->len)
bcc874d4e76c4129 Vinod Koul       2021-01-11  815  		return ret;
561de45f72bd5f9b Girish Mahadevan 2018-10-03  816  
bcc874d4e76c4129 Vinod Koul       2021-01-11  817  	if (mas->cur_xfer_mode == GENI_SE_FIFO) {
561de45f72bd5f9b Girish Mahadevan 2018-10-03  818  		setup_fifo_xfer(xfer, mas, slv->mode, spi);
bcc874d4e76c4129 Vinod Koul       2021-01-11  819  	} else {
bcc874d4e76c4129 Vinod Koul       2021-01-11  820  		setup_gsi_xfer(xfer, mas, slv, spi);
bcc874d4e76c4129 Vinod Koul       2021-01-11  821  		if (mas->num_xfers >= NUM_SPI_XFER ||
bcc874d4e76c4129 Vinod Koul       2021-01-11  822  		    (list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))) {
bcc874d4e76c4129 Vinod Koul       2021-01-11  823  			for (i = 0 ; i < mas->num_tx_eot; i++) {
bcc874d4e76c4129 Vinod Koul       2021-01-11  824  				jiffies = msecs_to_jiffies(SPI_XFER_TIMEOUT_MS);
bcc874d4e76c4129 Vinod Koul       2021-01-11  825  				timeout = wait_for_completion_timeout(&mas->tx_cb, jiffies);
bcc874d4e76c4129 Vinod Koul       2021-01-11  826  				if (timeout <= 0) {
bcc874d4e76c4129 Vinod Koul       2021-01-11  827  					dev_err(mas->dev, "Tx[%d] timeout%lu\n", i, timeout);
bcc874d4e76c4129 Vinod Koul       2021-01-11  828  					ret = -ETIMEDOUT;
bcc874d4e76c4129 Vinod Koul       2021-01-11  829  					goto err_gsi_geni_transfer_one;
bcc874d4e76c4129 Vinod Koul       2021-01-11  830  				}
bcc874d4e76c4129 Vinod Koul       2021-01-11  831  				spi_finalize_current_transfer(spi);
bcc874d4e76c4129 Vinod Koul       2021-01-11  832  			}
bcc874d4e76c4129 Vinod Koul       2021-01-11  833  			for (i = 0 ; i < mas->num_rx_eot; i++) {
bcc874d4e76c4129 Vinod Koul       2021-01-11  834  				jiffies = msecs_to_jiffies(SPI_XFER_TIMEOUT_MS);
bcc874d4e76c4129 Vinod Koul       2021-01-11  835  				timeout = wait_for_completion_timeout(&mas->tx_cb, jiffies);
bcc874d4e76c4129 Vinod Koul       2021-01-11  836  				if (timeout <= 0) {
bcc874d4e76c4129 Vinod Koul       2021-01-11  837  					dev_err(mas->dev, "Rx[%d] timeout%lu\n", i, timeout);
bcc874d4e76c4129 Vinod Koul       2021-01-11  838  					ret = -ETIMEDOUT;
bcc874d4e76c4129 Vinod Koul       2021-01-11  839  					goto err_gsi_geni_transfer_one;
bcc874d4e76c4129 Vinod Koul       2021-01-11  840  				}
bcc874d4e76c4129 Vinod Koul       2021-01-11  841  				spi_finalize_current_transfer(spi);
bcc874d4e76c4129 Vinod Koul       2021-01-11  842  			}
bcc874d4e76c4129 Vinod Koul       2021-01-11  843  			if (mas->qn_err) {
bcc874d4e76c4129 Vinod Koul       2021-01-11  844  				ret = -EIO;
bcc874d4e76c4129 Vinod Koul       2021-01-11  845  				mas->qn_err = false;
bcc874d4e76c4129 Vinod Koul       2021-01-11  846  				goto err_gsi_geni_transfer_one;
bcc874d4e76c4129 Vinod Koul       2021-01-11  847  			}
bcc874d4e76c4129 Vinod Koul       2021-01-11  848  		}
bcc874d4e76c4129 Vinod Koul       2021-01-11  849  	}
bcc874d4e76c4129 Vinod Koul       2021-01-11  850  
bcc874d4e76c4129 Vinod Koul       2021-01-11  851  	return ret;
bcc874d4e76c4129 Vinod Koul       2021-01-11  852  
bcc874d4e76c4129 Vinod Koul       2021-01-11  853  err_gsi_geni_transfer_one:
bcc874d4e76c4129 Vinod Koul       2021-01-11  854  	dmaengine_terminate_all(mas->tx);
bcc874d4e76c4129 Vinod Koul       2021-01-11  855  	return ret;
561de45f72bd5f9b Girish Mahadevan 2018-10-03  856  }
561de45f72bd5f9b Girish Mahadevan 2018-10-03  857  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Doug Anderson Jan. 13, 2021, 12:01 a.m. UTC | #5
Hi,

On Mon, Jan 11, 2021 at 7:17 AM Vinod Koul <vkoul@kernel.org> wrote:
>
> We can use GPI DMA for devices where it is enabled by firmware. Add
> support for this mode.
>
> Signed-off-by: Vinod Koul <vkoul@kernel.org>
> ---
>  drivers/spi/spi-geni-qcom.c | 395 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 384 insertions(+), 11 deletions(-)

I did a somewhat cursory review, mostly focusing on making sure that
the non-GPI/GSI stuff doesn't regress.  ;-)  I think you've already
got a bunch of feedback for v2 so I'll plan to look back when I see
the v2 and maybe will find time to look at some of the GSI/GPI stuff
too...


> diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
> index 512e925d5ea4..5bb0e2192734 100644
> --- a/drivers/spi/spi-geni-qcom.c
> +++ b/drivers/spi/spi-geni-qcom.c
> @@ -2,6 +2,8 @@
>  // Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
>
>  #include <linux/clk.h>
> +#include <linux/dmaengine.h>
> +#include <linux/dma-mapping.h>
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/log2.h>
> @@ -10,6 +12,7 @@
>  #include <linux/pm_opp.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/qcom-geni-se.h>
> +#include <linux/dma/qcom-gpi-dma.h>

nit: sort ordering doesn't match other includes.  It seems like
existing includes in this file are sorted ignoring subdirs.


>  static int spi_geni_prepare_message(struct spi_master *spi,
>                                         struct spi_message *spi_msg)
>  {
>         int ret;
>         struct spi_geni_master *mas = spi_master_get_devdata(spi);
> +       struct geni_se *se = &mas->se;
> +
> +       mas->cur_xfer_mode = get_xfer_mode(spi);
> +
> +       if (mas->cur_xfer_mode == GENI_SE_FIFO) {
> +               geni_se_select_mode(se, GENI_SE_FIFO);

You don't need to do this over and over again.  We set up FIFO mode in
spi_geni_init() and it'll never change.


> +               reinit_completion(&mas->xfer_done);
> +               ret = setup_fifo_params(spi_msg->spi, spi);
> +               if (ret)
> +                       dev_err(mas->dev, "Couldn't select mode %d\n", ret);
> +
> +       } else if (mas->cur_xfer_mode == GENI_GPI_DMA) {
> +               mas->num_tx_eot = 0;
> +               mas->num_rx_eot = 0;
> +               mas->num_xfers = 0;
> +               reinit_completion(&mas->tx_cb);
> +               reinit_completion(&mas->rx_cb);
> +               memset(mas->gsi, 0, (sizeof(struct spi_geni_gsi) * NUM_SPI_XFER));
> +               geni_se_select_mode(se, GENI_GPI_DMA);
> +               ret = spi_geni_map_buf(mas, spi_msg);
> +

Extra blank line?

> +       } else {
> +               dev_err(mas->dev, "%s: Couldn't select mode %d", __func__, mas->cur_xfer_mode);

Please no __func__ in error messages unless you're doing a non-"dev"
print.  If you want to fill your log with function names you should
redefine the generic dev_xxx() functions to prefix "__func__" in your
own kernel.  You probably don't even need a printout here since
get_xfer_mode() already printed.


> +static int spi_geni_unprepare_message(struct spi_master *spi_mas, struct spi_message *spi_msg)
> +{
> +       struct spi_geni_master *mas = spi_master_get_devdata(spi_mas);
> +
> +       mas->cur_speed_hz = 0;
> +       mas->cur_bits_per_word = 0;

I think doing the above zeros will make the code a bunch slower for
FIFO mode.  Specifically we can avoid a whole bunch of (very slow)
interconnect code if the speed doesn't change between transfers and
the runtime PM auto power down hasn't hit.


> @@ -328,8 +609,34 @@ static int spi_geni_init(struct spi_geni_master *mas)
>         spi_tx_cfg &= ~CS_TOGGLE;
>         writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
>
> +       mas->tx = dma_request_slave_channel(mas->dev, "tx");
> +       if (IS_ERR_OR_NULL(mas->tx)) {

I didn't look too closely at this since I think Mark wanted you to
look into the core DMA support, but...

In general, don't you only need to do the DMA requests if you're in GPI mode?


> +               dev_err(mas->dev, "Failed to get tx DMA ch %ld", PTR_ERR(mas->tx));
> +               ret = PTR_ERR(mas->tx);
> +               goto out_pm;
> +       } else {

No need for else since the last "if" ended with a "goto".


> +               mas->rx = dma_request_slave_channel(mas->dev, "rx");
> +               if (IS_ERR_OR_NULL(mas->rx)) {
> +                       dev_err(mas->dev, "Failed to get rx DMA ch %ld", PTR_ERR(mas->rx));
> +                       dma_release_channel(mas->tx);
> +                       ret = PTR_ERR(mas->rx);
> +                       goto out_pm;
> +               }
> +
> +               gsi_sz = sizeof(struct spi_geni_gsi) * NUM_SPI_XFER;
> +               mas->gsi = devm_kzalloc(mas->dev, gsi_sz, GFP_KERNEL);
> +               if (IS_ERR_OR_NULL(mas->gsi)) {

Is it ever an error?  Just check against NULL?


> +                       dma_release_channel(mas->tx);
> +                       dma_release_channel(mas->rx);
> +                       mas->tx = NULL;
> +                       mas->rx = NULL;

ret = -ENOMEM ?
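
I.e. (sketch):

	mas->gsi = devm_kzalloc(mas->dev, gsi_sz, GFP_KERNEL);
	if (!mas->gsi) {
		dma_release_channel(mas->tx);
		dma_release_channel(mas->rx);
		mas->tx = NULL;
		mas->rx = NULL;
		ret = -ENOMEM;
		goto out_pm;
	}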


>  static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
> @@ -457,6 +765,11 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
>                 len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
>         len &= TRANS_LEN_MSK;
>
> +       if (!xfer->cs_change) {
> +               if (!list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))
> +                       m_param |= FRAGMENTATION;
> +       }

Why are you changing this?  It's for FIFO mode which works correctly
the way it is.  We _always_ want the FRAGMENTATION bit set because we
explicitly set the CS.  I haven't tried it, but I'd imagine this
change breaks stuff?  I'd expect all changes in setup_fifo_xfer() to
be removed from your patch.  If there's some reason you need them then
post a separate patch.


> @@ -494,13 +807,52 @@ static int spi_geni_transfer_one(struct spi_master *spi,
>                                 struct spi_transfer *xfer)
>  {
>         struct spi_geni_master *mas = spi_master_get_devdata(spi);
> +       unsigned long timeout, jiffies;

Doesn't this shadow the global "jiffies"?


> +       int ret = 0i, i;
>
>         /* Terminate and return success for 0 byte length transfer */
>         if (!xfer->len)
> -               return 0;
> +               return ret;

It feels more self-documenting to just leave this as "return 0".


> +
> +       if (mas->cur_xfer_mode == GENI_SE_FIFO) {
> +               setup_fifo_xfer(xfer, mas, slv->mode, spi);

It's super important to return "1" in this case to tell the SPI core
that you left the transfer in progress.  You don't do that anymore, so
boom.
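
I.e. the FIFO path should keep the old tail (sketch):

	setup_fifo_xfer(xfer, mas, slv->mode, spi);
	return 1;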


> +       } else {
> +               setup_gsi_xfer(xfer, mas, slv, spi);

This feels very non-symmetric.  In the FIFO case you just call a
function.  In the GSI case you have a whole pile of stuff inline.  Can
all the stuff below be stuck in setup_gsi_xfer() or maybe you can add
an extra wrapper function?  That means you don't need the weird goto
flow in this function...

> @@ -661,6 +1025,15 @@ static int spi_geni_probe(struct platform_device *pdev)
>         if (ret)
>                 goto spi_geni_probe_runtime_disable;
>
> +       /*
> +        * query the mode supported and set_cs for fifo mode only
> +        * for dma (gsi) mode, the gsi will set cs based on params passed in
> +        * TRE
> +        */
> +       mas->cur_xfer_mode = get_xfer_mode(spi);
> +       if (mas->cur_xfer_mode == GENI_SE_FIFO)

nit: check against != GPI mode?
Vinod Koul Jan. 13, 2021, 3:24 a.m. UTC | #6
On 12-01-21, 16:01, Doug Anderson wrote:
> Hi,
> 
> On Mon, Jan 11, 2021 at 7:17 AM Vinod Koul <vkoul@kernel.org> wrote:
> >
> > We can use GPI DMA for devices where it is enabled by firmware. Add
> > support for this mode.
> >
> > Signed-off-by: Vinod Koul <vkoul@kernel.org>
> > ---
> >  drivers/spi/spi-geni-qcom.c | 395 +++++++++++++++++++++++++++++++++++-
> >  1 file changed, 384 insertions(+), 11 deletions(-)
> 
> I did a somewhat cursory review, mostly focusing on making sure that
> the non-GPI/GSI stuff doesn't regress.  ;-)  I think you've already
> got a bunch of feedback for v2 so I'll plan to look back when I see
> the v2 and maybe will find time to look at some of the GSI/GPI stuff
> too...

Thanks for the comments, I will address them and post v2. All the
below comments look good to me, I will respin.


> > diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
> > index 512e925d5ea4..5bb0e2192734 100644
> > --- a/drivers/spi/spi-geni-qcom.c
> > +++ b/drivers/spi/spi-geni-qcom.c
> > @@ -2,6 +2,8 @@
> >  // Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
> >
> >  #include <linux/clk.h>
> > +#include <linux/dmaengine.h>
> > +#include <linux/dma-mapping.h>
> >  #include <linux/interrupt.h>
> >  #include <linux/io.h>
> >  #include <linux/log2.h>
> > @@ -10,6 +12,7 @@
> >  #include <linux/pm_opp.h>
> >  #include <linux/pm_runtime.h>
> >  #include <linux/qcom-geni-se.h>
> > +#include <linux/dma/qcom-gpi-dma.h>
> 
> nit: sort ordering doesn't match other includes.  It seems like
> existing includes in this file are sorted ignoring subdirs.
> 
> 
> >  static int spi_geni_prepare_message(struct spi_master *spi,
> >                                         struct spi_message *spi_msg)
> >  {
> >         int ret;
> >         struct spi_geni_master *mas = spi_master_get_devdata(spi);
> > +       struct geni_se *se = &mas->se;
> > +
> > +       mas->cur_xfer_mode = get_xfer_mode(spi);
> > +
> > +       if (mas->cur_xfer_mode == GENI_SE_FIFO) {
> > +               geni_se_select_mode(se, GENI_SE_FIFO);
> 
> You don't need to do this over and over again.  We set up FIFO mode in
> spi_geni_init() and it'll never change.
> 
> 
> > +               reinit_completion(&mas->xfer_done);
> > +               ret = setup_fifo_params(spi_msg->spi, spi);
> > +               if (ret)
> > +                       dev_err(mas->dev, "Couldn't select mode %d\n", ret);
> > +
> > +       } else if (mas->cur_xfer_mode == GENI_GPI_DMA) {
> > +               mas->num_tx_eot = 0;
> > +               mas->num_rx_eot = 0;
> > +               mas->num_xfers = 0;
> > +               reinit_completion(&mas->tx_cb);
> > +               reinit_completion(&mas->rx_cb);
> > +               memset(mas->gsi, 0, (sizeof(struct spi_geni_gsi) * NUM_SPI_XFER));
> > +               geni_se_select_mode(se, GENI_GPI_DMA);
> > +               ret = spi_geni_map_buf(mas, spi_msg);
> > +
> 
> Extra blank line?
> 
> > +       } else {
> > +               dev_err(mas->dev, "%s: Couldn't select mode %d", __func__, mas->cur_xfer_mode);
> 
> Please no __func__ in error messages unless you're doing a non-"dev"
> print.  If you want to fill your log with function names you should
> redefine the generic dev_xxx() functions to prefix "__func__" in your
> own kernel.  You probably don't even need a printout here since
> get_xfer_mode() already printed.
> 
> 
> > +static int spi_geni_unprepare_message(struct spi_master *spi_mas, struct spi_message *spi_msg)
> > +{
> > +       struct spi_geni_master *mas = spi_master_get_devdata(spi_mas);
> > +
> > +       mas->cur_speed_hz = 0;
> > +       mas->cur_bits_per_word = 0;
> 
> I think doing the above zeros will make the code a bunch slower for
> FIFO mode.  Specifically we can avoid a whole bunch of (very slow)
> interconnect code if the speed doesn't change between transfers and
> the runtime PM auto power down hasn't hit.
> 
> 
> > @@ -328,8 +609,34 @@ static int spi_geni_init(struct spi_geni_master *mas)
> >         spi_tx_cfg &= ~CS_TOGGLE;
> >         writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
> >
> > +       mas->tx = dma_request_slave_channel(mas->dev, "tx");
> > +       if (IS_ERR_OR_NULL(mas->tx)) {
> 
> I didn't look too closely at this since I think Mark wanted you to
> look into the core DMA support, but...
> 
> In general, don't you only need to do the DMA requests if you're in GPI mode?
> 
> 
> > +               dev_err(mas->dev, "Failed to get tx DMA ch %ld", PTR_ERR(mas->tx));
> > +               ret = PTR_ERR(mas->tx);
> > +               goto out_pm;
> > +       } else {
> 
> No need for else since the last "if" ended with a "goto".
> 
> 
> > +               mas->rx = dma_request_slave_channel(mas->dev, "rx");
> > +               if (IS_ERR_OR_NULL(mas->rx)) {
> > +                       dev_err(mas->dev, "Failed to get rx DMA ch %ld", PTR_ERR(mas->rx));
> > +                       dma_release_channel(mas->tx);
> > +                       ret = PTR_ERR(mas->rx);
> > +                       goto out_pm;
> > +               }
> > +
> > +               gsi_sz = sizeof(struct spi_geni_gsi) * NUM_SPI_XFER;
> > +               mas->gsi = devm_kzalloc(mas->dev, gsi_sz, GFP_KERNEL);
> > +               if (IS_ERR_OR_NULL(mas->gsi)) {
> 
> Is it ever an error?  Just check against NULL?
> 
> 
> > +                       dma_release_channel(mas->tx);
> > +                       dma_release_channel(mas->rx);
> > +                       mas->tx = NULL;
> > +                       mas->rx = NULL;
> 
> ret = -ENOMEM ?
> 
> 
> >  static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
> > @@ -457,6 +765,11 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
> >                 len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
> >         len &= TRANS_LEN_MSK;
> >
> > +       if (!xfer->cs_change) {
> > +               if (!list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))
> > +                       m_param |= FRAGMENTATION;
> > +       }
> 
> Why are you changing this?  It's for FIFO mode which works correctly
> the way it is.  We _always_ want the FRAGMENTATION bit set because we
> explicitly set the CS.  I haven't tried it, but I'd imagine this
> change breaks stuff?  I'd expect all changes in setup_fifo_xfer() to
> be removed from your patch.  If there's some reason you need them then
> post a separate patch.
> 
> 
> > @@ -494,13 +807,52 @@ static int spi_geni_transfer_one(struct spi_master *spi,
> >                                 struct spi_transfer *xfer)
> >  {
> >         struct spi_geni_master *mas = spi_master_get_devdata(spi);
> > +       unsigned long timeout, jiffies;
> 
> Doesn't this shadow the global "jiffies"?
> 
> 
> > +       int ret = 0i, i;
> >
> >         /* Terminate and return success for 0 byte length transfer */
> >         if (!xfer->len)
> > -               return 0;
> > +               return ret;
> 
> It feels more self-documenting to just leave this as "return 0".
> 
> 
> > +
> > +       if (mas->cur_xfer_mode == GENI_SE_FIFO) {
> > +               setup_fifo_xfer(xfer, mas, slv->mode, spi);
> 
> It's super important to return "1" in this case to tell the SPI core
> that you left the transfer in progress.  You don't do that anymore, so
> boom.
> 
> 
> > +       } else {
> > +               setup_gsi_xfer(xfer, mas, slv, spi);
> 
> This feels very non-symmetric.  In the FIFO case you just call a
> function.  In the GSI case you have a whole pile of stuff inline.  Can
> all the stuff below be stuck in setup_gsi_xfer() or maybe you can add
> an extra wrapper function?  That means you don't need the weird goto
> flow in this function...
> 
> > @@ -661,6 +1025,15 @@ static int spi_geni_probe(struct platform_device *pdev)
> >         if (ret)
> >                 goto spi_geni_probe_runtime_disable;
> >
> > +       /*
> > +        * query the mode supported and set_cs for fifo mode only
> > +        * for dma (gsi) mode, the gsi will set cs based on params passed in
> > +        * TRE
> > +        */
> > +       mas->cur_xfer_mode = get_xfer_mode(spi);
> > +       if (mas->cur_xfer_mode == GENI_SE_FIFO)
> 
> nit: check against != GPI mode?
Dmitry Baryshkov Feb. 4, 2021, 9:34 p.m. UTC | #7
On 11/01/2021 18:16, Vinod Koul wrote:
> We can use GPI DMA for devices where it is enabled by firmware. Add
> support for this mode.
> 
> Signed-off-by: Vinod Koul <vkoul@kernel.org>
> ---
>   drivers/spi/spi-geni-qcom.c | 395 +++++++++++++++++++++++++++++++++++-
>   1 file changed, 384 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
> index 512e925d5ea4..5bb0e2192734 100644
> --- a/drivers/spi/spi-geni-qcom.c
> +++ b/drivers/spi/spi-geni-qcom.c

[skipped]

> @@ -328,8 +609,34 @@ static int spi_geni_init(struct spi_geni_master *mas)
>   	spi_tx_cfg &= ~CS_TOGGLE;
>   	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
>   
> +	mas->tx = dma_request_slave_channel(mas->dev, "tx");
> +	if (IS_ERR_OR_NULL(mas->tx)) {

dma_request_slave_channel() is deprecated. Also it does not return an
actual error, it always returns NULL. So the error path here is bugged.
Judging from the rest of the driver, it might be logical to select the
transfer mode at probe time; then it would be possible to skip checking
for DMA channels if FIFO mode is selected.
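
With dma_request_chan() the error can actually be propagated, e.g.
(untested sketch; dev_err_probe() would also take care of
-EPROBE_DEFER):

	mas->tx = dma_request_chan(mas->dev, "tx");
	if (IS_ERR(mas->tx)) {
		ret = dev_err_probe(mas->dev, PTR_ERR(mas->tx),
				    "Failed to get tx DMA ch\n");
		goto out_pm;
	}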

> +		dev_err(mas->dev, "Failed to get tx DMA ch %ld", PTR_ERR(mas->tx));
> +		ret = PTR_ERR(mas->tx);
> +		goto out_pm;
> +	} else {
> +		mas->rx = dma_request_slave_channel(mas->dev, "rx");
> +		if (IS_ERR_OR_NULL(mas->rx)) {
> +			dev_err(mas->dev, "Failed to get rx DMA ch %ld", PTR_ERR(mas->rx));
> +			dma_release_channel(mas->tx);
> +			ret = PTR_ERR(mas->rx);
> +			goto out_pm;
> +		}
> +
> +		gsi_sz = sizeof(struct spi_geni_gsi) * NUM_SPI_XFER;
> +		mas->gsi = devm_kzalloc(mas->dev, gsi_sz, GFP_KERNEL);
> +		if (IS_ERR_OR_NULL(mas->gsi)) {
> +			dma_release_channel(mas->tx);
> +			dma_release_channel(mas->rx);
> +			mas->tx = NULL;
> +			mas->rx = NULL;
> +			goto out_pm;
> +		}
> +	}
> +
> +out_pm:
>   	pm_runtime_put(mas->dev);
> -	return 0;
> +	return ret;
>   }
>   
>   static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
> @@ -420,6 +727,7 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
>   {
>   	u32 m_cmd = 0;
>   	u32 len;
> +	u32 m_param = 0;
>   	struct geni_se *se = &mas->se;
>   	int ret;
>   
> @@ -457,6 +765,11 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
>   		len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
>   	len &= TRANS_LEN_MSK;
>   
> +	if (!xfer->cs_change) {
> +		if (!list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))
> +			m_param |= FRAGMENTATION;
> +	}
> +
>   	mas->cur_xfer = xfer;
>   	if (xfer->tx_buf) {
>   		m_cmd |= SPI_TX_ONLY;
> @@ -475,7 +788,7 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
>   	 * interrupt could come in at any time now.
>   	 */
>   	spin_lock_irq(&mas->lock);
> -	geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
> +	geni_se_setup_m_cmd(se, m_cmd, m_param);
>   
>   	/*
>   	 * TX_WATERMARK_REG should be set after SPI configuration and
> @@ -494,13 +807,52 @@ static int spi_geni_transfer_one(struct spi_master *spi,
>   				struct spi_transfer *xfer)
>   {
>   	struct spi_geni_master *mas = spi_master_get_devdata(spi);
> +	unsigned long timeout, jiffies;
> +	int ret = 0i, i;
>   
>   	/* Terminate and return success for 0 byte length transfer */
>   	if (!xfer->len)
> -		return 0;
> +		return ret;
> +
> +	if (mas->cur_xfer_mode == GENI_SE_FIFO) {
> +		setup_fifo_xfer(xfer, mas, slv->mode, spi);
> +	} else {
> +		setup_gsi_xfer(xfer, mas, slv, spi);
> +		if (mas->num_xfers >= NUM_SPI_XFER ||
> +		    (list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))) {
> +			for (i = 0 ; i < mas->num_tx_eot; i++) {
> +				jiffies = msecs_to_jiffies(SPI_XFER_TIMEOUT_MS);
> +				timeout = wait_for_completion_timeout(&mas->tx_cb, jiffies);
> +				if (timeout <= 0) {
> +					dev_err(mas->dev, "Tx[%d] timeout%lu\n", i, timeout);
> +					ret = -ETIMEDOUT;
> +					goto err_gsi_geni_transfer_one;
> +				}
> +				spi_finalize_current_transfer(spi);
> +			}
> +			for (i = 0 ; i < mas->num_rx_eot; i++) {
> +				jiffies = msecs_to_jiffies(SPI_XFER_TIMEOUT_MS);
> +				timeout = wait_for_completion_timeout(&mas->tx_cb, jiffies);
> +				if (timeout <= 0) {
> +					dev_err(mas->dev, "Rx[%d] timeout%lu\n", i, timeout);
> +					ret = -ETIMEDOUT;
> +					goto err_gsi_geni_transfer_one;
> +				}
> +				spi_finalize_current_transfer(spi);
> +			}
> +			if (mas->qn_err) {
> +				ret = -EIO;
> +				mas->qn_err = false;
> +				goto err_gsi_geni_transfer_one;
> +			}
> +		}
> +	}
>   
> -	setup_fifo_xfer(xfer, mas, slv->mode, spi);
> -	return 1;
> +	return ret;
> +
> +err_gsi_geni_transfer_one:
> +	dmaengine_terminate_all(mas->tx);
> +	return ret;
>   }
>   
>   static irqreturn_t geni_spi_isr(int irq, void *data)
> @@ -595,6 +947,15 @@ static int spi_geni_probe(struct platform_device *pdev)
>   	if (irq < 0)
>   		return irq;
>   
> +	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
> +	if (ret) {
> +		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
> +		if (ret) {
> +			dev_err(&pdev->dev, "could not set DMA mask\n");
> +			return ret;
> +		}
> +	}
> +
>   	base = devm_platform_ioremap_resource(pdev, 0);
>   	if (IS_ERR(base))
>   		return PTR_ERR(base);
> @@ -632,15 +993,18 @@ static int spi_geni_probe(struct platform_device *pdev)
>   	spi->num_chipselect = 4;
>   	spi->max_speed_hz = 50000000;
>   	spi->prepare_message = spi_geni_prepare_message;
> +	spi->unprepare_message = spi_geni_unprepare_message;
>   	spi->transfer_one = spi_geni_transfer_one;
>   	spi->auto_runtime_pm = true;
>   	spi->handle_err = handle_fifo_timeout;
> -	spi->set_cs = spi_geni_set_cs;
>   	spi->use_gpio_descriptors = true;
>   
>   	init_completion(&mas->cs_done);
>   	init_completion(&mas->cancel_done);
>   	init_completion(&mas->abort_done);
> +	init_completion(&mas->xfer_done);
> +	init_completion(&mas->tx_cb);
> +	init_completion(&mas->rx_cb);
>   	spin_lock_init(&mas->lock);
>   	pm_runtime_use_autosuspend(&pdev->dev);
>   	pm_runtime_set_autosuspend_delay(&pdev->dev, 250);
> @@ -661,6 +1025,15 @@ static int spi_geni_probe(struct platform_device *pdev)
>   	if (ret)
>   		goto spi_geni_probe_runtime_disable;
>   
> +	/*
> +	 * query the mode supported and set_cs for fifo mode only
> +	 * for dma (gsi) mode, the gsi will set cs based on params passed in
> +	 * TRE
> +	 */
> +	mas->cur_xfer_mode = get_xfer_mode(spi);
> +	if (mas->cur_xfer_mode == GENI_SE_FIFO)
> +		spi->set_cs = spi_geni_set_cs;
> +
>   	ret = request_irq(mas->irq, geni_spi_isr, 0, dev_name(dev), spi);
>   	if (ret)
>   		goto spi_geni_probe_runtime_disable;
>
Vinod Koul June 16, 2021, 8:50 a.m. UTC | #8
Hi Mark,

On 11-01-21, 16:35, Mark Brown wrote:
> On Mon, Jan 11, 2021 at 08:46:48PM +0530, Vinod Koul wrote:
> 
> > +static int get_xfer_mode(struct spi_master *spi)
> > +{
> > +	struct spi_geni_master *mas = spi_master_get_devdata(spi);
> > +	struct geni_se *se = &mas->se;
> > +	int mode = GENI_SE_FIFO;
> 
> Why not use the core DMA mapping support?

Sorry, I seem to have missed replying to this one.

Looking at the code, that is the ideal case. The only issue I can see
is that the core DMA mapping device being used is incorrect. The core
would use ctlr->dev.parent, which is the spi0 device here.

But in this case, that won't work. We have a parent qup device which is
the parent of both the spi and dma devices and needs to be used for
dma-mapping!

If we allow drivers to set the dma mapping device and use that, then I
can reuse the core. Let me know if that is agreeable to you and I can
hack this up. Maybe add a new member in spi_controller which is filled
by drivers in the can_dma() callback?

Thanks
Mark Brown June 16, 2021, 11:35 a.m. UTC | #9
On Wed, Jun 16, 2021 at 02:20:45PM +0530, Vinod Koul wrote:

> Looking at the code, that is the ideal case. The only issue I can see
> is that the core DMA mapping device being used is incorrect. The core
> would use ctlr->dev.parent, which is the spi0 device here.

Why would the parent of the controller be a SPI device?

> But in this case, that won't work. We have a parent qup device which
> is the parent of both the spi and dma devices and needs to be used for
> dma-mapping!

> If we allow drivers to set the dma mapping device and use that, then I
> can reuse the core. Let me know if that is agreeable to you and I can
> hack this up. Maybe add a new member in spi_controller which is filled
> by drivers in the can_dma() callback?

Possibly, I'd need to see the code.
Vinod Koul June 16, 2021, 12:02 p.m. UTC | #10
On 16-06-21, 12:35, Mark Brown wrote:
> On Wed, Jun 16, 2021 at 02:20:45PM +0530, Vinod Koul wrote:
> 
> > Looking at the code, that is the ideal case. The only issue I can
> > see is that the core DMA mapping device being used is incorrect. The
> > core would use ctlr->dev.parent, which is the spi0 device here.
> 
> Why would the parent of the controller be a SPI device?

Sorry, my bad, I meant the core uses ctlr->dev.parent, which in this
case is the SPI master device, 880000.spi

> > But in this case, that won't work. We have a parent qup device which
> > is the parent of both the spi and dma devices and needs to be used
> > for dma-mapping!
> 
> > If we allow drivers to set the dma mapping device and use that, then
> > I can reuse the core. Let me know if that is agreeable to you and I
> > can hack this up. Maybe add a new member in spi_controller which is
> > filled by drivers in the can_dma() callback?
> 
> Possibly, I'd need to see the code.

Ok, let me do a prototype and share ...

Thanks
Vinod Koul June 17, 2021, 6:20 a.m. UTC | #11
On 16-06-21, 17:32, Vinod Koul wrote:
> On 16-06-21, 12:35, Mark Brown wrote:
> > On Wed, Jun 16, 2021 at 02:20:45PM +0530, Vinod Koul wrote:
> > > But in this case, that won't work. We have a parent qup device
> > > which is the parent of both the spi and dma devices and needs to be
> > > used for dma-mapping!
> > 
> > > If we allow drivers to set the dma mapping device and use that,
> > > then I can reuse the core. Let me know if that is agreeable to you
> > > and I can hack this up. Maybe add a new member in spi_controller
> > > which is filled by drivers in the can_dma() callback?
> > 
> > Possibly, I'd need to see the code.
> 
> Ok, let me do a prototype and share ...

So setting the dma_map_dev in the can_dma() callback does not work, as
we would need the device before we invoke can_dma(), so I modified this
to be set earlier by the driver (set the dma_map_dev in driver probe)
and used it in __spi_map_msg().

With this it works for me & I was able to get rid of the driver mapping code.

-- >8 --

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index e353b7a9e54e..315f7e7545f7 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -961,11 +961,15 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 
 	if (ctlr->dma_tx)
 		tx_dev = ctlr->dma_tx->device->dev;
+	else if (ctlr->dma_map_dev)
+		tx_dev = ctlr->dma_map_dev;
 	else
 		tx_dev = ctlr->dev.parent;
 
 	if (ctlr->dma_rx)
 		rx_dev = ctlr->dma_rx->device->dev;
+	else if (ctlr->dma_map_dev)
+		rx_dev = ctlr->dma_map_dev;
 	else
 		rx_dev = ctlr->dev.parent;
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 74239d65c7fd..4d3f116f5723 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -586,6 +586,7 @@ struct spi_controller {
 	bool			(*can_dma)(struct spi_controller *ctlr,
 					   struct spi_device *spi,
 					   struct spi_transfer *xfer);
+	struct device *dma_map_dev;
 
 	/*
 	 * These hooks are for drivers that want to use the generic
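
The driver would then just point dma_map_dev at the parent qup device
in probe, something like (sketch):

	spi->dma_map_dev = mas->dev->parent;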

Patch

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 512e925d5ea4..5bb0e2192734 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -2,6 +2,8 @@ 
 // Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
 
 #include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/log2.h>
@@ -10,6 +12,7 @@ 
 #include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
 #include <linux/qcom-geni-se.h>
+#include <linux/dma/qcom-gpi-dma.h>
 #include <linux/spi/spi.h>
 #include <linux/spinlock.h>
 
@@ -63,6 +66,35 @@ 
 #define TIMESTAMP_AFTER		BIT(3)
 #define POST_CMD_DELAY		BIT(4)
 
+#define GSI_LOOPBACK_EN		(BIT(0))
+#define GSI_CS_TOGGLE		(BIT(3))
+#define GSI_CPHA		(BIT(4))
+#define GSI_CPOL		(BIT(5))
+
+#define MAX_TX_SG		(3)
+#define NUM_SPI_XFER		(8)
+#define SPI_XFER_TIMEOUT_MS	(250)
+
+struct gsi_desc_cb {
+	struct spi_geni_master *mas;
+	struct spi_transfer *xfer;
+};
+
+struct spi_geni_gsi {
+	dma_cookie_t tx_cookie;
+	dma_cookie_t rx_cookie;
+	struct dma_async_tx_descriptor *tx_desc;
+	struct dma_async_tx_descriptor *rx_desc;
+	struct gsi_desc_cb desc_cb;
+};
+
+enum spi_m_cmd_opcode {
+	CMD_NONE,
+	CMD_XFER,
+	CMD_CS,
+	CMD_CANCEL,
+};
+
 struct spi_geni_master {
 	struct geni_se se;
 	struct device *dev;
@@ -79,10 +111,21 @@  struct spi_geni_master {
 	struct completion cs_done;
 	struct completion cancel_done;
 	struct completion abort_done;
+	struct completion xfer_done;
 	unsigned int oversampling;
 	spinlock_t lock;
 	int irq;
 	bool cs_flag;
+	struct spi_geni_gsi *gsi;
+	struct dma_chan *tx;
+	struct dma_chan *rx;
+	struct completion tx_cb;
+	struct completion rx_cb;
+	bool qn_err;
+	int cur_xfer_mode;
+	int num_tx_eot;
+	int num_rx_eot;
+	int num_xfers;
 };
 
 static int get_spi_clk_cfg(unsigned int speed_hz,
@@ -274,31 +317,269 @@  static int setup_fifo_params(struct spi_device *spi_slv,
 	return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz);
 }
 
+static int get_xfer_mode(struct spi_master *spi)
+{
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	struct geni_se *se = &mas->se;
+	int mode = GENI_SE_FIFO;
+	int fifo_disable;
+	bool dma_chan_valid;
+
+	fifo_disable = readl(se->base + GENI_IF_DISABLE_RO) & FIFO_IF_DISABLE;
+	dma_chan_valid = !(IS_ERR_OR_NULL(mas->tx) || IS_ERR_OR_NULL(mas->rx));
+
+	/*
+	 * If the FIFO interface is disabled and no DMA channels are
+	 * available, we can't do this transfer at all. If the FIFO
+	 * interface is disabled, GPI DMA is the only option; otherwise
+	 * pick FIFO mode.
+	 */
+	if (fifo_disable && !dma_chan_valid) {
+		dev_err(mas->dev, "FIFO and DMA modes disabled, can't transfer\n");
+		mode = -EINVAL;
+	} else if (fifo_disable) {
+		mode = GENI_GPI_DMA;
+	} else {
+		mode = GENI_SE_FIFO;
+	}
+
+	return mode;
+}
+
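+/* dmaengine result callback shared by the GPI tx and rx channels */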
+static void
+spi_gsi_callback_result(void *cb, const struct dmaengine_result *result, bool tx)
+{
+	struct gsi_desc_cb *gsi = cb;
+
+	if (result->result != DMA_TRANS_NOERROR) {
+		dev_err(gsi->mas->dev, "%s: DMA %s txn failed\n", __func__, tx ? "tx" : "rx");
+		return;
+	}
+
+	if (!result->residue) {
+		dev_dbg(gsi->mas->dev, "%s\n", __func__);
+		if (tx)
+			complete(&gsi->mas->tx_cb);
+		else
+			complete(&gsi->mas->rx_cb);
+	} else {
+		dev_err(gsi->mas->dev, "DMA xfer has pending: %d\n", result->residue);
+	}
+}
+
+static void
+spi_gsi_rx_callback_result(void *cb, const struct dmaengine_result *result)
+{
+	spi_gsi_callback_result(cb, result, false);
+}
+
+static void
+spi_gsi_tx_callback_result(void *cb, const struct dmaengine_result *result)
+{
+	spi_gsi_callback_result(cb, result, true);
+}
+
+static int setup_gsi_xfer(struct spi_transfer *xfer, struct spi_geni_master *mas,
+			  struct spi_device *spi_slv, struct spi_master *spi)
+{
+	int ret = 0;
+	unsigned long flags = DMA_PREP_INTERRUPT | DMA_CTRL_ACK;
+	struct spi_geni_gsi *gsi;
+	struct dma_slave_config config;
+	struct gpi_spi_config peripheral;
+
+	memset(&config, 0, sizeof(config));
+	memset(&peripheral, 0, sizeof(peripheral));
+	config.peripheral_config = &peripheral;
+	config.peripheral_size = sizeof(peripheral);
+
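+	/* Ask GPI to (re)program the SPI config only when it has changed */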
+	if (xfer->bits_per_word != mas->cur_bits_per_word ||
+	    xfer->speed_hz != mas->cur_speed_hz) {
+		mas->cur_bits_per_word = xfer->bits_per_word;
+		mas->cur_speed_hz = xfer->speed_hz;
+		peripheral.set_config = true;
+	}
+
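+	/* Convert the transfer length in bytes into a length in SPI words */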
+	if (!(mas->cur_bits_per_word % MIN_WORD_LEN)) {
+		peripheral.rx_len = ((xfer->len << 3) / mas->cur_bits_per_word);
+	} else {
+		int bytes_per_word = (mas->cur_bits_per_word / BITS_PER_BYTE) + 1;
+
+		peripheral.rx_len = (xfer->len / bytes_per_word);
+	}
+
+	if (xfer->tx_buf && xfer->rx_buf) {
+		peripheral.cmd = SPI_DUPLEX;
+	} else if (xfer->tx_buf) {
+		peripheral.cmd = SPI_TX;
+		peripheral.rx_len = 0;
+	} else if (xfer->rx_buf) {
+		peripheral.cmd = SPI_RX;
+	}
+
+	peripheral.cs = spi_slv->chip_select;
+
+	if (spi_slv->mode & SPI_LOOP)
+		peripheral.loopback_en = true;
+	if (spi_slv->mode & SPI_CPOL)
+		peripheral.clock_pol_high = true;
+	if (spi_slv->mode & SPI_CPHA)
+		peripheral.data_pol_high = true;
+	peripheral.pack_en = true;
+	peripheral.word_len = xfer->bits_per_word - MIN_WORD_LEN;
+	ret = get_spi_clk_cfg(mas->cur_speed_hz, mas,
+			      &peripheral.clk_src, &peripheral.clk_div);
+	if (ret) {
+		dev_err(mas->dev, "%s:Err setting clks:%d\n", __func__, ret);
+		return ret;
+	}
+
+	if (!xfer->cs_change) {
+		if (!list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))
+			peripheral.fragmentation = FRAGMENTATION;
+	}
+
+	gsi = &mas->gsi[mas->num_xfers];
+	gsi->desc_cb.mas = mas;
+	gsi->desc_cb.xfer = xfer;
+	if (peripheral.cmd & SPI_RX) {
+		dmaengine_slave_config(mas->rx, &config);
+		gsi->rx_desc = dmaengine_prep_slave_single(mas->rx, xfer->rx_dma,
+							   xfer->len, DMA_DEV_TO_MEM, flags);
+		if (IS_ERR_OR_NULL(gsi->rx_desc)) {
+			dev_err(mas->dev, "Err setting up rx desc\n");
+			return -EIO;
+		}
+		gsi->rx_desc->callback_result = spi_gsi_rx_callback_result;
+		gsi->rx_desc->callback_param = &gsi->desc_cb;
+		mas->num_rx_eot++;
+	}
+
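+	/* Each queued descriptor signals an EOT; count them so transfer_one can wait */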
+	if (peripheral.cmd & SPI_TX_ONLY)
+		mas->num_tx_eot++;
+
+	dmaengine_slave_config(mas->tx, &config);
+	gsi->tx_desc = dmaengine_prep_slave_single(mas->tx, xfer->tx_dma,
+						   xfer->len, DMA_MEM_TO_DEV, flags);
+
+	if (IS_ERR_OR_NULL(gsi->tx_desc)) {
+		dev_err(mas->dev, "Err setting up tx desc\n");
+		return -EIO;
+	}
+	gsi->tx_desc->callback_result = spi_gsi_tx_callback_result;
+	gsi->tx_desc->callback_param = &gsi->desc_cb;
+	if (peripheral.cmd & SPI_RX)
+		gsi->rx_cookie = dmaengine_submit(gsi->rx_desc);
+	gsi->tx_cookie = dmaengine_submit(gsi->tx_desc);
+	if (peripheral.cmd & SPI_RX)
+		dma_async_issue_pending(mas->rx);
+	dma_async_issue_pending(mas->tx);
+	mas->num_xfers++;
+	return ret;
+}
+
+static int spi_geni_map_buf(struct spi_geni_master *mas, struct spi_message *msg)
+{
+	struct spi_transfer *xfer;
+	struct device *gsi_dev = mas->dev->parent;
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		if (xfer->rx_buf) {
+			xfer->rx_dma = dma_map_single(gsi_dev, xfer->rx_buf,
+						      xfer->len, DMA_FROM_DEVICE);
+			if (dma_mapping_error(gsi_dev, xfer->rx_dma)) {
+				dev_err(mas->dev, "Err mapping rx buf\n");
+				return -ENOMEM;
+			}
+		}
+
+		if (xfer->tx_buf) {
+			xfer->tx_dma = dma_map_single(gsi_dev, (void *)xfer->tx_buf,
+						      xfer->len, DMA_TO_DEVICE);
+			if (dma_mapping_error(gsi_dev, xfer->tx_dma)) {
+				dev_err(mas->dev, "Err mapping buf\n");
+				dma_unmap_single(gsi_dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE);
+				return -ENOMEM;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void spi_geni_unmap_buf(struct spi_geni_master *mas, struct spi_message *msg)
+{
+	struct spi_transfer *xfer;
+	struct device *gsi_dev = mas->dev->parent;	/* must match spi_geni_map_buf() */
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		if (xfer->rx_buf)
+			dma_unmap_single(gsi_dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE);
+		if (xfer->tx_buf)
+			dma_unmap_single(gsi_dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
+	}
+}
+
 static int spi_geni_prepare_message(struct spi_master *spi,
 					struct spi_message *spi_msg)
 {
 	int ret;
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	struct geni_se *se = &mas->se;
+
+	mas->cur_xfer_mode = get_xfer_mode(spi);
+
+	if (mas->cur_xfer_mode == GENI_SE_FIFO) {
+		geni_se_select_mode(se, GENI_SE_FIFO);
+		reinit_completion(&mas->xfer_done);
+		ret = setup_fifo_params(spi_msg->spi, spi);
+		if (ret)
+			dev_err(mas->dev, "Couldn't select mode %d\n", ret);
+
+	} else if (mas->cur_xfer_mode == GENI_GPI_DMA) {
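+		/* Reset per-message GSI state and DMA-map the buffers up front */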
+		mas->num_tx_eot = 0;
+		mas->num_rx_eot = 0;
+		mas->num_xfers = 0;
+		reinit_completion(&mas->tx_cb);
+		reinit_completion(&mas->rx_cb);
+		memset(mas->gsi, 0, (sizeof(struct spi_geni_gsi) * NUM_SPI_XFER));
+		geni_se_select_mode(se, GENI_GPI_DMA);
+		ret = spi_geni_map_buf(mas, spi_msg);
+
+	} else {
+		dev_err(mas->dev, "%s: Couldn't select mode %d", __func__, mas->cur_xfer_mode);
+		ret = -EINVAL;
+	}
 
-	ret = setup_fifo_params(spi_msg->spi, spi);
-	if (ret)
-		dev_err(mas->dev, "Couldn't select mode %d\n", ret);
 	return ret;
 }
 
+static int spi_geni_unprepare_message(struct spi_master *spi_mas, struct spi_message *spi_msg)
+{
+	struct spi_geni_master *mas = spi_master_get_devdata(spi_mas);
+
+	mas->cur_speed_hz = 0;
+	mas->cur_bits_per_word = 0;
+	if (mas->cur_xfer_mode == GENI_GPI_DMA)
+		spi_geni_unmap_buf(mas, spi_msg);
+	return 0;
+}
+
 static int spi_geni_init(struct spi_geni_master *mas)
 {
 	struct geni_se *se = &mas->se;
 	unsigned int proto, major, minor, ver;
 	u32 spi_tx_cfg;
+	size_t gsi_sz;
+	int ret = 0;
 
 	pm_runtime_get_sync(mas->dev);
 
 	proto = geni_se_read_proto(se);
 	if (proto != GENI_SE_SPI) {
 		dev_err(mas->dev, "Invalid proto %d\n", proto);
-		pm_runtime_put(mas->dev);
-		return -ENXIO;
+		ret = -ENXIO;
+		goto out_pm;
 	}
 	mas->tx_fifo_depth = geni_se_get_tx_fifo_depth(se);
 
@@ -328,8 +609,34 @@  static int spi_geni_init(struct spi_geni_master *mas)
 	spi_tx_cfg &= ~CS_TOGGLE;
 	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
 
+	mas->tx = dma_request_chan(mas->dev, "tx");
+	if (IS_ERR(mas->tx)) {
+		ret = PTR_ERR(mas->tx);
+		dev_err(mas->dev, "Failed to get tx DMA ch %d\n", ret);
+		goto out_pm;
+	}
+
+	mas->rx = dma_request_chan(mas->dev, "rx");
+	if (IS_ERR(mas->rx)) {
+		ret = PTR_ERR(mas->rx);
+		dev_err(mas->dev, "Failed to get rx DMA ch %d\n", ret);
+		dma_release_channel(mas->tx);
+		mas->tx = NULL;
+		goto out_pm;
+	}
+
+	gsi_sz = sizeof(struct spi_geni_gsi) * NUM_SPI_XFER;
+	mas->gsi = devm_kzalloc(mas->dev, gsi_sz, GFP_KERNEL);
+	if (!mas->gsi) {
+		dma_release_channel(mas->tx);
+		dma_release_channel(mas->rx);
+		mas->tx = NULL;
+		mas->rx = NULL;
+		ret = -ENOMEM;
+		goto out_pm;
+	}
+
+out_pm:
 	pm_runtime_put(mas->dev);
-	return 0;
+	return ret;
 }
 
 static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
@@ -420,6 +727,7 @@  static void setup_fifo_xfer(struct spi_transfer *xfer,
 {
 	u32 m_cmd = 0;
 	u32 len;
+	u32 m_param = 0;
 	struct geni_se *se = &mas->se;
 	int ret;
 
@@ -457,6 +765,11 @@  static void setup_fifo_xfer(struct spi_transfer *xfer,
 		len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
 	len &= TRANS_LEN_MSK;
 
+	if (!xfer->cs_change) {
+		if (!list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))
+			m_param |= FRAGMENTATION;
+	}
+
 	mas->cur_xfer = xfer;
 	if (xfer->tx_buf) {
 		m_cmd |= SPI_TX_ONLY;
@@ -475,7 +788,7 @@  static void setup_fifo_xfer(struct spi_transfer *xfer,
 	 * interrupt could come in at any time now.
 	 */
 	spin_lock_irq(&mas->lock);
-	geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
+	geni_se_setup_m_cmd(se, m_cmd, m_param);
 
 	/*
 	 * TX_WATERMARK_REG should be set after SPI configuration and
@@ -494,13 +807,52 @@  static int spi_geni_transfer_one(struct spi_master *spi,
 				struct spi_transfer *xfer)
 {
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	unsigned long timeout, timeout_jiffies;
+	int ret = 0, i;
 
 	/* Terminate and return success for 0 byte length transfer */
 	if (!xfer->len)
-		return 0;
+		return ret;
+
+	if (mas->cur_xfer_mode == GENI_SE_FIFO) {
+		setup_fifo_xfer(xfer, mas, slv->mode, spi);
+		/* The FIFO path completes asynchronously via the ISR */
+		return 1;
+	} else {
+		setup_gsi_xfer(xfer, mas, slv, spi);
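+		/*
+		 * Transfers are queued in batches of up to NUM_SPI_XFER;
+		 * wait for the DMA completions once the batch is full or
+		 * this is the last transfer in the message.
+		 */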
+		if (mas->num_xfers >= NUM_SPI_XFER ||
+		    (list_is_last(&xfer->transfer_list, &spi->cur_msg->transfers))) {
+			for (i = 0; i < mas->num_tx_eot; i++) {
+				timeout_jiffies = msecs_to_jiffies(SPI_XFER_TIMEOUT_MS);
+				timeout = wait_for_completion_timeout(&mas->tx_cb, timeout_jiffies);
+				if (!timeout) {
+					dev_err(mas->dev, "Tx[%d] timeout\n", i);
+					ret = -ETIMEDOUT;
+					goto err_gsi_geni_transfer_one;
+				}
+				spi_finalize_current_transfer(spi);
+			}
+			for (i = 0; i < mas->num_rx_eot; i++) {
+				timeout_jiffies = msecs_to_jiffies(SPI_XFER_TIMEOUT_MS);
+				timeout = wait_for_completion_timeout(&mas->rx_cb, timeout_jiffies);
+				if (!timeout) {
+					dev_err(mas->dev, "Rx[%d] timeout\n", i);
+					ret = -ETIMEDOUT;
+					goto err_gsi_geni_transfer_one;
+				}
+				spi_finalize_current_transfer(spi);
+			}
+			/* Batch done; reset so later transfers reuse the gsi slots */
+			mas->num_xfers = 0;
+			mas->num_tx_eot = 0;
+			mas->num_rx_eot = 0;
+			if (mas->qn_err) {
+				ret = -EIO;
+				mas->qn_err = false;
+				goto err_gsi_geni_transfer_one;
+			}
+		}
+	}
 
-	setup_fifo_xfer(xfer, mas, slv->mode, spi);
-	return 1;
+	return ret;
+
+err_gsi_geni_transfer_one:
+	dmaengine_terminate_all(mas->tx);
+	dmaengine_terminate_all(mas->rx);
+	return ret;
 }
 
 static irqreturn_t geni_spi_isr(int irq, void *data)
@@ -595,6 +947,15 @@  static int spi_geni_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(&pdev->dev, "could not set DMA mask\n");
+			return ret;
+		}
+	}
+
 	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
@@ -632,15 +993,18 @@  static int spi_geni_probe(struct platform_device *pdev)
 	spi->num_chipselect = 4;
 	spi->max_speed_hz = 50000000;
 	spi->prepare_message = spi_geni_prepare_message;
+	spi->unprepare_message = spi_geni_unprepare_message;
 	spi->transfer_one = spi_geni_transfer_one;
 	spi->auto_runtime_pm = true;
 	spi->handle_err = handle_fifo_timeout;
-	spi->set_cs = spi_geni_set_cs;
 	spi->use_gpio_descriptors = true;
 
 	init_completion(&mas->cs_done);
 	init_completion(&mas->cancel_done);
 	init_completion(&mas->abort_done);
+	init_completion(&mas->xfer_done);
+	init_completion(&mas->tx_cb);
+	init_completion(&mas->rx_cb);
 	spin_lock_init(&mas->lock);
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_autosuspend_delay(&pdev->dev, 250);
@@ -661,6 +1025,15 @@  static int spi_geni_probe(struct platform_device *pdev)
 	if (ret)
 		goto spi_geni_probe_runtime_disable;
 
+	/*
+	 * Query the supported transfer mode and register set_cs only for
+	 * FIFO mode. In GPI DMA mode the hardware drives the chip select
+	 * based on the parameters passed in the TRE.
+	 */
+	mas->cur_xfer_mode = get_xfer_mode(spi);
+	if (mas->cur_xfer_mode == GENI_SE_FIFO)
+		spi->set_cs = spi_geni_set_cs;
+
 	ret = request_irq(mas->irq, geni_spi_isr, 0, dev_name(dev), spi);
 	if (ret)
 		goto spi_geni_probe_runtime_disable;