Message ID: 1388789632-12238-1-git-send-email-Frank.Li@freescale.com
State: New, archived
On Fri, Jan 3, 2014 at 4:53 PM, Frank Li <Frank.Li@freescale.com> wrote: > After enable DMA > > spi-nor read speed is > dd if=/dev/mtd0 of=/dev/null bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 0.720402 s, 1.5 MB/s > > spi-nor write speed is > dd if=/dev/zero of=/dev/mtd0 bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 3.56044 s, 295 kB/s > > Before enable DMA > > spi-nor read speed is > dd if=/dev/mtd0 of=/dev/null bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 2.37717 s, 441 kB/s > > spi-nor write speed is > > dd if=/dev/zero of=/dev/mtd0 bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 4.83181 s, 217 kB/s > > Signed-off-by: Frank Li <Frank.Li@freescale.com> ping! > --- > drivers/spi/spi-imx.c | 447 ++++++++++++++++++++++++++++++++++++++++++++++++- > 1 files changed, 441 insertions(+), 6 deletions(-) > > Change from v1. > 1. check if res is null at res = platform_get_resource(pdev, IORESOURCE_MEM, 0) > 2. fix failure transfer when len is not multiple watermark > > diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c > index b80f2f7..1a9099a 100644 > --- a/drivers/spi/spi-imx.c > +++ b/drivers/spi/spi-imx.c > @@ -39,6 +39,10 @@ > #include <linux/of_gpio.h> > > #include <linux/platform_data/spi-imx.h> > +#include <linux/dma-mapping.h> > +#include <linux/platform_data/dma-imx.h> > +#include <linux/dmaengine.h> > + > > #define DRIVER_NAME "spi_imx" > > @@ -52,6 +56,9 @@ > #define MXC_INT_RR (1 << 0) /* Receive data ready interrupt */ > #define MXC_INT_TE (1 << 1) /* Transmit FIFO empty interrupt */ > > +/* The maximum bytes that a sdma BD can transfer.*/ > +#define MAX_SDMA_BD_BYTES (1 << 15) > + > struct spi_imx_config { > unsigned int speed_hz; > unsigned int bpw; > @@ -84,6 +91,7 @@ struct spi_imx_data { > > struct completion xfer_done; > void __iomem *base; > + resource_size_t mapbase; > int irq; > struct clk *clk_per; > struct clk *clk_ipg; > @@ -92,6 +100,29 @@ struct spi_imx_data { > unsigned int count; > void (*tx)(struct spi_imx_data *); > void (*rx)(struct spi_imx_data *); > + int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t); > + struct dma_chan *dma_chan_rx, *dma_chan_tx; > + unsigned int dma_is_inited; > + struct device *dev; > + > + struct completion dma_rx_completion; > + struct completion dma_tx_completion; > + > + u8 *dma_rx_tmpbuf; > + unsigned int dma_rx_tmpbuf_size; > + unsigned int dma_rx_tmpbuf_phy_addr; > + > + u8 *dma_tx_tmpbuf; > + unsigned int dma_tx_tmpbuf_size; > + unsigned int dma_tx_tmpbuf_phy_addr; > + > + unsigned int usedma; > + unsigned int dma_finished; > + /* SDMA wartermark */ > + u32 rx_wml; > + u32 tx_wml; > + u32 rxt_wml; > + > void *rx_buf; > const void *tx_buf; > unsigned int txfifo; /* number of words pushed in tx FIFO */ > @@ -185,6 +216,7 @@ static unsigned int spi_imx_clkdiv_2(unsigned int fin, > #define MX51_ECSPI_CTRL 0x08 > #define MX51_ECSPI_CTRL_ENABLE (1 << 0) > #define MX51_ECSPI_CTRL_XCH (1 << 2) > +#define MX51_ECSPI_CTRL_SMC (1 << 3) > #define MX51_ECSPI_CTRL_MODE_MASK (0xf << 4) > #define MX51_ECSPI_CTRL_POSTDIV_OFFSET 8 > #define MX51_ECSPI_CTRL_PREDIV_OFFSET 12 > @@ -202,9 +234,22 @@ static unsigned int spi_imx_clkdiv_2(unsigned int fin, > #define MX51_ECSPI_INT_TEEN (1 << 0) > #define MX51_ECSPI_INT_RREN (1 << 3) > > +#define MX51_ECSPI_DMA 0x14 > +#define MX51_ECSPI_DMA_TX_WML_OFFSET 0 > +#define MX51_ECSPI_DMA_TX_WML_MASK 0x3F > +#define MX51_ECSPI_DMA_RX_WML_OFFSET 16 > 
+#define MX51_ECSPI_DMA_RX_WML_MASK (0x3F << 16) > +#define MX51_ECSPI_DMA_RXT_WML_OFFSET 24 > +#define MX51_ECSPI_DMA_RXT_WML_MASK (0x3F << 16) > + > +#define MX51_ECSPI_DMA_TEDEN_OFFSET 7 > +#define MX51_ECSPI_DMA_RXDEN_OFFSET 23 > +#define MX51_ECSPI_DMA_RXTDEN_OFFSET 31 > + > #define MX51_ECSPI_STAT 0x18 > #define MX51_ECSPI_STAT_RR (1 << 3) > > +#define MX51_ECSPI_TESTREG 0x20 > /* MX51 eCSPI */ > static unsigned int mx51_ecspi_clkdiv(unsigned int fin, unsigned int fspi) > { > @@ -255,16 +300,28 @@ static void __maybe_unused mx51_ecspi_trigger(struct spi_imx_data *spi_imx) > { > u32 reg; > > - reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > - reg |= MX51_ECSPI_CTRL_XCH; > - writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + if (!spi_imx->usedma) { > + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > + reg |= MX51_ECSPI_CTRL_XCH; > + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + } else { > + if (!spi_imx->dma_finished) { > + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > + reg |= MX51_ECSPI_CTRL_SMC; > + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + } else { > + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > + reg &= (~MX51_ECSPI_CTRL_SMC); > + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + } > + } > } > > static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx, > struct spi_imx_config *config) > { > - u32 ctrl = MX51_ECSPI_CTRL_ENABLE, cfg = 0; > - > + u32 ctrl = MX51_ECSPI_CTRL_ENABLE, cfg = 0, dma = 0; > + u32 tx_wml_cfg, rx_wml_cfg, rxt_wml_cfg; > /* > * The hardware seems to have a race condition when changing modes. The > * current assumption is that the selection of the channel arrives > @@ -297,6 +354,30 @@ static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx, > writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL); > writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG); > > + /* > + * Configure the DMA register: setup the watermark > + * and enable DMA request. 
> + */ > + if (spi_imx->dma_is_inited) { > + dma = readl(spi_imx->base + MX51_ECSPI_DMA); > + > + spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2; > + spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2; > + spi_imx->rxt_wml = spi_imx_get_fifosize(spi_imx) / 2; > + rx_wml_cfg = spi_imx->rx_wml << MX51_ECSPI_DMA_RX_WML_OFFSET; > + tx_wml_cfg = spi_imx->tx_wml << MX51_ECSPI_DMA_TX_WML_OFFSET; > + rxt_wml_cfg = spi_imx->rxt_wml << MX51_ECSPI_DMA_RXT_WML_OFFSET; > + dma = (dma & (~MX51_ECSPI_DMA_TX_WML_MASK) > + & (~MX51_ECSPI_DMA_RX_WML_MASK) > + & (~MX51_ECSPI_DMA_RXT_WML_MASK)) > + | rx_wml_cfg | tx_wml_cfg | rxt_wml_cfg > + | (1 << MX51_ECSPI_DMA_TEDEN_OFFSET) > + | (1 << MX51_ECSPI_DMA_RXDEN_OFFSET) > + | (1 << MX51_ECSPI_DMA_RXTDEN_OFFSET); > + > + writel(dma, spi_imx->base + MX51_ECSPI_DMA); > + } > + > return 0; > } > > @@ -708,7 +789,285 @@ static int spi_imx_setupxfer(struct spi_device *spi, > return 0; > } > > -static int spi_imx_transfer(struct spi_device *spi, > +static void spi_imx_sdma_exit(struct spi_imx_data *spi_imx) > +{ > + if (spi_imx->dma_chan_rx) { > + dma_release_channel(spi_imx->dma_chan_rx); > + spi_imx->dma_chan_rx = NULL; > + } > + > + if (spi_imx->dma_chan_tx) { > + dma_release_channel(spi_imx->dma_chan_tx); > + spi_imx->dma_chan_tx = NULL; > + } > + > + spi_imx->dma_is_inited = 0; > +} > + > +static void spi_imx_dma_rx_callback(void *cookie) > +{ > + struct spi_imx_data *spi_imx = (struct spi_imx_data *)cookie; > + > + complete(&spi_imx->dma_rx_completion); > + > +} > + > +static void spi_imx_dma_tx_callback(void *cookie) > +{ > + struct spi_imx_data *spi_imx = (struct spi_imx_data *)cookie; > + > + complete(&spi_imx->dma_tx_completion); > +} > + > +static int spi_imx_sdma_transfer(struct spi_device *spi, > + struct spi_transfer *transfer) > +{ > + struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > + int ret = 0; > + int sg_num; > + int loop; > + int left; > + u32 dma; > + > + struct scatterlist *sg_rx, *sg_tx; > + struct dma_async_tx_descriptor *txdesc; > + struct dma_async_tx_descriptor *rxdesc; > + > + init_completion(&spi_imx->dma_rx_completion); > + init_completion(&spi_imx->dma_tx_completion); > + > + /* > + * Get the valid physical address for the tx buf, if the rx buf address > + * is null or it cannot be mapped, we should allocate memory for it. > + */ > + if (virt_addr_valid(transfer->tx_buf)) { > + transfer->tx_dma = dma_map_single(spi_imx->dev, > + (void *)transfer->tx_buf, transfer->len, > + DMA_TO_DEVICE); > + if (dma_mapping_error(spi_imx->dev, transfer->tx_dma)) { > + dev_err(spi_imx->dev, > + "Memory dma map fail, line = %d\n", __LINE__); > + ret = -EFAULT; > + goto err_tx; > + } > + } else { > + if (transfer->len > spi_imx->dma_tx_tmpbuf_size) { > + if (!spi_imx->dma_tx_tmpbuf_size) { > + kfree(spi_imx->dma_tx_tmpbuf); > + spi_imx->dma_tx_tmpbuf_size = 0; > + } > + > + spi_imx->dma_tx_tmpbuf = > + kzalloc(transfer->len, GFP_KERNEL); > + if (NULL == spi_imx->dma_tx_tmpbuf) { > + dev_err(spi_imx->dev, "Alloc memory fail.\n"); > + ret = -EFAULT; > + goto err_tx; > + } > + spi_imx->dma_tx_tmpbuf_size = transfer->len; > + } > + > + /* > + * Move the transfered data to new buffer from the old one > + * that cannot be mapped. 
> + */ > + if (transfer->tx_buf) > + memcpy(spi_imx->dma_tx_tmpbuf, > + transfer->tx_buf, > + transfer->len); > + > + spi_imx->dma_tx_tmpbuf_phy_addr = dma_map_single(spi_imx->dev, > + spi_imx->dma_tx_tmpbuf, transfer->len, > + DMA_TO_DEVICE); > + > + if (dma_mapping_error(spi_imx->dev, > + spi_imx->dma_tx_tmpbuf_phy_addr)) { > + dev_err(spi_imx->dev, > + "Memory dma map fail, line = %d\n", > + __LINE__); > + ret = -EFAULT; > + goto err_tx; > + } > + > + transfer->tx_dma = spi_imx->dma_tx_tmpbuf_phy_addr; > + } > + > + /* Prepare sg for tx sdma. */ > + sg_num = ((transfer->len - 1) / MAX_SDMA_BD_BYTES) + 1; > + sg_tx = kzalloc(sg_num * sizeof(struct scatterlist), GFP_KERNEL); > + if (NULL == sg_tx) { > + dev_err(spi_imx->dev, > + "Memory allocate fail, line = %d\n", > + __LINE__); > + goto err_tx_sg; > + } > + sg_init_table(sg_tx, sg_num); > + for (loop = 0; loop < (sg_num - 1); loop++) { > + sg_dma_address(&sg_tx[loop]) = > + transfer->tx_dma + loop * MAX_SDMA_BD_BYTES; > + sg_dma_len(&sg_tx[loop]) = MAX_SDMA_BD_BYTES; > + } > + > + sg_dma_address(&sg_tx[loop]) = > + transfer->tx_dma + loop * MAX_SDMA_BD_BYTES; > + sg_dma_len(&sg_tx[loop]) = transfer->len - loop * MAX_SDMA_BD_BYTES; > + > + txdesc = dmaengine_prep_slave_sg(spi_imx->dma_chan_tx, > + sg_tx, sg_num , DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT); > + if (!txdesc) { > + ret = -EFAULT; > + goto err_rx; > + } > + > + txdesc->callback = spi_imx_dma_tx_callback; > + txdesc->callback_param = (void *)spi_imx; > + > + /* > + * Get the valid physical address for the rx buf, if the rx buf address > + * is null or it cannot be mapped, we should allocate memory for it. > + */ > + if (virt_addr_valid(transfer->rx_buf)) { > + transfer->rx_dma = dma_map_single(spi_imx->dev, > + transfer->rx_buf, transfer->len, > + DMA_FROM_DEVICE); > + > + if (dma_mapping_error(spi_imx->dev, transfer->rx_dma)) { > + dev_err(spi_imx->dev, > + "Memory allocate fail, line = %d\n", > + __LINE__); > + ret = -EFAULT; > + goto err_rx; > + } > + } else { > + if (transfer->len > spi_imx->dma_rx_tmpbuf_size) { > + if (!spi_imx->dma_rx_tmpbuf_size) { > + kfree(spi_imx->dma_rx_tmpbuf); > + spi_imx->dma_rx_tmpbuf_size = 0; > + } > + > + spi_imx->dma_rx_tmpbuf = > + kzalloc(transfer->len, GFP_KERNEL); > + if (NULL == spi_imx->dma_rx_tmpbuf) { > + dev_err(spi_imx->dev, "Alloc memory fail.\n"); > + ret = -EFAULT; > + goto err_rx; > + } > + spi_imx->dma_rx_tmpbuf_size = transfer->len; > + } > + > + spi_imx->dma_rx_tmpbuf_phy_addr = dma_map_single(spi_imx->dev, > + spi_imx->dma_rx_tmpbuf, transfer->len, > + DMA_FROM_DEVICE); > + > + if (dma_mapping_error(spi_imx->dev, > + spi_imx->dma_rx_tmpbuf_phy_addr)) { > + dev_err(spi_imx->dev, > + "Memory dma map fail, line = %d\n", > + __LINE__); > + ret = -EFAULT; > + goto err_rx; > + } > + > + transfer->rx_dma = spi_imx->dma_rx_tmpbuf_phy_addr; > + } > + > + /* Prepare sg for rx sdma. 
*/ > + sg_num = ((transfer->len - 1) / MAX_SDMA_BD_BYTES) + 1; > + sg_rx = kzalloc(sg_num * sizeof(struct scatterlist), GFP_KERNEL); > + if (NULL == sg_rx) { > + dev_err(spi_imx->dev, > + "Memory dma map fail, line = %d\n", __LINE__); > + goto err_rx_sg; > + } > + sg_init_table(sg_rx, sg_num); > + for (loop = 0; loop < (sg_num - 1); loop++) { > + sg_dma_address(&sg_rx[loop]) = > + transfer->rx_dma + loop * MAX_SDMA_BD_BYTES; > + sg_dma_len(&sg_rx[loop]) = MAX_SDMA_BD_BYTES; > + } > + > + sg_dma_address(&sg_rx[loop]) = > + transfer->rx_dma + loop * MAX_SDMA_BD_BYTES; > + sg_dma_len(&sg_rx[loop]) = transfer->len - loop * MAX_SDMA_BD_BYTES; > + rxdesc = dmaengine_prep_slave_sg(spi_imx->dma_chan_rx, > + sg_rx, sg_num , DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT); > + if (!rxdesc) { > + ret = -EFAULT; > + goto err_desc; > + } > + > + rxdesc->callback = spi_imx_dma_rx_callback; > + rxdesc->callback_param = (void *)spi_imx; > + > + /* Trigger the cspi module. */ > + spi_imx->dma_finished = 0; > + > + spi_imx->devtype_data->trigger(spi_imx); > + > + dmaengine_submit(txdesc); > + dmaengine_submit(rxdesc); > + > + dma_async_issue_pending(spi_imx->dma_chan_tx); > + dma_async_issue_pending(spi_imx->dma_chan_rx); > + > + /* Wait SDMA to finish the data transfer.*/ > + ret = wait_for_completion_timeout(&spi_imx->dma_tx_completion, > + msecs_to_jiffies(3000)); > + if (!ret) { > + dev_err(spi_imx->dev, > + "I/O Error in DMA TX, line = %d ####\n", __LINE__); > + dmaengine_terminate_all(spi_imx->dma_chan_tx); > + goto err_desc; > + } else { > + dma = readl(spi_imx->base + MX51_ECSPI_DMA); > + dma = dma & (~MX51_ECSPI_DMA_RXT_WML_MASK); > + /* Change RX_DMA_LENGTH trigger dma fetch tail data */ > + left = transfer->len & (~spi_imx->rxt_wml); > + if (left) > + writel(dma | (left << MX51_ECSPI_DMA_RXT_WML_OFFSET), > + spi_imx->base + MX51_ECSPI_DMA); > + > + ret = wait_for_completion_timeout(&spi_imx->dma_rx_completion, > + msecs_to_jiffies(3000)); > + > + writel(dma | (spi_imx->rxt_wml << MX51_ECSPI_DMA_RXT_WML_OFFSET), > + spi_imx->base + MX51_ECSPI_DMA); > + if (!ret) { > + dev_err(spi_imx->dev, > + "I/O Error in DMA RX. 
len %d, line = %d\n", > + transfer->len, > + __LINE__); > + spi_imx->devtype_data->reset(spi_imx); > + dmaengine_terminate_all(spi_imx->dma_chan_rx); > + } > + } > + > + /* Move the transfered data to rx buf when it cannot be mapped.*/ > + if (transfer->rx_buf && (!virt_addr_valid(transfer->rx_buf))) > + memcpy(transfer->rx_buf, > + spi_imx->dma_rx_tmpbuf, > + transfer->len); > + > +err_desc: > + kfree(sg_rx); > +err_rx_sg: > + dma_unmap_single(spi_imx->dev, transfer->rx_dma, > + transfer->len, DMA_TO_DEVICE); > +err_rx: > + kfree(sg_tx); > +err_tx_sg: > + dma_unmap_single(spi_imx->dev, transfer->tx_dma, > + transfer->len, DMA_FROM_DEVICE); > +err_tx: > + spi_imx->dma_finished = 1; > + spi_imx->devtype_data->trigger(spi_imx); > + if ((!ret) || (-EFAULT == ret)) > + return -EIO; > + else > + return transfer->len; > +} > + > +static int spi_imx_pio_transfer(struct spi_device *spi, > struct spi_transfer *transfer) > { > struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > @@ -729,6 +1088,27 @@ static int spi_imx_transfer(struct spi_device *spi, > return transfer->len; > } > > +static int spi_imx_transfer(struct spi_device *spi, > + struct spi_transfer *transfer) > +{ > + struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > + > + if (spi_imx->dma_chan_tx && spi_imx->dma_chan_rx) { > + /* > + * Don't use sdma when the size of data to be transfered is > + * lower then SDMA wartermark. > + */ > + if ((transfer->len >= spi_imx->rx_wml) && > + (transfer->len > spi_imx->tx_wml)) { > + spi_imx->usedma = 1; > + return spi_imx_sdma_transfer(spi, transfer); > + } > + } > + > + spi_imx->usedma = 0; > + return spi_imx_pio_transfer(spi, transfer); > +} > + > static int spi_imx_setup(struct spi_device *spi) > { > struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > @@ -778,6 +1158,56 @@ spi_imx_unprepare_message(struct spi_master *master, struct spi_message *msg) > return 0; > } > > +static int spi_imx_sdma_init(struct spi_imx_data *spi_imx) > +{ > + struct dma_slave_config slave_config = {}; > + struct device *dev = spi_imx->dev; > + int ret; > + > + > + /* Prepare for TX : */ > + spi_imx->dma_chan_tx = dma_request_slave_channel(dev, "tx"); > + if (!spi_imx->dma_chan_tx) { > + dev_err(dev, "cannot get the TX DMA channel!\n"); > + ret = -EINVAL; > + goto err; > + } > + > + slave_config.direction = DMA_MEM_TO_DEV; > + slave_config.dst_addr = spi_imx->mapbase + MXC_CSPITXDATA; > + slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; > + slave_config.dst_maxburst = spi_imx_get_fifosize(spi_imx) / 2; > + ret = dmaengine_slave_config(spi_imx->dma_chan_tx, &slave_config); > + if (ret) { > + dev_err(dev, "error in TX dma configuration."); > + goto err; > + } > + > + /* Prepare for RX : */ > + spi_imx->dma_chan_rx = dma_request_slave_channel(dev, "rx"); > + if (!spi_imx->dma_chan_rx) { > + dev_dbg(dev, "cannot get the DMA channel.\n"); > + ret = -EINVAL; > + goto err; > + } > + > + slave_config.direction = DMA_DEV_TO_MEM; > + slave_config.src_addr = spi_imx->mapbase + MXC_CSPIRXDATA; > + slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; > + slave_config.src_maxburst = spi_imx_get_fifosize(spi_imx) / 2; > + ret = dmaengine_slave_config(spi_imx->dma_chan_rx, &slave_config); > + if (ret) { > + dev_err(dev, "error in RX dma configuration.\n"); > + goto err; > + } > + spi_imx->dma_is_inited = 1; > + > + return 0; > +err: > + spi_imx_sdma_exit(spi_imx); > + return ret; > +} > + > static int spi_imx_probe(struct platform_device *pdev) > { > struct device_node 
*np = pdev->dev.of_node; > @@ -849,6 +1279,8 @@ static int spi_imx_probe(struct platform_device *pdev) > (struct spi_imx_devtype_data *) pdev->id_entry->driver_data; > > res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + if (res) > + spi_imx->mapbase = res->start; > spi_imx->base = devm_ioremap_resource(&pdev->dev, res); > if (IS_ERR(spi_imx->base)) { > ret = PTR_ERR(spi_imx->base); > @@ -890,6 +1322,9 @@ static int spi_imx_probe(struct platform_device *pdev) > > spi_imx->spi_clk = clk_get_rate(spi_imx->clk_per); > > + spi_imx->dev = &pdev->dev; > + spi_imx_sdma_init(spi_imx); > + > spi_imx->devtype_data->reset(spi_imx); > > spi_imx->devtype_data->intctrl(spi_imx, 0); > -- > 1.7.8 > >
The list linux-spi@vger.kernel.org should be copied. I also added Marek who has great experience of play DMA on SPI. On Sat, Jan 04, 2014 at 06:53:51AM +0800, Frank Li wrote: > After enable DMA > > spi-nor read speed is > dd if=/dev/mtd0 of=/dev/null bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 0.720402 s, 1.5 MB/s > > spi-nor write speed is > dd if=/dev/zero of=/dev/mtd0 bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 3.56044 s, 295 kB/s > > Before enable DMA > > spi-nor read speed is > dd if=/dev/mtd0 of=/dev/null bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 2.37717 s, 441 kB/s > > spi-nor write speed is > > dd if=/dev/zero of=/dev/mtd0 bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 4.83181 s, 217 kB/s > > Signed-off-by: Frank Li <Frank.Li@freescale.com> > --- > drivers/spi/spi-imx.c | 447 ++++++++++++++++++++++++++++++++++++++++++++++++- > 1 files changed, 441 insertions(+), 6 deletions(-) > > Change from v1. > 1. check if res is null at res = platform_get_resource(pdev, IORESOURCE_MEM, 0) > 2. fix failure transfer when len is not multiple watermark > > diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c > index b80f2f7..1a9099a 100644 > --- a/drivers/spi/spi-imx.c > +++ b/drivers/spi/spi-imx.c > @@ -39,6 +39,10 @@ > #include <linux/of_gpio.h> > > #include <linux/platform_data/spi-imx.h> > +#include <linux/dma-mapping.h> > +#include <linux/platform_data/dma-imx.h> > +#include <linux/dmaengine.h> > + Nit: we already have one blank in between. > > #define DRIVER_NAME "spi_imx" > > @@ -52,6 +56,9 @@ > #define MXC_INT_RR (1 << 0) /* Receive data ready interrupt */ > #define MXC_INT_TE (1 << 1) /* Transmit FIFO empty interrupt */ > > +/* The maximum bytes that a sdma BD can transfer.*/ > +#define MAX_SDMA_BD_BYTES (1 << 15) > + > struct spi_imx_config { > unsigned int speed_hz; > unsigned int bpw; > @@ -84,6 +91,7 @@ struct spi_imx_data { > > struct completion xfer_done; > void __iomem *base; > + resource_size_t mapbase; I think the following definition is more easy to understand. phys_addr_t pbase; > int irq; > struct clk *clk_per; > struct clk *clk_ipg; > @@ -92,6 +100,29 @@ struct spi_imx_data { > unsigned int count; > void (*tx)(struct spi_imx_data *); > void (*rx)(struct spi_imx_data *); > + int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t); > + struct dma_chan *dma_chan_rx, *dma_chan_tx; Having them on two line fits better to the style used here. > + unsigned int dma_is_inited; All other lines use space in between. Why these two use tabs? > + struct device *dev; > + > + struct completion dma_rx_completion; > + struct completion dma_tx_completion; > + > + u8 *dma_rx_tmpbuf; > + unsigned int dma_rx_tmpbuf_size; > + unsigned int dma_rx_tmpbuf_phy_addr; Use type dma_addr_t for it? > + > + u8 *dma_tx_tmpbuf; > + unsigned int dma_tx_tmpbuf_size; > + unsigned int dma_tx_tmpbuf_phy_addr; > + > + unsigned int usedma; > + unsigned int dma_finished; > + /* SDMA wartermark */ > + u32 rx_wml; > + u32 tx_wml; > + u32 rxt_wml; > + > void *rx_buf; > const void *tx_buf; > unsigned int txfifo; /* number of words pushed in tx FIFO */ > @@ -185,6 +216,7 @@ static unsigned int spi_imx_clkdiv_2(unsigned int fin, > #define MX51_ECSPI_CTRL 0x08 > #define MX51_ECSPI_CTRL_ENABLE (1 << 0) > #define MX51_ECSPI_CTRL_XCH (1 << 2) > +#define MX51_ECSPI_CTRL_SMC (1 << 3) You do not have to follow the wrong pattern. One space after '<<' is good enough. 
> #define MX51_ECSPI_CTRL_MODE_MASK (0xf << 4) > #define MX51_ECSPI_CTRL_POSTDIV_OFFSET 8 > #define MX51_ECSPI_CTRL_PREDIV_OFFSET 12 > @@ -202,9 +234,22 @@ static unsigned int spi_imx_clkdiv_2(unsigned int fin, > #define MX51_ECSPI_INT_TEEN (1 << 0) > #define MX51_ECSPI_INT_RREN (1 << 3) > > +#define MX51_ECSPI_DMA 0x14 > +#define MX51_ECSPI_DMA_TX_WML_OFFSET 0 > +#define MX51_ECSPI_DMA_TX_WML_MASK 0x3F It seems that the common pattern for hex value in this file is to use lowercase. > +#define MX51_ECSPI_DMA_RX_WML_OFFSET 16 > +#define MX51_ECSPI_DMA_RX_WML_MASK (0x3F << 16) > +#define MX51_ECSPI_DMA_RXT_WML_OFFSET 24 > +#define MX51_ECSPI_DMA_RXT_WML_MASK (0x3F << 16) Shouldn't it be (0x3F << 24)? > + > +#define MX51_ECSPI_DMA_TEDEN_OFFSET 7 > +#define MX51_ECSPI_DMA_RXDEN_OFFSET 23 > +#define MX51_ECSPI_DMA_RXTDEN_OFFSET 31 Please use tabs to align these numbers in the same column. > + > #define MX51_ECSPI_STAT 0x18 > #define MX51_ECSPI_STAT_RR (1 << 3) > > +#define MX51_ECSPI_TESTREG 0x20 It's used nowhere? > /* MX51 eCSPI */ > static unsigned int mx51_ecspi_clkdiv(unsigned int fin, unsigned int fspi) > { > @@ -255,16 +300,28 @@ static void __maybe_unused mx51_ecspi_trigger(struct spi_imx_data *spi_imx) > { > u32 reg; > > - reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > - reg |= MX51_ECSPI_CTRL_XCH; > - writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + if (!spi_imx->usedma) { So the DMA support is added for imx51-ecspi type device only? If that's case, we should make it clear in commit log. > + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > + reg |= MX51_ECSPI_CTRL_XCH; > + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + } else { > + if (!spi_imx->dma_finished) { > + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > + reg |= MX51_ECSPI_CTRL_SMC; > + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + } else { > + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > + reg &= (~MX51_ECSPI_CTRL_SMC); Unnecessary parentheses. > + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + } Wouldn't it make more sense to write it like below? reg = readl(spi_imx->base + MX51_ECSPI_CTRL); if (!spi_imx->dma_finished) reg |= MX51_ECSPI_CTRL_SMC; else reg &= ~MX51_ECSPI_CTRL_SMC; writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > + } > } > > static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx, > struct spi_imx_config *config) > { > - u32 ctrl = MX51_ECSPI_CTRL_ENABLE, cfg = 0; > - > + u32 ctrl = MX51_ECSPI_CTRL_ENABLE, cfg = 0, dma = 0; > + u32 tx_wml_cfg, rx_wml_cfg, rxt_wml_cfg; > /* > * The hardware seems to have a race condition when changing modes. The > * current assumption is that the selection of the channel arrives > @@ -297,6 +354,30 @@ static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx, > writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL); > writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG); > > + /* > + * Configure the DMA register: setup the watermark > + * and enable DMA request. 
> + */ > + if (spi_imx->dma_is_inited) { > + dma = readl(spi_imx->base + MX51_ECSPI_DMA); > + > + spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2; > + spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2; > + spi_imx->rxt_wml = spi_imx_get_fifosize(spi_imx) / 2; > + rx_wml_cfg = spi_imx->rx_wml << MX51_ECSPI_DMA_RX_WML_OFFSET; > + tx_wml_cfg = spi_imx->tx_wml << MX51_ECSPI_DMA_TX_WML_OFFSET; > + rxt_wml_cfg = spi_imx->rxt_wml << MX51_ECSPI_DMA_RXT_WML_OFFSET; > + dma = (dma & (~MX51_ECSPI_DMA_TX_WML_MASK) > + & (~MX51_ECSPI_DMA_RX_WML_MASK) > + & (~MX51_ECSPI_DMA_RXT_WML_MASK)) Unnecessary parentheses. > + | rx_wml_cfg | tx_wml_cfg | rxt_wml_cfg > + | (1 << MX51_ECSPI_DMA_TEDEN_OFFSET) > + | (1 << MX51_ECSPI_DMA_RXDEN_OFFSET) > + | (1 << MX51_ECSPI_DMA_RXTDEN_OFFSET); > + > + writel(dma, spi_imx->base + MX51_ECSPI_DMA); > + } > + > return 0; > } ... skip the dma code, and hope Marek can help review it ... > @@ -729,6 +1088,27 @@ static int spi_imx_transfer(struct spi_device *spi, > return transfer->len; > } > > +static int spi_imx_transfer(struct spi_device *spi, > + struct spi_transfer *transfer) > +{ > + struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > + > + if (spi_imx->dma_chan_tx && spi_imx->dma_chan_rx) { > + /* > + * Don't use sdma when the size of data to be transfered is > + * lower then SDMA wartermark. > + */ > + if ((transfer->len >= spi_imx->rx_wml) && > + (transfer->len > spi_imx->tx_wml)) { > + spi_imx->usedma = 1; > + return spi_imx_sdma_transfer(spi, transfer); > + } > + } > + > + spi_imx->usedma = 0; > + return spi_imx_pio_transfer(spi, transfer); > +} > + > static int spi_imx_setup(struct spi_device *spi) > { > struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > @@ -778,6 +1158,56 @@ spi_imx_unprepare_message(struct spi_master *master, struct spi_message *msg) > return 0; > } > > +static int spi_imx_sdma_init(struct spi_imx_data *spi_imx) > +{ > + struct dma_slave_config slave_config = {}; > + struct device *dev = spi_imx->dev; > + int ret; > + > + > + /* Prepare for TX : */ > + spi_imx->dma_chan_tx = dma_request_slave_channel(dev, "tx"); > + if (!spi_imx->dma_chan_tx) { > + dev_err(dev, "cannot get the TX DMA channel!\n"); So we will see this error message for each type of imx spi device, though the driver only supports DMA for imx51-ecspi. 
Shawn > + ret = -EINVAL; > + goto err; > + } > + > + slave_config.direction = DMA_MEM_TO_DEV; > + slave_config.dst_addr = spi_imx->mapbase + MXC_CSPITXDATA; > + slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; > + slave_config.dst_maxburst = spi_imx_get_fifosize(spi_imx) / 2; > + ret = dmaengine_slave_config(spi_imx->dma_chan_tx, &slave_config); > + if (ret) { > + dev_err(dev, "error in TX dma configuration."); > + goto err; > + } > + > + /* Prepare for RX : */ > + spi_imx->dma_chan_rx = dma_request_slave_channel(dev, "rx"); > + if (!spi_imx->dma_chan_rx) { > + dev_dbg(dev, "cannot get the DMA channel.\n"); > + ret = -EINVAL; > + goto err; > + } > + > + slave_config.direction = DMA_DEV_TO_MEM; > + slave_config.src_addr = spi_imx->mapbase + MXC_CSPIRXDATA; > + slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; > + slave_config.src_maxburst = spi_imx_get_fifosize(spi_imx) / 2; > + ret = dmaengine_slave_config(spi_imx->dma_chan_rx, &slave_config); > + if (ret) { > + dev_err(dev, "error in RX dma configuration.\n"); > + goto err; > + } > + spi_imx->dma_is_inited = 1; > + > + return 0; > +err: > + spi_imx_sdma_exit(spi_imx); > + return ret; > +} > + > static int spi_imx_probe(struct platform_device *pdev) > { > struct device_node *np = pdev->dev.of_node; > @@ -849,6 +1279,8 @@ static int spi_imx_probe(struct platform_device *pdev) > (struct spi_imx_devtype_data *) pdev->id_entry->driver_data; > > res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + if (res) > + spi_imx->mapbase = res->start; > spi_imx->base = devm_ioremap_resource(&pdev->dev, res); > if (IS_ERR(spi_imx->base)) { > ret = PTR_ERR(spi_imx->base); > @@ -890,6 +1322,9 @@ static int spi_imx_probe(struct platform_device *pdev) > > spi_imx->spi_clk = clk_get_rate(spi_imx->clk_per); > > + spi_imx->dev = &pdev->dev; > + spi_imx_sdma_init(spi_imx); > + > spi_imx->devtype_data->reset(spi_imx); > > spi_imx->devtype_data->intctrl(spi_imx, 0); > -- > 1.7.8 > > > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
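Folding Shawn's suggested form back into the whole trigger callback gives roughly the following; this is a sketch of his suggestion, not the code as posted:

static void __maybe_unused mx51_ecspi_trigger(struct spi_imx_data *spi_imx)
{
	u32 reg = readl(spi_imx->base + MX51_ECSPI_CTRL);

	if (!spi_imx->usedma)
		reg |= MX51_ECSPI_CTRL_XCH;	/* PIO: start the exchange now */
	else if (!spi_imx->dma_finished)
		reg |= MX51_ECSPI_CTRL_SMC;	/* DMA: start when TX FIFO is written */
	else
		reg &= ~MX51_ECSPI_CTRL_SMC;

	writel(reg, spi_imx->base + MX51_ECSPI_CTRL);
}

A single read-modify-write also avoids repeating the readl()/writel() pair in every branch.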
On Friday, January 03, 2014 at 11:53:51 PM, Frank Li wrote: > After enable DMA > > spi-nor read speed is > dd if=/dev/mtd0 of=/dev/null bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 0.720402 s, 1.5 MB/s > > spi-nor write speed is > dd if=/dev/zero of=/dev/mtd0 bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 3.56044 s, 295 kB/s > > Before enable DMA > > spi-nor read speed is > dd if=/dev/mtd0 of=/dev/null bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 2.37717 s, 441 kB/s > > spi-nor write speed is > > dd if=/dev/zero of=/dev/mtd0 bs=1M count=1 > 1+0 records in > 1+0 records out > 1048576 bytes (1.0 MB) copied, 4.83181 s, 217 kB/s > > Signed-off-by: Frank Li <Frank.Li@freescale.com> [...] > + > +static void spi_imx_dma_rx_callback(void *cookie) > +{ > + struct spi_imx_data *spi_imx = (struct spi_imx_data *)cookie; > + > + complete(&spi_imx->dma_rx_completion); > + > +} > + > +static void spi_imx_dma_tx_callback(void *cookie) > +{ > + struct spi_imx_data *spi_imx = (struct spi_imx_data *)cookie; > + > + complete(&spi_imx->dma_tx_completion); > +} > + > +static int spi_imx_sdma_transfer(struct spi_device *spi, > + struct spi_transfer *transfer) > +{ This function needs splitting into multiple smaller ones, seriously. This doesn't fit either on my rMBP's screen or on my desktop's _pivoted_ 27" widescreen 2500x1400 LCD ! > + struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > + int ret = 0; > + int sg_num; > + int loop; > + int left; > + u32 dma; > + > + struct scatterlist *sg_rx, *sg_tx; > + struct dma_async_tx_descriptor *txdesc; > + struct dma_async_tx_descriptor *rxdesc; > + > + init_completion(&spi_imx->dma_rx_completion); > + init_completion(&spi_imx->dma_tx_completion); This should be reinit_completion(), the init_completion() should happen in the probe function, CAREFUL about this ! > + > + /* > + * Get the valid physical address for the tx buf, if the rx buf address > + * is null or it cannot be mapped, we should allocate memory for it. > + */ I will skip this function until it's properly split. But anyway... I know the MXC SPI controller works in full-duplex mode, thus must have two equally big buffers (one for RX and one for TX) even if used in half-duplex mode, right? The problem I perceive here is that when I do for example 4 MB long continuous half-duplex transfer with the IMX SPI, your code will happily allocate 4MB big buffer, right ? Hence my suggestion, won't it be better to split such long transfers into a chain of DMA descriptors AND use a small (say, 16KiB) buffer for the unwanted direction ? This way, you would allocate the small 16KiB block only once (heck, you can even allocate it in probe() ), and each descriptor would point to this block, overwriting it or sourcing zeroes from it . But please, correct me if I misunderstood your code. [...] > + > +static int spi_imx_pio_transfer(struct spi_device *spi, > struct spi_transfer *transfer) > { > struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > @@ -729,6 +1088,27 @@ static int spi_imx_transfer(struct spi_device *spi, > return transfer->len; > } > > +static int spi_imx_transfer(struct spi_device *spi, > + struct spi_transfer *transfer) > +{ > + struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > + > + if (spi_imx->dma_chan_tx && spi_imx->dma_chan_rx) { You do have spi_imx->dma_is_inited variable , so use it here. 
> + /* > + * Don't use sdma when the size of data to be transfered is > + * lower then SDMA wartermark. > + */ > + if ((transfer->len >= spi_imx->rx_wml) && > + (transfer->len > spi_imx->tx_wml)) { Why do you use -ge in the first test and -gt in the second ? Besides, the formating of the condition is really weird. > + spi_imx->usedma = 1; Don't you need to do some kind of locking here so that spi_imx->usedma won't be overwritten if multiple users enter the spi_imx_transfer() function ? I am not sure if this is called from a context already protected by the SPI framework's mutex or not. > + return spi_imx_sdma_transfer(spi, transfer); > + } > + } > + > + spi_imx->usedma = 0; > + return spi_imx_pio_transfer(spi, transfer); > +} > + > static int spi_imx_setup(struct spi_device *spi) > { > struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); > @@ -778,6 +1158,56 @@ spi_imx_unprepare_message(struct spi_master *master, > struct spi_message *msg) return 0; > } > > +static int spi_imx_sdma_init(struct spi_imx_data *spi_imx) > +{ > + struct dma_slave_config slave_config = {}; > + struct device *dev = spi_imx->dev; > + int ret; > + > + One newline too much here. > + /* Prepare for TX : */ I suppose this should be "Prepare TX DMA" ? > + spi_imx->dma_chan_tx = dma_request_slave_channel(dev, "tx"); > + if (!spi_imx->dma_chan_tx) { > + dev_err(dev, "cannot get the TX DMA channel!\n"); Sentence usually starts with capital letter ;-) > + ret = -EINVAL; > + goto err; > + } > + > + slave_config.direction = DMA_MEM_TO_DEV; > + slave_config.dst_addr = spi_imx->mapbase + MXC_CSPITXDATA; > + slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; > + slave_config.dst_maxburst = spi_imx_get_fifosize(spi_imx) / 2; > + ret = dmaengine_slave_config(spi_imx->dma_chan_tx, &slave_config); > + if (ret) { > + dev_err(dev, "error in TX dma configuration."); > + goto err; > + } > + > + /* Prepare for RX : */ > + spi_imx->dma_chan_rx = dma_request_slave_channel(dev, "rx"); > + if (!spi_imx->dma_chan_rx) { > + dev_dbg(dev, "cannot get the DMA channel.\n"); I'd turn all these dev_err() calls here into dev_dbg() and I'd print an error message if this function returns !0 at where's it's called from below. > + ret = -EINVAL; > + goto err; > + } > + > + slave_config.direction = DMA_DEV_TO_MEM; > + slave_config.src_addr = spi_imx->mapbase + MXC_CSPIRXDATA; > + slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; > + slave_config.src_maxburst = spi_imx_get_fifosize(spi_imx) / 2; > + ret = dmaengine_slave_config(spi_imx->dma_chan_rx, &slave_config); > + if (ret) { > + dev_err(dev, "error in RX dma configuration.\n"); > + goto err; > + } > + spi_imx->dma_is_inited = 1; > + > + return 0; > +err: > + spi_imx_sdma_exit(spi_imx); > + return ret; > +} > + > static int spi_imx_probe(struct platform_device *pdev) > { > struct device_node *np = pdev->dev.of_node; > @@ -849,6 +1279,8 @@ static int spi_imx_probe(struct platform_device *pdev) > (struct spi_imx_devtype_data *) pdev->id_entry->driver_data; > > res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + if (res) > + spi_imx->mapbase = res->start; > spi_imx->base = devm_ioremap_resource(&pdev->dev, res); > if (IS_ERR(spi_imx->base)) { > ret = PTR_ERR(spi_imx->base); > @@ -890,6 +1322,9 @@ static int spi_imx_probe(struct platform_device *pdev) > > spi_imx->spi_clk = clk_get_rate(spi_imx->clk_per); > > + spi_imx->dev = &pdev->dev; > + spi_imx_sdma_init(spi_imx); You must check return value here. 
I would say you want to print an error message here, but you also want to avoid failing entirely, just indicate the DMA couldn't be init'd and you'd just use PIO. > + > spi_imx->devtype_data->reset(spi_imx); > > spi_imx->devtype_data->intctrl(spi_imx, 0);
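The completion handling Marek points out would, in sketch form, move the one-time setup into probe and only re-arm per transfer (the placement shown is an assumption, not the posted patch):

	/* in spi_imx_probe(), once: */
	init_completion(&spi_imx->dma_rx_completion);
	init_completion(&spi_imx->dma_tx_completion);

	/* in spi_imx_sdma_transfer(), before submitting the descriptors: */
	reinit_completion(&spi_imx->dma_rx_completion);
	reinit_completion(&spi_imx->dma_tx_completion);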
On Tuesday, January 14, 2014 at 07:02:58 AM, Shawn Guo wrote: > The list linux-spi@vger.kernel.org should be copied. > > I also added Marek who has great experience of play DMA on SPI. I'm flattered, thanks ;-) Best regards, Marek Vasut
On Thu, Jan 09, 2014 at 04:56:02PM -0600, Frank Li wrote: > > Signed-off-by: Frank Li <Frank.Li@freescale.com> > ping! Don't send contentless pings, either your mail has been lost (in which case it'll need resending anyway) or this is just yet another mail to read. You should also, as covered in SubmittingPatches, CC any patches to both the maintainers (remembering to use the address in MAINTAINERS) and the relevant mailing list. For SPI that's: SPI SUBSYSTEM M: Mark Brown <broonie@kernel.org> L: linux-spi@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git Q: http://patchwork.kernel.org/project/spi-devel-general/list/ S: Maintained F: Documentation/spi/ F: drivers/spi/ F: include/linux/spi/ F: include/uapi/linux/spi/ It's OK to send to other places as well but don't rely on them, for example my linux-arm-kernel folder currently has 1405 unread messages in it.
On Tue, Jan 14, 2014 at 12:02 AM, Shawn Guo <shawn.guo@linaro.org> wrote: > The list linux-spi@vger.kernel.org should be copied. > > I also added Marek who has great experience of play DMA on SPI. > > On Sat, Jan 04, 2014 at 06:53:51AM +0800, Frank Li wrote: >> After enable DMA >> >> spi-nor read speed is >> dd if=/dev/mtd0 of=/dev/null bs=1M count=1 >> 1+0 records in >> 1+0 records out >> 1048576 bytes (1.0 MB) copied, 0.720402 s, 1.5 MB/s >> >> spi-nor write speed is >> dd if=/dev/zero of=/dev/mtd0 bs=1M count=1 >> 1+0 records in >> 1+0 records out >> 1048576 bytes (1.0 MB) copied, 3.56044 s, 295 kB/s >> >> Before enable DMA >> >> spi-nor read speed is >> dd if=/dev/mtd0 of=/dev/null bs=1M count=1 >> 1+0 records in >> 1+0 records out >> 1048576 bytes (1.0 MB) copied, 2.37717 s, 441 kB/s >> >> spi-nor write speed is >> >> dd if=/dev/zero of=/dev/mtd0 bs=1M count=1 >> 1+0 records in >> 1+0 records out >> 1048576 bytes (1.0 MB) copied, 4.83181 s, 217 kB/s >> >> Signed-off-by: Frank Li <Frank.Li@freescale.com> >> --- >> drivers/spi/spi-imx.c | 447 ++++++++++++++++++++++++++++++++++++++++++++++++- >> 1 files changed, 441 insertions(+), 6 deletions(-) >> >> Change from v1. >> 1. check if res is null at res = platform_get_resource(pdev, IORESOURCE_MEM, 0) >> 2. fix failure transfer when len is not multiple watermark >> >> diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c >> index b80f2f7..1a9099a 100644 >> --- a/drivers/spi/spi-imx.c >> +++ b/drivers/spi/spi-imx.c >> @@ -39,6 +39,10 @@ >> #include <linux/of_gpio.h> >> >> #include <linux/platform_data/spi-imx.h> >> +#include <linux/dma-mapping.h> >> +#include <linux/platform_data/dma-imx.h> >> +#include <linux/dmaengine.h> >> + > > Nit: we already have one blank in between. > >> >> #define DRIVER_NAME "spi_imx" >> >> @@ -52,6 +56,9 @@ >> #define MXC_INT_RR (1 << 0) /* Receive data ready interrupt */ >> #define MXC_INT_TE (1 << 1) /* Transmit FIFO empty interrupt */ >> >> +/* The maximum bytes that a sdma BD can transfer.*/ >> +#define MAX_SDMA_BD_BYTES (1 << 15) >> + >> struct spi_imx_config { >> unsigned int speed_hz; >> unsigned int bpw; >> @@ -84,6 +91,7 @@ struct spi_imx_data { >> >> struct completion xfer_done; >> void __iomem *base; >> + resource_size_t mapbase; > > I think the following definition is more easy to understand. > > phys_addr_t pbase; > >> int irq; >> struct clk *clk_per; >> struct clk *clk_ipg; >> @@ -92,6 +100,29 @@ struct spi_imx_data { >> unsigned int count; >> void (*tx)(struct spi_imx_data *); >> void (*rx)(struct spi_imx_data *); >> + int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t); >> + struct dma_chan *dma_chan_rx, *dma_chan_tx; > > Having them on two line fits better to the style used here. > >> + unsigned int dma_is_inited; > > All other lines use space in between. Why these two use tabs? > >> + struct device *dev; >> + >> + struct completion dma_rx_completion; >> + struct completion dma_tx_completion; >> + >> + u8 *dma_rx_tmpbuf; >> + unsigned int dma_rx_tmpbuf_size; >> + unsigned int dma_rx_tmpbuf_phy_addr; > > Use type dma_addr_t for it? 
> >> + >> + u8 *dma_tx_tmpbuf; >> + unsigned int dma_tx_tmpbuf_size; >> + unsigned int dma_tx_tmpbuf_phy_addr; >> + >> + unsigned int usedma; >> + unsigned int dma_finished; >> + /* SDMA wartermark */ >> + u32 rx_wml; >> + u32 tx_wml; >> + u32 rxt_wml; >> + >> void *rx_buf; >> const void *tx_buf; >> unsigned int txfifo; /* number of words pushed in tx FIFO */ >> @@ -185,6 +216,7 @@ static unsigned int spi_imx_clkdiv_2(unsigned int fin, >> #define MX51_ECSPI_CTRL 0x08 >> #define MX51_ECSPI_CTRL_ENABLE (1 << 0) >> #define MX51_ECSPI_CTRL_XCH (1 << 2) >> +#define MX51_ECSPI_CTRL_SMC (1 << 3) > > You do not have to follow the wrong pattern. One space after '<<' is > good enough. > >> #define MX51_ECSPI_CTRL_MODE_MASK (0xf << 4) >> #define MX51_ECSPI_CTRL_POSTDIV_OFFSET 8 >> #define MX51_ECSPI_CTRL_PREDIV_OFFSET 12 >> @@ -202,9 +234,22 @@ static unsigned int spi_imx_clkdiv_2(unsigned int fin, >> #define MX51_ECSPI_INT_TEEN (1 << 0) >> #define MX51_ECSPI_INT_RREN (1 << 3) >> >> +#define MX51_ECSPI_DMA 0x14 >> +#define MX51_ECSPI_DMA_TX_WML_OFFSET 0 >> +#define MX51_ECSPI_DMA_TX_WML_MASK 0x3F > > It seems that the common pattern for hex value in this file is to use > lowercase. > >> +#define MX51_ECSPI_DMA_RX_WML_OFFSET 16 >> +#define MX51_ECSPI_DMA_RX_WML_MASK (0x3F << 16) >> +#define MX51_ECSPI_DMA_RXT_WML_OFFSET 24 >> +#define MX51_ECSPI_DMA_RXT_WML_MASK (0x3F << 16) > > Shouldn't it be (0x3F << 24)? > >> + >> +#define MX51_ECSPI_DMA_TEDEN_OFFSET 7 >> +#define MX51_ECSPI_DMA_RXDEN_OFFSET 23 >> +#define MX51_ECSPI_DMA_RXTDEN_OFFSET 31 > > Please use tabs to align these numbers in the same column. > >> + >> #define MX51_ECSPI_STAT 0x18 >> #define MX51_ECSPI_STAT_RR (1 << 3) >> >> +#define MX51_ECSPI_TESTREG 0x20 > > It's used nowhere? > >> /* MX51 eCSPI */ >> static unsigned int mx51_ecspi_clkdiv(unsigned int fin, unsigned int fspi) >> { >> @@ -255,16 +300,28 @@ static void __maybe_unused mx51_ecspi_trigger(struct spi_imx_data *spi_imx) >> { >> u32 reg; >> >> - reg = readl(spi_imx->base + MX51_ECSPI_CTRL); >> - reg |= MX51_ECSPI_CTRL_XCH; >> - writel(reg, spi_imx->base + MX51_ECSPI_CTRL); >> + if (!spi_imx->usedma) { > > So the DMA support is added for imx51-ecspi type device only? If that's > case, we should make it clear in commit log. > >> + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); >> + reg |= MX51_ECSPI_CTRL_XCH; >> + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); >> + } else { >> + if (!spi_imx->dma_finished) { >> + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); >> + reg |= MX51_ECSPI_CTRL_SMC; >> + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); >> + } else { >> + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); >> + reg &= (~MX51_ECSPI_CTRL_SMC); > > Unnecessary parentheses. > >> + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); >> + } > > Wouldn't it make more sense to write it like below? > > reg = readl(spi_imx->base + MX51_ECSPI_CTRL); > if (!spi_imx->dma_finished) > reg |= MX51_ECSPI_CTRL_SMC; > else > reg &= ~MX51_ECSPI_CTRL_SMC; > writel(reg, spi_imx->base + MX51_ECSPI_CTRL); > >> + } >> } >> >> static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx, >> struct spi_imx_config *config) >> { >> - u32 ctrl = MX51_ECSPI_CTRL_ENABLE, cfg = 0; >> - >> + u32 ctrl = MX51_ECSPI_CTRL_ENABLE, cfg = 0, dma = 0; >> + u32 tx_wml_cfg, rx_wml_cfg, rxt_wml_cfg; >> /* >> * The hardware seems to have a race condition when changing modes. 
The >> * current assumption is that the selection of the channel arrives >> @@ -297,6 +354,30 @@ static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx, >> writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL); >> writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG); >> >> + /* >> + * Configure the DMA register: setup the watermark >> + * and enable DMA request. >> + */ >> + if (spi_imx->dma_is_inited) { >> + dma = readl(spi_imx->base + MX51_ECSPI_DMA); >> + >> + spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2; >> + spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2; >> + spi_imx->rxt_wml = spi_imx_get_fifosize(spi_imx) / 2; >> + rx_wml_cfg = spi_imx->rx_wml << MX51_ECSPI_DMA_RX_WML_OFFSET; >> + tx_wml_cfg = spi_imx->tx_wml << MX51_ECSPI_DMA_TX_WML_OFFSET; >> + rxt_wml_cfg = spi_imx->rxt_wml << MX51_ECSPI_DMA_RXT_WML_OFFSET; >> + dma = (dma & (~MX51_ECSPI_DMA_TX_WML_MASK) >> + & (~MX51_ECSPI_DMA_RX_WML_MASK) >> + & (~MX51_ECSPI_DMA_RXT_WML_MASK)) > > Unnecessary parentheses. > >> + | rx_wml_cfg | tx_wml_cfg | rxt_wml_cfg >> + | (1 << MX51_ECSPI_DMA_TEDEN_OFFSET) >> + | (1 << MX51_ECSPI_DMA_RXDEN_OFFSET) >> + | (1 << MX51_ECSPI_DMA_RXTDEN_OFFSET); >> + >> + writel(dma, spi_imx->base + MX51_ECSPI_DMA); >> + } >> + >> return 0; >> } > > ... skip the dma code, and hope Marek can help review it ... > >> @@ -729,6 +1088,27 @@ static int spi_imx_transfer(struct spi_device *spi, >> return transfer->len; >> } >> >> +static int spi_imx_transfer(struct spi_device *spi, >> + struct spi_transfer *transfer) >> +{ >> + struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); >> + >> + if (spi_imx->dma_chan_tx && spi_imx->dma_chan_rx) { >> + /* >> + * Don't use sdma when the size of data to be transfered is >> + * lower then SDMA wartermark. >> + */ >> + if ((transfer->len >= spi_imx->rx_wml) && >> + (transfer->len > spi_imx->tx_wml)) { >> + spi_imx->usedma = 1; >> + return spi_imx_sdma_transfer(spi, transfer); >> + } >> + } >> + >> + spi_imx->usedma = 0; >> + return spi_imx_pio_transfer(spi, transfer); >> +} >> + >> static int spi_imx_setup(struct spi_device *spi) >> { >> struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); >> @@ -778,6 +1158,56 @@ spi_imx_unprepare_message(struct spi_master *master, struct spi_message *msg) >> return 0; >> } >> >> +static int spi_imx_sdma_init(struct spi_imx_data *spi_imx) >> +{ >> + struct dma_slave_config slave_config = {}; >> + struct device *dev = spi_imx->dev; >> + int ret; >> + >> + >> + /* Prepare for TX : */ >> + spi_imx->dma_chan_tx = dma_request_slave_channel(dev, "tx"); >> + if (!spi_imx->dma_chan_tx) { >> + dev_err(dev, "cannot get the TX DMA channel!\n"); > > So we will see this error message for each type of imx spi device, > though the driver only supports DMA for imx51-ecspi. > > Shawn How to handle this one? change to devinfo? Or check check if imx51-ecspi? 
> >> + ret = -EINVAL; >> + goto err; >> + } >> + >> + slave_config.direction = DMA_MEM_TO_DEV; >> + slave_config.dst_addr = spi_imx->mapbase + MXC_CSPITXDATA; >> + slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; >> + slave_config.dst_maxburst = spi_imx_get_fifosize(spi_imx) / 2; >> + ret = dmaengine_slave_config(spi_imx->dma_chan_tx, &slave_config); >> + if (ret) { >> + dev_err(dev, "error in TX dma configuration."); >> + goto err; >> + } >> + >> + /* Prepare for RX : */ >> + spi_imx->dma_chan_rx = dma_request_slave_channel(dev, "rx"); >> + if (!spi_imx->dma_chan_rx) { >> + dev_dbg(dev, "cannot get the DMA channel.\n"); >> + ret = -EINVAL; >> + goto err; >> + } >> + >> + slave_config.direction = DMA_DEV_TO_MEM; >> + slave_config.src_addr = spi_imx->mapbase + MXC_CSPIRXDATA; >> + slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; >> + slave_config.src_maxburst = spi_imx_get_fifosize(spi_imx) / 2; >> + ret = dmaengine_slave_config(spi_imx->dma_chan_rx, &slave_config); >> + if (ret) { >> + dev_err(dev, "error in RX dma configuration.\n"); >> + goto err; >> + } >> + spi_imx->dma_is_inited = 1; >> + >> + return 0; >> +err: >> + spi_imx_sdma_exit(spi_imx); >> + return ret; >> +} >> + >> static int spi_imx_probe(struct platform_device *pdev) >> { >> struct device_node *np = pdev->dev.of_node; >> @@ -849,6 +1279,8 @@ static int spi_imx_probe(struct platform_device *pdev) >> (struct spi_imx_devtype_data *) pdev->id_entry->driver_data; >> >> res = platform_get_resource(pdev, IORESOURCE_MEM, 0); >> + if (res) >> + spi_imx->mapbase = res->start; >> spi_imx->base = devm_ioremap_resource(&pdev->dev, res); >> if (IS_ERR(spi_imx->base)) { >> ret = PTR_ERR(spi_imx->base); >> @@ -890,6 +1322,9 @@ static int spi_imx_probe(struct platform_device *pdev) >> >> spi_imx->spi_clk = clk_get_rate(spi_imx->clk_per); >> >> + spi_imx->dev = &pdev->dev; >> + spi_imx_sdma_init(spi_imx); >> + >> spi_imx->devtype_data->reset(spi_imx); >> >> spi_imx->devtype_data->intctrl(spi_imx, 0); >> -- >> 1.7.8 >> >> >> >> _______________________________________________ >> linux-arm-kernel mailing list >> linux-arm-kernel@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel >
On Tue, Jan 14, 2014 at 10:38:32PM +0100, Marek Vasut wrote: > I know the MXC SPI controller works in full-duplex mode, thus must have two > equally big buffers (one for RX and one for TX) even if used in half-duplex > mode, right? We should factor this stuff out into the core, a bunch of controllers have that limitation. > The problem I perceive here is that when I do for example 4 MB long continuous > half-duplex transfer with the IMX SPI, your code will happily allocate 4MB big > buffer, right ? Or try to - the other trick here is getting 4MB of contiguous memory in the first place (unless there's an IOMMU making everything pretty). > Hence my suggestion, won't it be better to split such long transfers into a > chain of DMA descriptors AND use a small (say, 16KiB) buffer for the unwanted > direction ? This way, you would allocate the small 16KiB block only once (heck, > you can even allocate it in probe() ), and each descriptor would point to this > block, overwriting it or sourcing zeroes from it . Yes, indeed. I've half started working on some dmaengine code and was going to do something like this as part of it - the idea was to have the core generate a sg_list to pass to devices with DMAable transfers which would enable this sort of rewriting and would also mean we could do things like scatter/gather more effectively. Not got as far as actually getting things into a nice shape for that though. There's a lot of common patterns here which we shouldn't be open coding.
On Tuesday, January 14, 2014 at 10:55:48 PM, Mark Brown wrote: > On Tue, Jan 14, 2014 at 10:38:32PM +0100, Marek Vasut wrote: > > I know the MXC SPI controller works in full-duplex mode, thus must have > > two equally big buffers (one for RX and one for TX) even if used in > > half-duplex mode, right? > > We should factor this stuff out into the core, a bunch of controllers > have that limitation. > > > The problem I perceive here is that when I do for example 4 MB long > > continuous half-duplex transfer with the IMX SPI, your code will happily > > allocate 4MB big buffer, right ? > > Or try to - the other trick here is getting 4MB of contiguous memory in > the first place (unless there's an IOMMU making everything pretty). Oh, but what if I want to make a really looong transfer ? Say, read entire SPI NOR of 128MB in size ... Spansion has such big SPI NORs in their new portfolio. > > Hence my suggestion, won't it be better to split such long transfers into > > a chain of DMA descriptors AND use a small (say, 16KiB) buffer for the > > unwanted direction ? This way, you would allocate the small 16KiB block > > only once (heck, you can even allocate it in probe() ), and each > > descriptor would point to this block, overwriting it or sourcing zeroes > > from it . > > Yes, indeed. I've half started working on some dmaengine code and was > going to do something like this as part of it - the idea was to have the > core generate a sg_list to pass to devices with DMAable transfers which > would enable this sort of rewriting and would also mean we could do > things like scatter/gather more effectively. Not got as far as actually > getting things into a nice shape for that though. There's a lot of > common patterns here which we shouldn't be open coding. Indeed, full ACK on this. Best regards, Marek Vasut
On Tue, Jan 14, 2014 at 11:02:31PM +0100, Marek Vasut wrote: > On Tuesday, January 14, 2014 at 10:55:48 PM, Mark Brown wrote: > > Or try to - the other trick here is getting 4MB of contiguous memory in > > the first place (unless there's an IOMMU making everything pretty). > Oh, but what if I want to make a really looong transfer ? Say, read entire SPI > NOR of 128MB in size ... Spansion has such big SPI NORs in their new portfolio. Use scatter/gather - to the SPI controller and the outside world it still looks like one big transfer (I'm missing a way to go from a virtual address to a sg_list, that'd be handy). It's an issue if the DMA controller doesn't support it but but that's rare.
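For reference, a page-by-page walk from a kernel virtual address to a scatterlist could look like the sketch below; spi_buf_to_sgt() is a hypothetical helper name, and this is roughly the shape a core-level implementation might take rather than anything in the posted patch:

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>

/* Build an sg_table over a lowmem or vmalloc'ed buffer (assumes len > 0). */
static int spi_buf_to_sgt(void *buf, size_t len, struct sg_table *sgt,
			  gfp_t gfp)
{
	unsigned int nents = DIV_ROUND_UP(offset_in_page(buf) + len, PAGE_SIZE);
	struct scatterlist *sg;
	int i, ret;

	ret = sg_alloc_table(sgt, nents, gfp);
	if (ret)
		return ret;

	for_each_sg(sgt->sgl, sg, nents, i) {
		/* never cross a page boundary within one entry */
		size_t chunk = min_t(size_t, len,
				     PAGE_SIZE - offset_in_page(buf));

		if (is_vmalloc_addr(buf))
			sg_set_page(sg, vmalloc_to_page(buf), chunk,
				    offset_in_page(buf));
		else
			sg_set_buf(sg, buf, chunk);

		buf += chunk;
		len -= chunk;
	}

	return 0;
}

The resulting table would then go through dma_map_sg() and dmaengine_prep_slave_sg(), so even a 128MB read never needs one contiguous allocation.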
On Tuesday, January 14, 2014 at 11:13:15 PM, Mark Brown wrote: > On Tue, Jan 14, 2014 at 11:02:31PM +0100, Marek Vasut wrote: > > On Tuesday, January 14, 2014 at 10:55:48 PM, Mark Brown wrote: > > > Or try to - the other trick here is getting 4MB of contiguous memory in > > > the first place (unless there's an IOMMU making everything pretty). > > > > Oh, but what if I want to make a really looong transfer ? Say, read > > entire SPI NOR of 128MB in size ... Spansion has such big SPI NORs in > > their new portfolio. > > Use scatter/gather - to the SPI controller and the outside world it > still looks like one big transfer (I'm missing a way to go from a > virtual address to a sg_list, that'd be handy). It's an issue if the > DMA controller doesn't support it but but that's rare. OK, yep, indeed. Best regards, Marek Vasut
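A sketch of the dummy-buffer side of the same idea: for the direction the caller does not supply, every scatterlist entry can reuse one small pre-mapped block instead of a transfer-sized bounce buffer. DUMMY_BUF_SIZE, dummy_phys and the helper name are illustrative only, assuming the block was allocated and dma_map_single()'d once in probe():

#define DUMMY_BUF_SIZE	(16 * 1024)

static void spi_imx_fill_dummy_sg(struct scatterlist *sgl, int nents,
				  dma_addr_t dummy_phys, unsigned int len)
{
	struct scatterlist *sg;
	int i;

	sg_init_table(sgl, nents);
	for_each_sg(sgl, sg, nents, i) {
		/* every entry points at the same small block */
		sg_dma_address(sg) = dummy_phys;
		sg_dma_len(sg) = min_t(unsigned int, len, DUMMY_BUF_SIZE);
		len -= sg_dma_len(sg);
	}
}

The caller would size the list as DIV_ROUND_UP(len, DUMMY_BUF_SIZE) entries, the same way the posted patch already splits on MAX_SDMA_BD_BYTES; filling sg_dma_address()/sg_dma_len() directly follows the style of the posted code.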
On Tue, Jan 14, 2014 at 03:52:01PM -0600, Frank Li wrote: > >> @@ -778,6 +1158,56 @@ spi_imx_unprepare_message(struct spi_master *master, struct spi_message *msg) > >> return 0; > >> } > >> > >> +static int spi_imx_sdma_init(struct spi_imx_data *spi_imx) > >> +{ > >> + struct dma_slave_config slave_config = {}; > >> + struct device *dev = spi_imx->dev; > >> + int ret; > >> + > >> + > >> + /* Prepare for TX : */ > >> + spi_imx->dma_chan_tx = dma_request_slave_channel(dev, "tx"); > >> + if (!spi_imx->dma_chan_tx) { > >> + dev_err(dev, "cannot get the TX DMA channel!\n"); > > > > So we will see this error message for each type of imx spi device, > > though the driver only supports DMA for imx51-ecspi. > > > > Shawn > > How to handle this one? change to devinfo? Or check check if imx51-ecspi? I think we should check device type and only initialize DMA for imx51-ecspi type of devices. Shawn
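Put together with Marek's comment on the probe path, the device-type check could look like this sketch (it assumes the existing is_imx51_ecspi() helper in spi-imx.c and falls back to PIO instead of failing the probe):

	/* in spi_imx_probe(): */
	spi_imx->dev = &pdev->dev;

	if (is_imx51_ecspi(spi_imx)) {
		ret = spi_imx_sdma_init(spi_imx);
		if (ret)
			dev_dbg(&pdev->dev,
				"no DMA channels, falling back to PIO (%d)\n",
				ret);
	}

With the dev_err() calls inside spi_imx_sdma_init() demoted to dev_dbg(), non-eCSPI devices would no longer log spurious errors.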
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index b80f2f7..1a9099a 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -39,6 +39,10 @@ #include <linux/of_gpio.h> #include <linux/platform_data/spi-imx.h> +#include <linux/dma-mapping.h> +#include <linux/platform_data/dma-imx.h> +#include <linux/dmaengine.h> + #define DRIVER_NAME "spi_imx" @@ -52,6 +56,9 @@ #define MXC_INT_RR (1 << 0) /* Receive data ready interrupt */ #define MXC_INT_TE (1 << 1) /* Transmit FIFO empty interrupt */ +/* The maximum bytes that a sdma BD can transfer.*/ +#define MAX_SDMA_BD_BYTES (1 << 15) + struct spi_imx_config { unsigned int speed_hz; unsigned int bpw; @@ -84,6 +91,7 @@ struct spi_imx_data { struct completion xfer_done; void __iomem *base; + resource_size_t mapbase; int irq; struct clk *clk_per; struct clk *clk_ipg; @@ -92,6 +100,29 @@ struct spi_imx_data { unsigned int count; void (*tx)(struct spi_imx_data *); void (*rx)(struct spi_imx_data *); + int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t); + struct dma_chan *dma_chan_rx, *dma_chan_tx; + unsigned int dma_is_inited; + struct device *dev; + + struct completion dma_rx_completion; + struct completion dma_tx_completion; + + u8 *dma_rx_tmpbuf; + unsigned int dma_rx_tmpbuf_size; + unsigned int dma_rx_tmpbuf_phy_addr; + + u8 *dma_tx_tmpbuf; + unsigned int dma_tx_tmpbuf_size; + unsigned int dma_tx_tmpbuf_phy_addr; + + unsigned int usedma; + unsigned int dma_finished; + /* SDMA wartermark */ + u32 rx_wml; + u32 tx_wml; + u32 rxt_wml; + void *rx_buf; const void *tx_buf; unsigned int txfifo; /* number of words pushed in tx FIFO */ @@ -185,6 +216,7 @@ static unsigned int spi_imx_clkdiv_2(unsigned int fin, #define MX51_ECSPI_CTRL 0x08 #define MX51_ECSPI_CTRL_ENABLE (1 << 0) #define MX51_ECSPI_CTRL_XCH (1 << 2) +#define MX51_ECSPI_CTRL_SMC (1 << 3) #define MX51_ECSPI_CTRL_MODE_MASK (0xf << 4) #define MX51_ECSPI_CTRL_POSTDIV_OFFSET 8 #define MX51_ECSPI_CTRL_PREDIV_OFFSET 12 @@ -202,9 +234,22 @@ static unsigned int spi_imx_clkdiv_2(unsigned int fin, #define MX51_ECSPI_INT_TEEN (1 << 0) #define MX51_ECSPI_INT_RREN (1 << 3) +#define MX51_ECSPI_DMA 0x14 +#define MX51_ECSPI_DMA_TX_WML_OFFSET 0 +#define MX51_ECSPI_DMA_TX_WML_MASK 0x3F +#define MX51_ECSPI_DMA_RX_WML_OFFSET 16 +#define MX51_ECSPI_DMA_RX_WML_MASK (0x3F << 16) +#define MX51_ECSPI_DMA_RXT_WML_OFFSET 24 +#define MX51_ECSPI_DMA_RXT_WML_MASK (0x3F << 16) + +#define MX51_ECSPI_DMA_TEDEN_OFFSET 7 +#define MX51_ECSPI_DMA_RXDEN_OFFSET 23 +#define MX51_ECSPI_DMA_RXTDEN_OFFSET 31 + #define MX51_ECSPI_STAT 0x18 #define MX51_ECSPI_STAT_RR (1 << 3) +#define MX51_ECSPI_TESTREG 0x20 /* MX51 eCSPI */ static unsigned int mx51_ecspi_clkdiv(unsigned int fin, unsigned int fspi) { @@ -255,16 +300,28 @@ static void __maybe_unused mx51_ecspi_trigger(struct spi_imx_data *spi_imx) { u32 reg; - reg = readl(spi_imx->base + MX51_ECSPI_CTRL); - reg |= MX51_ECSPI_CTRL_XCH; - writel(reg, spi_imx->base + MX51_ECSPI_CTRL); + if (!spi_imx->usedma) { + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); + reg |= MX51_ECSPI_CTRL_XCH; + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); + } else { + if (!spi_imx->dma_finished) { + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); + reg |= MX51_ECSPI_CTRL_SMC; + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); + } else { + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); + reg &= (~MX51_ECSPI_CTRL_SMC); + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); + } + } } static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx, struct spi_imx_config *config) { - u32 ctrl 
-
+	u32 ctrl = MX51_ECSPI_CTRL_ENABLE, cfg = 0, dma = 0;
+	u32 tx_wml_cfg, rx_wml_cfg, rxt_wml_cfg;
 	/*
 	 * The hardware seems to have a race condition when changing modes. The
 	 * current assumption is that the selection of the channel arrives
@@ -297,6 +354,30 @@ static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx,
 	writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL);
 	writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG);
 
+	/*
+	 * Configure the DMA register: setup the watermark
+	 * and enable DMA request.
+	 */
+	if (spi_imx->dma_is_inited) {
+		dma = readl(spi_imx->base + MX51_ECSPI_DMA);
+
+		spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2;
+		spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2;
+		spi_imx->rxt_wml = spi_imx_get_fifosize(spi_imx) / 2;
+		rx_wml_cfg = spi_imx->rx_wml << MX51_ECSPI_DMA_RX_WML_OFFSET;
+		tx_wml_cfg = spi_imx->tx_wml << MX51_ECSPI_DMA_TX_WML_OFFSET;
+		rxt_wml_cfg = spi_imx->rxt_wml << MX51_ECSPI_DMA_RXT_WML_OFFSET;
+		dma = (dma & (~MX51_ECSPI_DMA_TX_WML_MASK)
+			   & (~MX51_ECSPI_DMA_RX_WML_MASK)
+			   & (~MX51_ECSPI_DMA_RXT_WML_MASK))
+			   | rx_wml_cfg | tx_wml_cfg | rxt_wml_cfg
+			   | (1 << MX51_ECSPI_DMA_TEDEN_OFFSET)
+			   | (1 << MX51_ECSPI_DMA_RXDEN_OFFSET)
+			   | (1 << MX51_ECSPI_DMA_RXTDEN_OFFSET);
+
+		writel(dma, spi_imx->base + MX51_ECSPI_DMA);
+	}
+
 	return 0;
 }
 
@@ -708,7 +789,285 @@ static int spi_imx_setupxfer(struct spi_device *spi,
 	return 0;
 }
 
-static int spi_imx_transfer(struct spi_device *spi,
+static void spi_imx_sdma_exit(struct spi_imx_data *spi_imx)
+{
+	if (spi_imx->dma_chan_rx) {
+		dma_release_channel(spi_imx->dma_chan_rx);
+		spi_imx->dma_chan_rx = NULL;
+	}
+
+	if (spi_imx->dma_chan_tx) {
+		dma_release_channel(spi_imx->dma_chan_tx);
+		spi_imx->dma_chan_tx = NULL;
+	}
+
+	spi_imx->dma_is_inited = 0;
+}
+
+static void spi_imx_dma_rx_callback(void *cookie)
+{
+	struct spi_imx_data *spi_imx = (struct spi_imx_data *)cookie;
+
+	complete(&spi_imx->dma_rx_completion);
+
+}
+
+static void spi_imx_dma_tx_callback(void *cookie)
+{
+	struct spi_imx_data *spi_imx = (struct spi_imx_data *)cookie;
+
+	complete(&spi_imx->dma_tx_completion);
+}
+
+static int spi_imx_sdma_transfer(struct spi_device *spi,
+				struct spi_transfer *transfer)
+{
+	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	int ret = 0;
+	int sg_num;
+	int loop;
+	int left;
+	u32 dma;
+
+	struct scatterlist *sg_rx, *sg_tx;
+	struct dma_async_tx_descriptor *txdesc;
+	struct dma_async_tx_descriptor *rxdesc;
+
+	init_completion(&spi_imx->dma_rx_completion);
+	init_completion(&spi_imx->dma_tx_completion);
+
+	/*
+	 * Get the valid physical address for the tx buf, if the tx buf address
+	 * is null or it cannot be mapped, we should allocate memory for it.
+	 */
+	if (virt_addr_valid(transfer->tx_buf)) {
+		transfer->tx_dma = dma_map_single(spi_imx->dev,
+				(void *)transfer->tx_buf, transfer->len,
+				DMA_TO_DEVICE);
+		if (dma_mapping_error(spi_imx->dev, transfer->tx_dma)) {
+			dev_err(spi_imx->dev,
+				"Memory dma map fail, line = %d\n", __LINE__);
+			ret = -EFAULT;
+			goto err_tx;
+		}
+	} else {
+		if (transfer->len > spi_imx->dma_tx_tmpbuf_size) {
+			if (!spi_imx->dma_tx_tmpbuf_size) {
+				kfree(spi_imx->dma_tx_tmpbuf);
+				spi_imx->dma_tx_tmpbuf_size = 0;
+			}
+
+			spi_imx->dma_tx_tmpbuf =
+				kzalloc(transfer->len, GFP_KERNEL);
+			if (NULL == spi_imx->dma_tx_tmpbuf) {
+				dev_err(spi_imx->dev, "Alloc memory fail.\n");
+				ret = -EFAULT;
+				goto err_tx;
+			}
+			spi_imx->dma_tx_tmpbuf_size = transfer->len;
+		}
+
+		/*
+		 * Move the transferred data to the new buffer from the old
+		 * one that cannot be mapped.
+		 */
+		if (transfer->tx_buf)
+			memcpy(spi_imx->dma_tx_tmpbuf,
+				transfer->tx_buf,
+				transfer->len);
+
+		spi_imx->dma_tx_tmpbuf_phy_addr = dma_map_single(spi_imx->dev,
+				spi_imx->dma_tx_tmpbuf, transfer->len,
+				DMA_TO_DEVICE);
+
+		if (dma_mapping_error(spi_imx->dev,
+				spi_imx->dma_tx_tmpbuf_phy_addr)) {
+			dev_err(spi_imx->dev,
+				"Memory dma map fail, line = %d\n",
+				__LINE__);
+			ret = -EFAULT;
+			goto err_tx;
+		}
+
+		transfer->tx_dma = spi_imx->dma_tx_tmpbuf_phy_addr;
+	}
+
+	/* Prepare sg for tx sdma. */
+	sg_num = ((transfer->len - 1) / MAX_SDMA_BD_BYTES) + 1;
+	sg_tx = kzalloc(sg_num * sizeof(struct scatterlist), GFP_KERNEL);
+	if (NULL == sg_tx) {
+		dev_err(spi_imx->dev,
+			"Memory allocate fail, line = %d\n",
+			__LINE__);
+		goto err_tx_sg;
+	}
+	sg_init_table(sg_tx, sg_num);
+	for (loop = 0; loop < (sg_num - 1); loop++) {
+		sg_dma_address(&sg_tx[loop]) =
+			transfer->tx_dma + loop * MAX_SDMA_BD_BYTES;
+		sg_dma_len(&sg_tx[loop]) = MAX_SDMA_BD_BYTES;
+	}
+
+	sg_dma_address(&sg_tx[loop]) =
+		transfer->tx_dma + loop * MAX_SDMA_BD_BYTES;
+	sg_dma_len(&sg_tx[loop]) = transfer->len - loop * MAX_SDMA_BD_BYTES;
+
+	txdesc = dmaengine_prep_slave_sg(spi_imx->dma_chan_tx,
+			sg_tx, sg_num, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
+	if (!txdesc) {
+		ret = -EFAULT;
+		goto err_rx;
+	}
+
+	txdesc->callback = spi_imx_dma_tx_callback;
+	txdesc->callback_param = (void *)spi_imx;
+
+	/*
+	 * Get the valid physical address for the rx buf, if the rx buf address
+	 * is null or it cannot be mapped, we should allocate memory for it.
+	 */
+	if (virt_addr_valid(transfer->rx_buf)) {
+		transfer->rx_dma = dma_map_single(spi_imx->dev,
+				transfer->rx_buf, transfer->len,
+				DMA_FROM_DEVICE);
+
+		if (dma_mapping_error(spi_imx->dev, transfer->rx_dma)) {
+			dev_err(spi_imx->dev,
+				"Memory allocate fail, line = %d\n",
+				__LINE__);
+			ret = -EFAULT;
+			goto err_rx;
+		}
+	} else {
+		if (transfer->len > spi_imx->dma_rx_tmpbuf_size) {
+			if (!spi_imx->dma_rx_tmpbuf_size) {
+				kfree(spi_imx->dma_rx_tmpbuf);
+				spi_imx->dma_rx_tmpbuf_size = 0;
+			}
+
+			spi_imx->dma_rx_tmpbuf =
+				kzalloc(transfer->len, GFP_KERNEL);
+			if (NULL == spi_imx->dma_rx_tmpbuf) {
+				dev_err(spi_imx->dev, "Alloc memory fail.\n");
+				ret = -EFAULT;
+				goto err_rx;
+			}
+			spi_imx->dma_rx_tmpbuf_size = transfer->len;
+		}
+
+		spi_imx->dma_rx_tmpbuf_phy_addr = dma_map_single(spi_imx->dev,
+				spi_imx->dma_rx_tmpbuf, transfer->len,
+				DMA_FROM_DEVICE);
+
+		if (dma_mapping_error(spi_imx->dev,
+				spi_imx->dma_rx_tmpbuf_phy_addr)) {
+			dev_err(spi_imx->dev,
+				"Memory dma map fail, line = %d\n",
+				__LINE__);
+			ret = -EFAULT;
+			goto err_rx;
+		}
+
+		transfer->rx_dma = spi_imx->dma_rx_tmpbuf_phy_addr;
+	}
+
+	/* Prepare sg for rx sdma. */
+	sg_num = ((transfer->len - 1) / MAX_SDMA_BD_BYTES) + 1;
+	sg_rx = kzalloc(sg_num * sizeof(struct scatterlist), GFP_KERNEL);
+	if (NULL == sg_rx) {
+		dev_err(spi_imx->dev,
+			"Memory dma map fail, line = %d\n", __LINE__);
+		goto err_rx_sg;
+	}
+	sg_init_table(sg_rx, sg_num);
+	for (loop = 0; loop < (sg_num - 1); loop++) {
+		sg_dma_address(&sg_rx[loop]) =
+			transfer->rx_dma + loop * MAX_SDMA_BD_BYTES;
+		sg_dma_len(&sg_rx[loop]) = MAX_SDMA_BD_BYTES;
+	}
+
+	sg_dma_address(&sg_rx[loop]) =
+		transfer->rx_dma + loop * MAX_SDMA_BD_BYTES;
+	sg_dma_len(&sg_rx[loop]) = transfer->len - loop * MAX_SDMA_BD_BYTES;
+	rxdesc = dmaengine_prep_slave_sg(spi_imx->dma_chan_rx,
+			sg_rx, sg_num, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
+	if (!rxdesc) {
+		ret = -EFAULT;
+		goto err_desc;
+	}
+
+	rxdesc->callback = spi_imx_dma_rx_callback;
+	rxdesc->callback_param = (void *)spi_imx;
+
+	/* Trigger the cspi module. */
+	spi_imx->dma_finished = 0;
+
+	spi_imx->devtype_data->trigger(spi_imx);
+
+	dmaengine_submit(txdesc);
+	dmaengine_submit(rxdesc);
+
+	dma_async_issue_pending(spi_imx->dma_chan_tx);
+	dma_async_issue_pending(spi_imx->dma_chan_rx);
+
+	/* Wait for SDMA to finish the data transfer. */
+	ret = wait_for_completion_timeout(&spi_imx->dma_tx_completion,
+					msecs_to_jiffies(3000));
+	if (!ret) {
+		dev_err(spi_imx->dev,
+			"I/O Error in DMA TX, line = %d ####\n", __LINE__);
+		dmaengine_terminate_all(spi_imx->dma_chan_tx);
+		goto err_desc;
+	} else {
+		dma = readl(spi_imx->base + MX51_ECSPI_DMA);
+		dma = dma & (~MX51_ECSPI_DMA_RXT_WML_MASK);
+		/* Change RX_DMA_LENGTH to trigger DMA to fetch the tail data */
+		left = transfer->len & (~spi_imx->rxt_wml);
+		if (left)
+			writel(dma | (left << MX51_ECSPI_DMA_RXT_WML_OFFSET),
+				spi_imx->base + MX51_ECSPI_DMA);
+
+		ret = wait_for_completion_timeout(&spi_imx->dma_rx_completion,
+				msecs_to_jiffies(3000));
+
+		writel(dma | (spi_imx->rxt_wml << MX51_ECSPI_DMA_RXT_WML_OFFSET),
+			spi_imx->base + MX51_ECSPI_DMA);
+		if (!ret) {
+			dev_err(spi_imx->dev,
+				"I/O Error in DMA RX. len %d, line = %d\n",
+				transfer->len,
+				__LINE__);
+			spi_imx->devtype_data->reset(spi_imx);
+			dmaengine_terminate_all(spi_imx->dma_chan_rx);
+		}
+	}
+
+	/* Move the transferred data to the rx buf when it cannot be mapped. */
+	if (transfer->rx_buf && (!virt_addr_valid(transfer->rx_buf)))
+		memcpy(transfer->rx_buf,
+			spi_imx->dma_rx_tmpbuf,
+			transfer->len);
+
+err_desc:
+	kfree(sg_rx);
+err_rx_sg:
+	dma_unmap_single(spi_imx->dev, transfer->rx_dma,
+			transfer->len, DMA_TO_DEVICE);
+err_rx:
+	kfree(sg_tx);
+err_tx_sg:
+	dma_unmap_single(spi_imx->dev, transfer->tx_dma,
+			transfer->len, DMA_FROM_DEVICE);
+err_tx:
+	spi_imx->dma_finished = 1;
+	spi_imx->devtype_data->trigger(spi_imx);
+	if ((!ret) || (-EFAULT == ret))
+		return -EIO;
+	else
+		return transfer->len;
+}
+
+static int spi_imx_pio_transfer(struct spi_device *spi,
 				struct spi_transfer *transfer)
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
@@ -729,6 +1088,27 @@ static int spi_imx_transfer(struct spi_device *spi,
 
 	return transfer->len;
 }
 
+static int spi_imx_transfer(struct spi_device *spi,
+				struct spi_transfer *transfer)
+{
+	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+
+	if (spi_imx->dma_chan_tx && spi_imx->dma_chan_rx) {
+		/*
+		 * Don't use sdma when the size of data to be transferred is
+		 * lower than the SDMA watermark.
+		 */
+		if ((transfer->len >= spi_imx->rx_wml) &&
+			(transfer->len > spi_imx->tx_wml)) {
+			spi_imx->usedma = 1;
+			return spi_imx_sdma_transfer(spi, transfer);
+		}
+	}
+
+	spi_imx->usedma = 0;
+	return spi_imx_pio_transfer(spi, transfer);
+}
+
 static int spi_imx_setup(struct spi_device *spi)
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
@@ -778,6 +1158,56 @@ spi_imx_unprepare_message(struct spi_master *master, struct spi_message *msg)
 	return 0;
 }
 
+static int spi_imx_sdma_init(struct spi_imx_data *spi_imx)
+{
+	struct dma_slave_config slave_config = {};
+	struct device *dev = spi_imx->dev;
+	int ret;
+
+
+	/* Prepare for TX : */
+	spi_imx->dma_chan_tx = dma_request_slave_channel(dev, "tx");
+	if (!spi_imx->dma_chan_tx) {
+		dev_err(dev, "cannot get the TX DMA channel!\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	slave_config.direction = DMA_MEM_TO_DEV;
+	slave_config.dst_addr = spi_imx->mapbase + MXC_CSPITXDATA;
+	slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	slave_config.dst_maxburst = spi_imx_get_fifosize(spi_imx) / 2;
+	ret = dmaengine_slave_config(spi_imx->dma_chan_tx, &slave_config);
+	if (ret) {
+		dev_err(dev, "error in TX dma configuration.");
+		goto err;
+	}
+
+	/* Prepare for RX : */
+	spi_imx->dma_chan_rx = dma_request_slave_channel(dev, "rx");
+	if (!spi_imx->dma_chan_rx) {
+		dev_dbg(dev, "cannot get the DMA channel.\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	slave_config.direction = DMA_DEV_TO_MEM;
+	slave_config.src_addr = spi_imx->mapbase + MXC_CSPIRXDATA;
+	slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	slave_config.src_maxburst = spi_imx_get_fifosize(spi_imx) / 2;
+	ret = dmaengine_slave_config(spi_imx->dma_chan_rx, &slave_config);
+	if (ret) {
+		dev_err(dev, "error in RX dma configuration.\n");
+		goto err;
+	}
+	spi_imx->dma_is_inited = 1;
+
+	return 0;
+err:
+	spi_imx_sdma_exit(spi_imx);
+	return ret;
+}
+
 static int spi_imx_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -849,6 +1279,8 @@ static int spi_imx_probe(struct platform_device *pdev)
 		(struct spi_imx_devtype_data *) pdev->id_entry->driver_data;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res)
+		spi_imx->mapbase = res->start;
 	spi_imx->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(spi_imx->base)) {
 		ret = PTR_ERR(spi_imx->base);
@@ -890,6 +1322,9 @@ static int spi_imx_probe(struct platform_device *pdev)
 
 	spi_imx->spi_clk = clk_get_rate(spi_imx->clk_per);
 
+	spi_imx->dev = &pdev->dev;
+	spi_imx_sdma_init(spi_imx);
+
 	spi_imx->devtype_data->reset(spi_imx);
 
 	spi_imx->devtype_data->intctrl(spi_imx, 0);
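
For reference, the scatterlist setup in spi_imx_sdma_transfer() simply slices transfer->len into segments of at most MAX_SDMA_BD_BYTES (32 KiB), with the last segment carrying the remainder. The user-space sketch below reproduces only that arithmetic for illustration; the helper name show_sg_layout() and the printed layout are hypothetical and not part of the patch.

#include <stdio.h>

#define MAX_SDMA_BD_BYTES (1 << 15)	/* same limit as the patch: one SDMA BD moves at most 32 KiB */

/* Print the DMA address offset and length of each scatterlist segment
 * that the sg loop in spi_imx_sdma_transfer() would program for 'len' bytes. */
static void show_sg_layout(unsigned int len)
{
	unsigned int sg_num = ((len - 1) / MAX_SDMA_BD_BYTES) + 1;
	unsigned int loop;

	for (loop = 0; loop < sg_num - 1; loop++)
		printf("sg[%u]: offset %u, len %u\n",
		       loop, loop * MAX_SDMA_BD_BYTES, MAX_SDMA_BD_BYTES);

	/* the last segment carries whatever is left over */
	printf("sg[%u]: offset %u, len %u\n",
	       loop, loop * MAX_SDMA_BD_BYTES, len - loop * MAX_SDMA_BD_BYTES);
}

int main(void)
{
	show_sg_layout(1024 * 1024);	/* the 1 MiB dd test from the commit log */
	show_sg_layout(100);		/* short transfer: a single segment */
	return 0;
}

For the 1 MiB dd case this works out to 32 segments of 32 KiB each, while a 100-byte transfer uses a single segment.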
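Also for reference, spi_imx_transfer() falls back to PIO whenever the transfer is not clearly larger than the SDMA watermark, which mx51_ecspi_config() sets to half the FIFO size. The sketch below mirrors only that comparison; the 64-entry ECSPI FIFO depth and the helper would_use_dma() are assumptions made for illustration, not values taken from the diff.

#include <stdbool.h>
#include <stdio.h>

/* Assumed here: a 64-entry ECSPI FIFO, so the patch's "fifosize / 2"
 * watermark works out to 32. This constant is illustrative only. */
#define ASSUMED_FIFO_SIZE	64

/* Mirror of the decision in spi_imx_transfer(): transfers that are not
 * larger than the SDMA watermark stay on the PIO path. */
static bool would_use_dma(unsigned int len)
{
	unsigned int rx_wml = ASSUMED_FIFO_SIZE / 2;
	unsigned int tx_wml = ASSUMED_FIFO_SIZE / 2;

	return len >= rx_wml && len > tx_wml;
}

int main(void)
{
	unsigned int lens[] = { 4, 32, 33, 256, 1024 * 1024 };

	for (unsigned int i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("len %7u -> %s\n", lens[i],
		       would_use_dma(lens[i]) ? "SDMA" : "PIO");
	return 0;
}

With a 32-word watermark, a 32-byte transfer still takes the PIO path, while 33 bytes and above go through SDMA.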