diff mbox

[PATCHv4,2/3] drivers: spi: Add qspi flash controller

Message ID 1374141687-10790-3-git-send-email-sourav.poddar@ti.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Poddar, Sourav July 18, 2013, 10:01 a.m. UTC
The patch add basic support for the quad spi controller.

QSPI is a kind of spi module that allows single,
dual and quad read access to external spi devices. The module
has a memory mapped interface which provide direct interface
for accessing data form external spi devices.

The patch will configure controller clocks, device control
register and for defining low level transfer apis which
will be used by the spi framework to transfer data to
the slave spi device(flash in this case).

Signed-off-by: Sourav Poddar <sourav.poddar@ti.com>
---
v3->v4
- Did miscellaneous cleanup
- Added power management support.
 Documentation/devicetree/bindings/spi/ti_qspi.txt |   22 +
 drivers/spi/Kconfig                               |    8 +
 drivers/spi/Makefile                              |    1 +
 drivers/spi/spi-ti-qspi.c                         |  537 +++++++++++++++++++++
 4 files changed, 568 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/ti_qspi.txt
 create mode 100644 drivers/spi/spi-ti-qspi.c

Comments

Poddar, Sourav July 18, 2013, 11:18 a.m. UTC | #1
Hi Felipe,
On Thursday 18 July 2013 03:54 PM, Felipe Balbi wrote:
> Hi,
>
> it might be just me, but ...
>
> On Thu, Jul 18, 2013 at 03:31:26PM +0530, Sourav Poddar wrote:
>> +static inline unsigned long ti_qspi_readl_data(struct ti_qspi *qspi,
>> +		unsigned long reg, int wlen)
>> +{
>> +	switch (wlen) {
>> +	case 8:
>> +		return readw(qspi->base + reg);
>> +		break;
>> +	case 16:
>> +		return readb(qspi->base + reg);
>> +		break;
>> +	case 32:
>> +		return readl(qspi->base + reg);
>> +		break;
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +}
>> +
>> +static inline void ti_qspi_writel_data(struct ti_qspi *qspi,
>> +		unsigned long val, unsigned long reg, int wlen)
>> +{
>> +	switch (wlen) {
>> +	case 8:
>> +		writew(val, qspi->base + reg);
>> +		break;
>> +	case 16:
>> +		writeb(val, qspi->base + reg);
>> +		break;
>> +	case 32:
>> +		writeb(val, qspi->base + reg);
>> +		break;
>> +	default:
>> +		dev_dbg(qspi->dev, "word lenght out of range");
>> +		break;
>> +	}
>> +}
> because of these two functions you have the hability to read/write
> *more* than one byte, and yet ...
>
>> +static void qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
>> +{
>> +	const u8 *txbuf;
>> +	int wlen, count;
>> +
>> +	count = t->len;
>> +	txbuf = t->tx_buf;
>> +	wlen = t->bits_per_word;
>> +
>> +	while (count--) {
>> +		dev_dbg(qspi->dev, "tx cmd %08x dc %08x data %02x\n",
>> +			qspi->cmd | QSPI_WR_SNGL, qspi->dc, *txbuf);
>> +		ti_qspi_writel_data(qspi, *txbuf++, QSPI_SPI_DATA_REG, wlen);
> you always increment by each byte. Here, if you used writel(), you wrote
> 4 bytes and should increment txbuf by 4.

hmm..got this point. Yes, my mistake, here I agree if wlen is not 8 bits
txbuf++ is not valid.
>   Same goes for read_data(),
> below. Another thing. Even though your wlen might be 8 bits, if you
> write 4 bytes to write, you can save a few CPU cycles by using writel().
>
Do you mean 4 words of 8 bits?
> You only use writew() if you have exactly 2 bytes to write and writeb()
> if you have exactly 1 byte to write. 3 bytes we'll be left as an
> exercise.
hmm..yes.
>> +static int ti_qspi_start_transfer_one(struct spi_master *master,
>> +		struct spi_message *m)
>> +{
>> +	struct ti_qspi *qspi = spi_master_get_devdata(master);
>> +	struct spi_device *spi = m->spi;
>> +	struct spi_transfer *t;
>> +	int status = 0, ret;
>> +	int frame_length;
>> +
>> +	/* setup device control reg */
>> +	qspi->dc = 0;
>> +
>> +	if (spi->mode&  SPI_CPHA)
>> +		qspi->dc |= QSPI_CKPHA(spi->chip_select);
>> +	if (spi->mode&  SPI_CPOL)
>> +		qspi->dc |= QSPI_CKPOL(spi->chip_select);
>> +	if (spi->mode&  SPI_CS_HIGH)
>> +		qspi->dc |= QSPI_CSPOL(spi->chip_select);
>> +
>> +	frame_length = DIV_ROUND_UP(m->frame_length * spi->bits_per_word,
>> +				spi->bits_per_word);
> this calculation doesn't look correct.
>
> 	(m->frame_length * spi->bits_per_word) /
> 		spi->bits_per_word = m->frame_length
>
> What are you trying to achieve here ? frame_length should be counted in
> words right ? And we get that value in bytes. So what's the best
> calculation to convert bytes into words ? If you have 8 bits_per_word
> you don't need any calculation, but if you have 32 bits_per_word, you
> _do_ need something.
>
Yes, just derive this formulae with 8 bits per word in mind.
Will change.
It should be (m->frame_length * 8) / spi->bits_per_word
> How will you achieve the number you want ? (hint: 1 byte == 8 bits)
>
> And btw, all of these mistakes pretty much tell me that this driver
> hasn't been tested. How have you tested this driver ?
After bootup, I checked for deive detting enumerated as /proc/mtd.
After which I am using mtdutils(erase, dump and write utilied to
check for the communication with the flash device.)
> Is your spansion
> memory accessed with 8 bits_per_word only ?
Yes, most of the places is like that and data is sapmled in 8 bits.
For some opcodes, we need to send 3 bytes addresses after instruction
  to the flash chip.
>   Is there anyway to use
> 32 bits_per_word with that device ? That would uncover quite a few
> mistakes in $subject.
>


------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
Poddar, Sourav July 18, 2013, 11:45 a.m. UTC | #2
On Thursday 18 July 2013 04:12 PM, Mark Brown wrote:
> On Thu, Jul 18, 2013 at 03:31:26PM +0530, Sourav Poddar wrote:
>
>> QSPI is a kind of spi module that allows single,
>> dual and quad read access to external spi devices. The module
>> has a memory mapped interface which provide direct interface
>> for accessing data form external spi devices.
> Have you seen the ongoing thread about SPI buses with extra data lines?
> How does this driver fit in with that?
>
I have tried using it in my patch3 of this series..
>> --- a/drivers/spi/Makefile
>> +++ b/drivers/spi/Makefile
>> @@ -46,6 +46,7 @@ obj-$(CONFIG_SPI_OCTEON)		+= spi-octeon.o
>>   obj-$(CONFIG_SPI_OMAP_UWIRE)		+= spi-omap-uwire.o
>>   obj-$(CONFIG_SPI_OMAP_100K)		+= spi-omap-100k.o
>>   obj-$(CONFIG_SPI_OMAP24XX)		+= spi-omap2-mcspi.o
>> +obj-$(CONFIG_QSPI_DRA7xxx)              += spi-ti-qspi.o
>>   obj-$(CONFIG_SPI_ORION)			+= spi-orion.o
>>   obj-$(CONFIG_SPI_PL022)			+= spi-pl022.o
>>   obj-$(CONFIG_SPI_PPC4xx)		+= spi-ppc4xx.o
> Please use SPI_ like the other drivers.
>
Ok.
>> +static int ti_qspi_prepare_xfer(struct spi_master *master)
>> +{
>> +	struct ti_qspi *qspi = spi_master_get_devdata(master);
>> +	int ret;
>> +
>> +	ret = pm_runtime_get_sync(qspi->dev);
>> +	if (ret<  0) {
>> +		dev_err(qspi->dev, "pm_runtime_get_sync() failed\n");
>> +		return ret;
>> +	}
>> +
>> +	return 0;
>> +}
> This is a very common pattern, it should probably be factored out into
> the core, probably not even as ops but rather as an actual feature.
>
May be yes.
>> +	list_for_each_entry(t,&m->transfers, transfer_list) {
>> +		qspi->cmd |= QSPI_WLEN(t->bits_per_word);
>> +		qspi->cmd |= QSPI_WC_CMD_INT_EN;
>> +
>> +		ret = qspi_transfer_msg(qspi, t);
>> +		if (ret) {
>> +			dev_dbg(qspi->dev, "transfer message failed\n");
>> +			return -EINVAL;
>> +		}
>> +
>> +		m->actual_length += t->len;
>> +
>> +		if (list_is_last(&t->transfer_list,&m->transfers))
>> +			goto out;
>> +	}
> The use of list_is_last() here is *realy* strange - what's going on
> there?
>
We are checking if there is any transfer left, if no we are signalling the
flash device about the end of transfer.
>> +static irqreturn_t ti_qspi_isr(int irq, void *dev_id)
>> +{
>> +	struct ti_qspi *qspi = dev_id;
>> +	u16 mask, stat;
>> +
>> +	irqreturn_t ret = IRQ_HANDLED;
>> +
>> +	spin_lock(&qspi->lock);
>> +
>> +	stat = ti_qspi_readl(qspi, QSPI_SPI_STATUS_REG);
>> +	mask = ti_qspi_readl(qspi, QSPI_INTR_ENABLE_SET_REG);
>> +
>> +	if (stat&&  mask)
>> +		ret = IRQ_WAKE_THREAD;
>> +
>> +	spin_unlock(&qspi->lock);
>> +
>> +	return ret;
> According to the above code we might interrupt for masked events...
> that's a bit worrying isn't it?
>
Yes, there is WC interrupt enable bit which enables the interrupt. This 
interrupt
gets disabled by writing to the CLEAR reg in the threaded irq.
>> +	ret = devm_request_threaded_irq(&pdev->dev, irq, ti_qspi_isr,
>> +			ti_qspi_threaded_isr, IRQF_NO_SUSPEND | IRQF_ONESHOT,
>> +			dev_name(&pdev->dev), qspi);
>> +	if (ret<  0) {
>> +		dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
>> +				irq);
>> +		goto free_master;
>> +	}
> Standard question about devm_request_threaded_irq() - how can we be
> certain it's safe to use during removal?
I am not sure about the exact flow. If we see the api description, it 
says about irq getting
freed automatically.
Practically, I will check that on removing the driver, cat 
/proc/interrupts  should not show
the required interrupt getting registered.
Though, I see an api also existing "devm_free_irq", which explicitly un 
allocate your irq requested
through devm_* variants.



------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
Poddar, Sourav July 18, 2013, 12:24 p.m. UTC | #3
On Thursday 18 July 2013 04:54 PM, Felipe Balbi wrote:
> On Thu, Jul 18, 2013 at 04:48:41PM +0530, Sourav Poddar wrote:
>>>> +static void qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
>>>> +{
>>>> +	const u8 *txbuf;
>>>> +	int wlen, count;
>>>> +
>>>> +	count = t->len;
>>>> +	txbuf = t->tx_buf;
>>>> +	wlen = t->bits_per_word;
>>>> +
>>>> +	while (count--) {
>>>> +		dev_dbg(qspi->dev, "tx cmd %08x dc %08x data %02x\n",
>>>> +			qspi->cmd | QSPI_WR_SNGL, qspi->dc, *txbuf);
>>>> +		ti_qspi_writel_data(qspi, *txbuf++, QSPI_SPI_DATA_REG, wlen);
>>> you always increment by each byte. Here, if you used writel(), you wrote
>>> 4 bytes and should increment txbuf by 4.
>> hmm..got this point. Yes, my mistake, here I agree if wlen is not 8 bits
>> txbuf++ is not valid.
>>>   Same goes for read_data(),
>>> below. Another thing. Even though your wlen might be 8 bits, if you
>>> write 4 bytes to write, you can save a few CPU cycles by using writel().
>>>
>> Do you mean 4 words of 8 bits?
> yeah. Say you have wlen = 8 but the transfer length is 8 bytes (64
> bits). If you use writeb(), you will do 8 writes, if you use writel()
> you'll do 2 writes.
>
hmm.. I will check this out.
If our wlen is 8, after every 8 bits there will be
an interrupt. Will check that out, how that interrupt
should be tackled if we desired to read 4 bytes in a single writel/readl.
>>>> +static int ti_qspi_start_transfer_one(struct spi_master *master,
>>>> +		struct spi_message *m)
>>>> +{
>>>> +	struct ti_qspi *qspi = spi_master_get_devdata(master);
>>>> +	struct spi_device *spi = m->spi;
>>>> +	struct spi_transfer *t;
>>>> +	int status = 0, ret;
>>>> +	int frame_length;
>>>> +
>>>> +	/* setup device control reg */
>>>> +	qspi->dc = 0;
>>>> +
>>>> +	if (spi->mode&   SPI_CPHA)
>>>> +		qspi->dc |= QSPI_CKPHA(spi->chip_select);
>>>> +	if (spi->mode&   SPI_CPOL)
>>>> +		qspi->dc |= QSPI_CKPOL(spi->chip_select);
>>>> +	if (spi->mode&   SPI_CS_HIGH)
>>>> +		qspi->dc |= QSPI_CSPOL(spi->chip_select);
>>>> +
>>>> +	frame_length = DIV_ROUND_UP(m->frame_length * spi->bits_per_word,
>>>> +				spi->bits_per_word);
>>> this calculation doesn't look correct.
>>>
>>> 	(m->frame_length * spi->bits_per_word) /
>>> 		spi->bits_per_word = m->frame_length
>>>
>>> What are you trying to achieve here ? frame_length should be counted in
>>> words right ? And we get that value in bytes. So what's the best
>>> calculation to convert bytes into words ? If you have 8 bits_per_word
>>> you don't need any calculation, but if you have 32 bits_per_word, you
>>> _do_ need something.
>>>
>> Yes, just derive this formulae with 8 bits per word in mind.
>> Will change.
>> It should be (m->frame_length * 8) / spi->bits_per_word
> right on. To make sure this will execute a little faster (you never know
> what several different versions of GCC will do), instead of multiplying
> by 8, left shift by 3.
>
Ok. Will do.
>>> How will you achieve the number you want ? (hint: 1 byte == 8 bits)
>>>
>>> And btw, all of these mistakes pretty much tell me that this driver
>>> hasn't been tested. How have you tested this driver ?
>> After bootup, I checked for deive detting enumerated as /proc/mtd.
>> After which I am using mtdutils(erase, dump and write utilied to
>> check for the communication with the flash device.)
> alright, make that clear in your commit log.
>
Ok.

------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
Poddar, Sourav July 18, 2013, 2:55 p.m. UTC | #4
Hi Mark,
On Thursday 18 July 2013 08:12 PM, Mark Brown wrote:
> On Thu, Jul 18, 2013 at 04:31:58PM +0300, Felipe Balbi wrote:
>> On Thu, Jul 18, 2013 at 02:18:22PM +0100, Mark Brown wrote:
>>> So why do we report that we handled the interrupt then?  Shouldn't we at
>>> least warn if we're getting spurious IRQs?
>> not spurious. OMAP has two sets of IRQ status registers. One is call
>> IRQSTATUS$n (n = 0, 1, ...) and IRQSTATUS_RAW$n.
>> IRQSTATUS$n will only enable the bits which fired IRQs and aren't
>> masked while IRQSTATUS_RAW$n will also enable the bits which are masked.
>> I could never come up with a use case where we would need to handle IRQs
>> which we decided to mask, but perhaps there might be some cases, I don't
>> know.
>> Based on that, I believe Sourav is reading IRQSTATUS_RAW$n, then he need
>> to clear the masked bits.
> That's not the issue - the issue is that if none of the unmasked
> interrupts are being asserted we shouldn't be in the interrupt handler
> in the first place but the driver silently accepts that and reports that
> it handled the interrupt.
I believe this is what you hinted at doing..

there is a QSPI_INTR_STATUS_ENABLED_CLEAR register, which indicated the 
interrupt
status.
if nothing is set in the above register, I should return IRQ_NONE.

------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
Trent Piepho July 18, 2013, 7:08 p.m. UTC | #5
On Thu, Jul 18, 2013 at 3:01 AM, Sourav Poddar <sourav.poddar@ti.com> wrote:
> +Required properties:
> +- compatible : should be "ti,dra7xxx-qspi".
> +- reg: Should contain QSPI registers location and length.
> +- #address-cells, #size-cells : Must be present if the device has sub-nodes
> +- ti,hwmods: Name of the hwmod associated to the QSPI

What is ti,hwmods?  It's not clear from the description.  It also
doesn't appear to be used in the driver.  At least, I did not find any
occurrence of "hwmods" in the driver code.

> +static inline unsigned long ti_qspi_readl_data(struct ti_qspi *qspi,
> +               unsigned long reg, int wlen)

"readl" means read LONG.  That's what the L is for.  But this does
different widths.

> +{
> +       switch (wlen) {
> +       case 8:
> +               return readw(qspi->base + reg);
> +               break;
wlen == 8, but readw == 16 bit read?

The break after the return isn't necessary.

> +       case 16:
> +               return readb(qspi->base + reg);
> +               break;
wlen == 16, but readb == 8 bit read?

> +       case 32:
> +               return readl(qspi->base + reg);

wlen == 32, readl == 32, this one makes sense, but....

> +static inline void ti_qspi_writel_data(struct ti_qspi *qspi,
> +               unsigned long val, unsigned long reg, int wlen)

> +       case 32:
> +               writeb(val, qspi->base + reg);
> +               break;

A 32 bit write uses an 8 bit write command, while read is 32 bits??

This doesn't make a lot of sense.  If it's actually correct, there
should be come kind of comment about it.

> +
> +static int ti_qspi_setup(struct spi_device *spi)
> +{

> +
> +       clk_ctrl_reg = ti_qspi_readl(qspi, QSPI_SPI_CLOCK_CNTRL_REG);
> +
> +       clk_ctrl_reg &= ~QSPI_CLK_EN;
> +
> +       /* disable SCLK */
> +       ti_qspi_writel(qspi, clk_ctrl_reg, QSPI_SPI_CLOCK_CNTRL_REG);

Did you read this from Documentation/spi/spi-summary?
                ** BUG ALERT:  for some reason the first version of
                ** many spi_master drivers seems to get this wrong.
                ** When you code setup(), ASSUME that the controller
                ** is actively processing transfers for another device.

> +static int ti_qspi_probe(struct platform_device *pdev)
> +{
> +
> +       master->mode_bits = SPI_CPOL | SPI_CPHA;

Does your device support full duplex?  It doesn't look like it does.
You should set the SPI_MASER_HALF_DUPLEX flag in master->flags.

> +
> +       if (!of_property_read_u32(np, "ti,spi-num-cs", &num_cs))
> +               master->num_chipselect = num_cs;

You didn't document this property.  How is this different than the
"num-cs" property already documented in spi-bus bindings?

> +       qspi->base = devm_ioremap_resource(&pdev->dev, r);
> +       if (IS_ERR(qspi->base)) {
> +               ret = -ENOMEM;

Shouldn't that be ret = PTR_ERR(qspi->base)

------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
Nishanth Menon July 18, 2013, 8:59 p.m. UTC | #6
On 07/18/2013 03:39 PM, Mark Brown wrote:
> On Thu, Jul 18, 2013 at 12:08:30PM -0700, Trent Piepho wrote:
>> On Thu, Jul 18, 2013 at 3:01 AM, Sourav Poddar <sourav.poddar@ti.com> wrote:
>
>>> +- ti,hwmods: Name of the hwmod associated to the QSPI
>
>> What is ti,hwmods?  It's not clear from the description.  It also
>> doesn't appear to be used in the driver.  At least, I did not find any
>> occurrence of "hwmods" in the driver code.
>
> It's a generic thing for all OMAPs which is handled by the core SoC
> support transparently on TI chips.
>
Here are some ancient documentation for the same (pre-device tree) world[1]
bindings documentation is available: 
Documentation/devicetree/bindings/arm/omap/omap.txt


[1] http://omapedia.org/wiki/HWMOD
Poddar, Sourav July 19, 2013, 5:02 a.m. UTC | #7
On Friday 19 July 2013 12:38 AM, Trent Piepho wrote:
> On Thu, Jul 18, 2013 at 3:01 AM, Sourav Poddar<sourav.poddar@ti.com>  wrote:
>> +Required properties:
>> +- compatible : should be "ti,dra7xxx-qspi".
>> +- reg: Should contain QSPI registers location and length.
>> +- #address-cells, #size-cells : Must be present if the device has sub-nodes
>> +- ti,hwmods: Name of the hwmod associated to the QSPI
> What is ti,hwmods?  It's not clear from the description.  It also
> doesn't appear to be used in the driver.  At least, I did not find any
> occurrence of "hwmods" in the driver code.
>
>> +static inline unsigned long ti_qspi_readl_data(struct ti_qspi *qspi,
>> +               unsigned long reg, int wlen)
> "readl" means read LONG.  That's what the L is for.  But this does
> different widths.
>
>> +{
>> +       switch (wlen) {
>> +       case 8:
>> +               return readw(qspi->base + reg);
>> +               break;
> wlen == 8, but readw == 16 bit read?
Yes, I need to change this. should be readb.
> The break after the return isn't necessary.
>
>> +       case 16:
>> +               return readb(qspi->base + reg);
>> +               break;
> wlen == 16, but readb == 8 bit read?
>
same here.
>> +       case 32:
>> +               return readl(qspi->base + reg);
> wlen == 32, readl == 32, this one makes sense, but....
>
>> +static inline void ti_qspi_writel_data(struct ti_qspi *qspi,
>> +               unsigned long val, unsigned long reg, int wlen)
>> +       case 32:
>> +               writeb(val, qspi->base + reg);
>> +               break;
> A 32 bit write uses an 8 bit write command, while read is 32 bits??
>
> This doesn't make a lot of sense.  If it's actually correct, there
> should be come kind of comment about it.
>
Yes, I will change this in the next version.
>> +
>> +static int ti_qspi_setup(struct spi_device *spi)
>> +{
>> +
>> +       clk_ctrl_reg = ti_qspi_readl(qspi, QSPI_SPI_CLOCK_CNTRL_REG);
>> +
>> +       clk_ctrl_reg&= ~QSPI_CLK_EN;
>> +
>> +       /* disable SCLK */
>> +       ti_qspi_writel(qspi, clk_ctrl_reg, QSPI_SPI_CLOCK_CNTRL_REG);
> Did you read this from Documentation/spi/spi-summary?
>                  ** BUG ALERT:  for some reason the first version of
>                  ** many spi_master drivers seems to get this wrong.
>                  ** When you code setup(), ASSUME that the controller
>                  ** is actively processing transfers for another device.
>
But I see in this documentation, that setup is usually used for setting up
device clock rate, modes etc.
So, what do you recommend here, should we move clk settings to prepare
hardware ?
>> +static int ti_qspi_probe(struct platform_device *pdev)
>> +{
>> +
>> +       master->mode_bits = SPI_CPOL | SPI_CPHA;
> Does your device support full duplex?  It doesn't look like it does.
> You should set the SPI_MASER_HALF_DUPLEX flag in master->flags.
>
hmm. Ok, will add.
>> +
>> +       if (!of_property_read_u32(np, "ti,spi-num-cs",&num_cs))
>> +               master->num_chipselect = num_cs;
> You didn't document this property.  How is this different than the
> "num-cs" property already documented in spi-bus bindings?
>
Actually, it is no different. This also means the total number of 
chipselects.
I used it from omap mcspi. I will make this property in accordance with the
generic binding.
Will also send a seperate patch for omap mcspi.
>> +       qspi->base = devm_ioremap_resource(&pdev->dev, r);
>> +       if (IS_ERR(qspi->base)) {
>> +               ret = -ENOMEM;
> Shouldn't that be ret = PTR_ERR(qspi->base)
hmm.will change.


------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
Poddar, Sourav July 19, 2013, 11:48 a.m. UTC | #8
Hi Felipe,
On Thursday 18 July 2013 04:54 PM, Felipe Balbi wrote:
> On Thu, Jul 18, 2013 at 04:48:41PM +0530, Sourav Poddar wrote:
>>>> +static void qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
>>>> +{
>>>> +	const u8 *txbuf;
>>>> +	int wlen, count;
>>>> +
>>>> +	count = t->len;
>>>> +	txbuf = t->tx_buf;
>>>> +	wlen = t->bits_per_word;
>>>> +
>>>> +	while (count--) {
>>>> +		dev_dbg(qspi->dev, "tx cmd %08x dc %08x data %02x\n",
>>>> +			qspi->cmd | QSPI_WR_SNGL, qspi->dc, *txbuf);
>>>> +		ti_qspi_writel_data(qspi, *txbuf++, QSPI_SPI_DATA_REG, wlen);
>>> you always increment by each byte. Here, if you used writel(), you wrote
>>> 4 bytes and should increment txbuf by 4.
>> hmm..got this point. Yes, my mistake, here I agree if wlen is not 8 bits
>> txbuf++ is not valid.
>>>   Same goes for read_data(),
>>> below. Another thing. Even though your wlen might be 8 bits, if you
>>> write 4 bytes to write, you can save a few CPU cycles by using writel().
>>>
>> Do you mean 4 words of 8 bits?
> yeah. Say you have wlen = 8 but the transfer length is 8 bytes (64
> bits). If you use writeb(), you will do 8 writes, if you use writel()
> you'll do 2 writes.
>
Just some more findings on this, after wlen bits are transferred we need 
an WC interrupt.
So, if I try to pack 4 words of 8bits and use readl/writel, there will 
be an interrupt after every
wlen bits transferred and things will get screwd up.

So, for 8 bits word we need to use readb, for 16 bits word readw.
>>>> +static int ti_qspi_start_transfer_one(struct spi_master *master,
>>>> +		struct spi_message *m)
>>>> +{
>>>> +	struct ti_qspi *qspi = spi_master_get_devdata(master);
>>>> +	struct spi_device *spi = m->spi;
>>>> +	struct spi_transfer *t;
>>>> +	int status = 0, ret;
>>>> +	int frame_length;
>>>> +
>>>> +	/* setup device control reg */
>>>> +	qspi->dc = 0;
>>>> +
>>>> +	if (spi->mode&   SPI_CPHA)
>>>> +		qspi->dc |= QSPI_CKPHA(spi->chip_select);
>>>> +	if (spi->mode&   SPI_CPOL)
>>>> +		qspi->dc |= QSPI_CKPOL(spi->chip_select);
>>>> +	if (spi->mode&   SPI_CS_HIGH)
>>>> +		qspi->dc |= QSPI_CSPOL(spi->chip_select);
>>>> +
>>>> +	frame_length = DIV_ROUND_UP(m->frame_length * spi->bits_per_word,
>>>> +				spi->bits_per_word);
>>> this calculation doesn't look correct.
>>>
>>> 	(m->frame_length * spi->bits_per_word) /
>>> 		spi->bits_per_word = m->frame_length
>>>
>>> What are you trying to achieve here ? frame_length should be counted in
>>> words right ? And we get that value in bytes. So what's the best
>>> calculation to convert bytes into words ? If you have 8 bits_per_word
>>> you don't need any calculation, but if you have 32 bits_per_word, you
>>> _do_ need something.
>>>
>> Yes, just derive this formulae with 8 bits per word in mind.
>> Will change.
>> It should be (m->frame_length * 8) / spi->bits_per_word
> right on. To make sure this will execute a little faster (you never know
> what several different versions of GCC will do), instead of multiplying
> by 8, left shift by 3.
>
>>> How will you achieve the number you want ? (hint: 1 byte == 8 bits)
>>>
>>> And btw, all of these mistakes pretty much tell me that this driver
>>> hasn't been tested. How have you tested this driver ?
>> After bootup, I checked for deive detting enumerated as /proc/mtd.
>> After which I am using mtdutils(erase, dump and write utilied to
>> check for the communication with the flash device.)
> alright, make that clear in your commit log.
>


------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
Poddar, Sourav July 19, 2013, 11:55 a.m. UTC | #9
Hi Mark,
On Thursday 18 July 2013 04:12 PM, Mark Brown wrote:
> On Thu, Jul 18, 2013 at 03:31:26PM +0530, Sourav Poddar wrote:
>
>> QSPI is a kind of spi module that allows single,
>> dual and quad read access to external spi devices. The module
>> has a memory mapped interface which provide direct interface
>> for accessing data form external spi devices.
> Have you seen the ongoing thread about SPI buses with extra data lines?
> How does this driver fit in with that?
>
>> --- a/drivers/spi/Makefile
>> +++ b/drivers/spi/Makefile
>> @@ -46,6 +46,7 @@ obj-$(CONFIG_SPI_OCTEON)		+= spi-octeon.o
>>   obj-$(CONFIG_SPI_OMAP_UWIRE)		+= spi-omap-uwire.o
>>   obj-$(CONFIG_SPI_OMAP_100K)		+= spi-omap-100k.o
>>   obj-$(CONFIG_SPI_OMAP24XX)		+= spi-omap2-mcspi.o
>> +obj-$(CONFIG_QSPI_DRA7xxx)              += spi-ti-qspi.o
>>   obj-$(CONFIG_SPI_ORION)			+= spi-orion.o
>>   obj-$(CONFIG_SPI_PL022)			+= spi-pl022.o
>>   obj-$(CONFIG_SPI_PPC4xx)		+= spi-ppc4xx.o
> Please use SPI_ like the other drivers.
>
>> +static int ti_qspi_prepare_xfer(struct spi_master *master)
>> +{
>> +	struct ti_qspi *qspi = spi_master_get_devdata(master);
>> +	int ret;
>> +
>> +	ret = pm_runtime_get_sync(qspi->dev);
>> +	if (ret<  0) {
>> +		dev_err(qspi->dev, "pm_runtime_get_sync() failed\n");
>> +		return ret;
>> +	}
>> +
>> +	return 0;
>> +}
> This is a very common pattern, it should probably be factored out into
> the core, probably not even as ops but rather as an actual feature.
>

I see, every other driver doing it this way right now.
Is it ok, if I take your above idea as an seperate excercise after this 
patch.?
>> +	list_for_each_entry(t,&m->transfers, transfer_list) {
>> +		qspi->cmd |= QSPI_WLEN(t->bits_per_word);
>> +		qspi->cmd |= QSPI_WC_CMD_INT_EN;
>> +
>> +		ret = qspi_transfer_msg(qspi, t);
>> +		if (ret) {
>> +			dev_dbg(qspi->dev, "transfer message failed\n");
>> +			return -EINVAL;
>> +		}
>> +
>> +		m->actual_length += t->len;
>> +
>> +		if (list_is_last(&t->transfer_list,&m->transfers))
>> +			goto out;
>> +	}
> The use of list_is_last() here is *realy* strange - what's going on
> there?
>
>> +static irqreturn_t ti_qspi_isr(int irq, void *dev_id)
>> +{
>> +	struct ti_qspi *qspi = dev_id;
>> +	u16 mask, stat;
>> +
>> +	irqreturn_t ret = IRQ_HANDLED;
>> +
>> +	spin_lock(&qspi->lock);
>> +
>> +	stat = ti_qspi_readl(qspi, QSPI_SPI_STATUS_REG);
>> +	mask = ti_qspi_readl(qspi, QSPI_INTR_ENABLE_SET_REG);
>> +
>> +	if (stat&&  mask)
>> +		ret = IRQ_WAKE_THREAD;
>> +
>> +	spin_unlock(&qspi->lock);
>> +
>> +	return ret;
> According to the above code we might interrupt for masked events...
> that's a bit worrying isn't it?
>
>> +	ret = devm_request_threaded_irq(&pdev->dev, irq, ti_qspi_isr,
>> +			ti_qspi_threaded_isr, IRQF_NO_SUSPEND | IRQF_ONESHOT,
>> +			dev_name(&pdev->dev), qspi);
>> +	if (ret<  0) {
>> +		dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
>> +				irq);
>> +		goto free_master;
>> +	}
> Standard question about devm_request_threaded_irq() - how can we be
> certain it's safe to use during removal?


------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/spi/ti_qspi.txt b/Documentation/devicetree/bindings/spi/ti_qspi.txt
new file mode 100644
index 0000000..398ef59
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/ti_qspi.txt
@@ -0,0 +1,22 @@ 
+TI QSPI controller.
+
+Required properties:
+- compatible : should be "ti,dra7xxx-qspi".
+- reg: Should contain QSPI registers location and length.
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+- ti,hwmods: Name of the hwmod associated to the QSPI
+
+Recommended properties:
+- spi-max-frequency: Definition as per
+                     Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+
+qspi: qspi@4b300000 {
+	compatible = "ti,dra7xxx-qspi";
+	reg = <0x4b300000 0x100>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+	spi-max-frequency = <25000000>;
+	ti,hwmods = "qspi";
+};
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 92a9345..e594fdb 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -285,6 +285,14 @@  config SPI_OMAP24XX
 	  SPI master controller for OMAP24XX and later Multichannel SPI
 	  (McSPI) modules.
 
+config QSPI_DRA7xxx
+	tristate "DRA7xxx QSPI controller support"
+	depends on ARCH_OMAP2PLUS || COMPILE_TEST
+	help
+	  QSPI master controller for DRA7xxx used for flash devices.
+	  This device supports single, dual and quad read support, while
+	  it only supports single write mode.
+
 config SPI_OMAP_100K
 	tristate "OMAP SPI 100K"
 	depends on ARCH_OMAP850 || ARCH_OMAP730
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 33f9c09..b3b4857 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -46,6 +46,7 @@  obj-$(CONFIG_SPI_OCTEON)		+= spi-octeon.o
 obj-$(CONFIG_SPI_OMAP_UWIRE)		+= spi-omap-uwire.o
 obj-$(CONFIG_SPI_OMAP_100K)		+= spi-omap-100k.o
 obj-$(CONFIG_SPI_OMAP24XX)		+= spi-omap2-mcspi.o
+obj-$(CONFIG_QSPI_DRA7xxx)              += spi-ti-qspi.o
 obj-$(CONFIG_SPI_ORION)			+= spi-orion.o
 obj-$(CONFIG_SPI_PL022)			+= spi-pl022.o
 obj-$(CONFIG_SPI_PPC4xx)		+= spi-ppc4xx.o
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
new file mode 100644
index 0000000..3cae731
--- /dev/null
+++ b/drivers/spi/spi-ti-qspi.c
@@ -0,0 +1,537 @@ 
+/*
+ * TI QSPI driver
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Sourav Poddar <sourav.poddar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GPLv2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR /PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/omap-dma.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+
+#include <linux/spi/spi.h>
+
+struct ti_qspi_regs {
+	u32 clkctrl;
+};
+
+struct ti_qspi {
+	spinlock_t              lock;           /* IRQ synchronization */
+	struct spi_master	*master;
+	void __iomem            *base;
+	struct device           *dev;
+	struct completion       transfer_complete;
+	struct clk *fclk;
+	struct ti_qspi_regs     ctx_reg;
+	int device_type;
+	u32 spi_max_frequency;
+	u32 cmd;
+	u32 dc;
+};
+
+#define QSPI_PID			(0x0)
+#define QSPI_SYSCONFIG			(0x10)
+#define QSPI_INTR_STATUS_RAW_SET	(0x20)
+#define QSPI_INTR_STATUS_ENABLED_CLEAR	(0x24)
+#define QSPI_INTR_ENABLE_SET_REG	(0x28)
+#define QSPI_INTR_ENABLE_CLEAR_REG	(0x2c)
+#define QSPI_SPI_CLOCK_CNTRL_REG	(0x40)
+#define QSPI_SPI_DC_REG			(0x44)
+#define QSPI_SPI_CMD_REG		(0x48)
+#define QSPI_SPI_STATUS_REG		(0x4c)
+#define QSPI_SPI_DATA_REG		(0x50)
+#define QSPI_SPI_SETUP0_REG		(0x54)
+#define QSPI_SPI_SWITCH_REG		(0x64)
+#define QSPI_SPI_SETUP1_REG		(0x58)
+#define QSPI_SPI_SETUP2_REG		(0x5c)
+#define QSPI_SPI_SETUP3_REG		(0x60)
+#define QSPI_SPI_DATA_REG_1		(0x68)
+#define QSPI_SPI_DATA_REG_2		(0x6c)
+#define QSPI_SPI_DATA_REG_3		(0x70)
+
+#define QSPI_TIMEOUT			2000000
+
+#define QSPI_FCLK			192000000
+
+/* Clock Control */
+#define QSPI_CLK_EN			(1 << 31)
+#define QSPI_CLK_DIV_MAX		0xffff
+
+/* Command */
+#define QSPI_EN_CS(n)			(n << 28)
+#define QSPI_WLEN(n)			((n-1) << 19)
+#define QSPI_3_PIN			(1 << 18)
+#define QSPI_RD_SNGL			(1 << 16)
+#define QSPI_WR_SNGL			(2 << 16)
+#define QSPI_RD_QUAD			(7 << 16)
+#define QSPI_INVAL			(4 << 16)
+#define QSPI_WC_CMD_INT_EN			(1 << 14)
+#define QSPI_FLEN(n)			((n-1) << 0)
+
+/* INTERRUPT REGISTER */
+#define QSPI_WC_INT_EN				(1 << 1)
+#define QSPI_WC_INT_DISABLE			(1 << 1)
+
+/* Device Control */
+#define QSPI_DD(m, n)			(m << (3 + n*8))
+#define QSPI_CKPHA(n)			(1 << (2 + n*8))
+#define QSPI_CSPOL(n)			(1 << (1 + n*8))
+#define QSPI_CKPOL(n)			(1 << (n*8))
+
+/* Status */
+#define QSPI_WC				(1 << 1)
+#define QSPI_BUSY			(1 << 0)
+#define QSPI_WC_BUSY			(QSPI_WC | QSPI_BUSY)
+#define QSPI_XFER_DONE			QSPI_WC
+
+#define	QSPI_FRAME_MAX			0xfff
+
+#define SPI_AUTOSUSPEND_TIMEOUT         2000
+
+static inline unsigned long ti_qspi_readl(struct ti_qspi *qspi,
+		unsigned long reg)
+{
+	return readl(qspi->base + reg);
+}
+
+static inline void ti_qspi_writel(struct ti_qspi *qspi,
+		unsigned long val, unsigned long reg)
+{
+	writel(val, qspi->base + reg);
+}
+
+static inline unsigned long ti_qspi_readl_data(struct ti_qspi *qspi,
+		unsigned long reg, int wlen)
+{
+	switch (wlen) {
+	case 8:
+		return readw(qspi->base + reg);
+		break;
+	case 16:
+		return readb(qspi->base + reg);
+		break;
+	case 32:
+		return readl(qspi->base + reg);
+		break;
+	default:
+		return -EINVAL;
+	}
+}
+
+static inline void ti_qspi_writel_data(struct ti_qspi *qspi,
+		unsigned long val, unsigned long reg, int wlen)
+{
+	switch (wlen) {
+	case 8:
+		writew(val, qspi->base + reg);
+		break;
+	case 16:
+		writeb(val, qspi->base + reg);
+		break;
+	case 32:
+		writeb(val, qspi->base + reg);
+		break;
+	default:
+		dev_dbg(qspi->dev, "word lenght out of range");
+		break;
+	}
+}
+
+static int ti_qspi_setup(struct spi_device *spi)
+{
+	struct ti_qspi	*qspi = spi_master_get_devdata(spi->master);
+	struct ti_qspi_regs *ctx_reg = &qspi->ctx_reg;
+	int clk_div = 0, ret;
+	u32 clk_ctrl_reg, clk_rate, clk_mask;
+
+	clk_rate = clk_get_rate(qspi->fclk);
+
+	if (!qspi->spi_max_frequency) {
+		dev_err(qspi->dev, "spi max frequency not defined\n");
+		return -EINVAL;
+	} else {
+		clk_div = DIV_ROUND_UP(clk_rate, qspi->spi_max_frequency) - 1;
+	}
+
+	dev_dbg(qspi->dev, "%s: hz: %d, clock divider %d\n", __func__,
+			qspi->spi_max_frequency, clk_div);
+
+	ret = pm_runtime_get_sync(qspi->dev);
+	if (ret) {
+		dev_err(qspi->dev, "pm_runtime_get_sync() failed\n");
+		return ret;
+	}
+
+	clk_ctrl_reg = ti_qspi_readl(qspi, QSPI_SPI_CLOCK_CNTRL_REG);
+
+	clk_ctrl_reg &= ~QSPI_CLK_EN;
+
+	/* disable SCLK */
+	ti_qspi_writel(qspi, clk_ctrl_reg, QSPI_SPI_CLOCK_CNTRL_REG);
+
+	if (clk_div < 0) {
+		dev_dbg(qspi->dev, "%s: clock divider < 0, using /1 divider\n",
+				__func__);
+		return -EINVAL;
+	}
+
+	if (clk_div > QSPI_CLK_DIV_MAX) {
+		dev_dbg(qspi->dev, "%s: clock divider >%d , using /%d divider\n",
+			__func__, QSPI_CLK_DIV_MAX, QSPI_CLK_DIV_MAX + 1);
+		return -EINVAL;
+	}
+
+	/* enable SCLK */
+	clk_mask = QSPI_CLK_EN | clk_div;
+	ti_qspi_writel(qspi, clk_mask, QSPI_SPI_CLOCK_CNTRL_REG);
+	ctx_reg->clkctrl = clk_mask;
+
+	pm_runtime_mark_last_busy(qspi->dev);
+	ret = pm_runtime_put_autosuspend(qspi->dev);
+	if (ret < 0) {
+		dev_err(qspi->dev, "pm_runtime_get_sync() failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void ti_qspi_restore_ctx(struct ti_qspi *qspi)
+{
+	struct ti_qspi_regs *ctx_reg = &qspi->ctx_reg;
+
+	ti_qspi_writel(qspi, ctx_reg->clkctrl, QSPI_SPI_CLOCK_CNTRL_REG);
+}
+
+static int ti_qspi_prepare_xfer(struct spi_master *master)
+{
+	struct ti_qspi *qspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = pm_runtime_get_sync(qspi->dev);
+	if (ret < 0) {
+		dev_err(qspi->dev, "pm_runtime_get_sync() failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ti_qspi_unprepare_xfer(struct spi_master *master)
+{
+	struct ti_qspi *qspi = spi_master_get_devdata(master);
+	int ret;
+
+	pm_runtime_mark_last_busy(qspi->dev);
+	ret = pm_runtime_put_autosuspend(qspi->dev);
+	if (ret < 0) {
+		dev_err(qspi->dev, "pm_runtime_get_sync() failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+{
+	const u8 *txbuf;
+	int wlen, count;
+
+	count = t->len;
+	txbuf = t->tx_buf;
+	wlen = t->bits_per_word;
+
+	while (count--) {
+		dev_dbg(qspi->dev, "tx cmd %08x dc %08x data %02x\n",
+			qspi->cmd | QSPI_WR_SNGL, qspi->dc, *txbuf);
+		ti_qspi_writel_data(qspi, *txbuf++, QSPI_SPI_DATA_REG, wlen);
+		ti_qspi_writel(qspi, qspi->dc, QSPI_SPI_DC_REG);
+		ti_qspi_writel(qspi, qspi->cmd | QSPI_WR_SNGL,
+			QSPI_SPI_CMD_REG);
+		ti_qspi_writel(qspi, QSPI_WC_INT_EN, QSPI_INTR_ENABLE_SET_REG);
+		wait_for_completion(&qspi->transfer_complete);
+	}
+}
+
+static void qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+{
+	u8 *rxbuf;
+	int wlen, count;
+
+	count = t->len;
+	rxbuf = t->rx_buf;
+	wlen = t->bits_per_word;
+
+	while (count--) {
+		dev_dbg(qspi->dev, "rx cmd %08x dc %08x\n",
+			qspi->cmd | QSPI_RD_SNGL, qspi->dc);
+		ti_qspi_writel(qspi, qspi->dc, QSPI_SPI_DC_REG);
+		ti_qspi_writel(qspi, qspi->cmd | QSPI_RD_SNGL,
+			QSPI_SPI_CMD_REG);
+		ti_qspi_writel(qspi, QSPI_WC_INT_EN, QSPI_INTR_ENABLE_SET_REG);
+		wait_for_completion(&qspi->transfer_complete);
+		*rxbuf++ = ti_qspi_readl_data(qspi, QSPI_SPI_DATA_REG, wlen);
+		dev_dbg(qspi->dev, "rx done, read %02x\n", *(rxbuf-1));
+	}
+}
+
+static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+{
+	if (t->tx_buf)
+		qspi_write_msg(qspi, t);
+	if (t->rx_buf)
+		qspi_read_msg(qspi, t);
+
+	return 0;
+}
+
+static int ti_qspi_start_transfer_one(struct spi_master *master,
+		struct spi_message *m)
+{
+	struct ti_qspi *qspi = spi_master_get_devdata(master);
+	struct spi_device *spi = m->spi;
+	struct spi_transfer *t;
+	int status = 0, ret;
+	int frame_length;
+
+	/* setup device control reg */
+	qspi->dc = 0;
+
+	if (spi->mode & SPI_CPHA)
+		qspi->dc |= QSPI_CKPHA(spi->chip_select);
+	if (spi->mode & SPI_CPOL)
+		qspi->dc |= QSPI_CKPOL(spi->chip_select);
+	if (spi->mode & SPI_CS_HIGH)
+		qspi->dc |= QSPI_CSPOL(spi->chip_select);
+
+	frame_length = DIV_ROUND_UP(m->frame_length * spi->bits_per_word,
+				spi->bits_per_word);
+
+	frame_length = clamp(frame_length, 0, QSPI_FRAME_MAX);
+
+	/* setup command reg */
+	qspi->cmd = 0;
+	qspi->cmd |= QSPI_EN_CS(spi->chip_select);
+	qspi->cmd |= QSPI_FLEN(frame_length);
+
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		qspi->cmd |= QSPI_WLEN(t->bits_per_word);
+		qspi->cmd |= QSPI_WC_CMD_INT_EN;
+
+		ret = qspi_transfer_msg(qspi, t);
+		if (ret) {
+			dev_dbg(qspi->dev, "transfer message failed\n");
+			return -EINVAL;
+		}
+
+		m->actual_length += t->len;
+
+		if (list_is_last(&t->transfer_list, &m->transfers))
+			goto out;
+	}
+
+out:
+	m->status = status;
+	spi_finalize_current_message(master);
+
+	ti_qspi_writel(qspi, qspi->cmd | QSPI_INVAL, QSPI_SPI_CMD_REG);
+
+	return status;
+}
+
+static irqreturn_t ti_qspi_isr(int irq, void *dev_id)
+{
+	struct ti_qspi *qspi = dev_id;
+	u16 mask, stat;
+
+	irqreturn_t ret = IRQ_HANDLED;
+
+	spin_lock(&qspi->lock);
+
+	stat = ti_qspi_readl(qspi, QSPI_SPI_STATUS_REG);
+	mask = ti_qspi_readl(qspi, QSPI_INTR_ENABLE_SET_REG);
+
+	if (stat && mask)
+		ret = IRQ_WAKE_THREAD;
+
+	spin_unlock(&qspi->lock);
+
+	return ret;
+}
+
+static irqreturn_t ti_qspi_threaded_isr(int this_irq, void *dev_id)
+{
+	struct ti_qspi *qspi = dev_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qspi->lock, flags);
+
+	ti_qspi_writel(qspi, QSPI_WC_INT_DISABLE, QSPI_INTR_ENABLE_CLEAR_REG);
+	ti_qspi_writel(qspi, QSPI_WC_INT_DISABLE,
+			QSPI_INTR_STATUS_ENABLED_CLEAR);
+
+	complete(&qspi->transfer_complete);
+
+	spin_unlock_irqrestore(&qspi->lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static int ti_qspi_runtime_resume(struct device *dev)
+{
+	struct ti_qspi      *qspi;
+	struct spi_master       *master;
+
+	master = dev_get_drvdata(dev);
+	qspi = spi_master_get_devdata(master);
+	ti_qspi_restore_ctx(qspi);
+
+	return 0;
+}
+
+static const struct of_device_id ti_qspi_match[] = {
+	{.compatible = "ti,dra7xxx-qspi" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, dra7xxx_qspi_match);
+
+static int ti_qspi_probe(struct platform_device *pdev)
+{
+	struct  ti_qspi *qspi;
+	struct spi_master *master;
+	struct resource         *r;
+	struct device_node *np = pdev->dev.of_node;
+	u32 max_freq;
+	int ret = 0, num_cs, irq;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*qspi));
+	if (!master)
+		return -ENOMEM;
+
+	master->mode_bits = SPI_CPOL | SPI_CPHA;
+
+	master->num_chipselect = 1;
+	master->bus_num = -1;
+	master->setup = ti_qspi_setup;
+	master->prepare_transfer_hardware = ti_qspi_prepare_xfer;
+	master->transfer_one_message = ti_qspi_start_transfer_one;
+	master->unprepare_transfer_hardware = ti_qspi_unprepare_xfer;
+	master->dev.of_node = pdev->dev.of_node;
+	master->bits_per_word_mask = BIT(32 - 1) | BIT(16 - 1) | BIT(8 - 1);
+
+	if (!of_property_read_u32(np, "ti,spi-num-cs", &num_cs))
+		master->num_chipselect = num_cs;
+
+	platform_set_drvdata(pdev, master);
+
+	qspi = spi_master_get_devdata(master);
+	qspi->master = master;
+	qspi->dev = &pdev->dev;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (r == NULL) {
+		ret = -ENODEV;
+		goto free_master;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no irq resource?\n");
+		return irq;
+	}
+
+	spin_lock_init(&qspi->lock);
+
+	qspi->base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(qspi->base)) {
+		ret = -ENOMEM;
+		goto free_master;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, irq, ti_qspi_isr,
+			ti_qspi_threaded_isr, IRQF_NO_SUSPEND | IRQF_ONESHOT,
+			dev_name(&pdev->dev), qspi);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
+				irq);
+		goto free_master;
+	}
+
+	qspi->fclk = devm_clk_get(&pdev->dev, "fck");
+	if (IS_ERR(qspi->fclk)) {
+		ret = PTR_ERR(qspi->fclk);
+		dev_err(&pdev->dev, "could not get clk: %d\n", ret);
+	}
+
+	init_completion(&qspi->transfer_complete);
+
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
+	pm_runtime_enable(&pdev->dev);
+
+	if (!of_property_read_u32(np, "spi-max-frequency", &max_freq))
+		qspi->spi_max_frequency = max_freq;
+
+	ret = spi_register_master(master);
+	if (ret)
+		goto free_master;
+
+	return ret;
+
+free_master:
+	spi_master_put(master);
+	return ret;
+}
+
+static int ti_qspi_remove(struct platform_device *pdev)
+{
+	struct	ti_qspi *qspi = platform_get_drvdata(pdev);
+
+	spi_unregister_master(qspi->master);
+
+	return 0;
+}
+
+static const struct dev_pm_ops ti_qspi_pm_ops = {
+	.runtime_resume = ti_qspi_runtime_resume,
+};
+
+static struct platform_driver ti_qspi_driver = {
+	.probe	= ti_qspi_probe,
+	.remove	= ti_qspi_remove,
+	.driver = {
+		.name	= "ti,dra7xxx-qspi",
+		.owner	= THIS_MODULE,
+		.pm =   &ti_qspi_pm_ops,
+		.of_match_table = ti_qspi_match,
+	}
+};
+
+module_platform_driver(ti_qspi_driver);
+
+MODULE_AUTHOR("Sourav Poddar <sourav.poddar@ti.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("TI QSPI controller driver");