diff mbox series

spi: spi-imx: Revert "spi: spi-imx: add PIO polling support"

Message ID 20221111003032.82371-1-festevam@gmail.com (mailing list archive)
State New, archived
Headers show
Series spi: spi-imx: Revert "spi: spi-imx: add PIO polling support" | expand

Commit Message

Fabio Estevam Nov. 11, 2022, 12:30 a.m. UTC
This reverts commit 07e7593877882fbd07c55b26b7dcf88760449323.

Since commit 07e759387788 ("spi: spi-imx: add PIO polling support") it is
no longer possible to do successful SPI NOR programming on
imx8mm-kontron-n801x-som, even though the copy operation returns no errors
to the user.

After programming the SPI NOR flash from Linux, the board fails to
boot from SPI NOR, leading to an unbootable system.

Revert it to fix the regression.

Cc: stable@kernel.org
Fixes: 07e759387788 ("spi: spi-imx: add PIO polling support")
Signed-off-by: Fabio Estevam <festevam@gmail.com>
---
 drivers/spi/spi-imx.c | 66 -------------------------------------------
 1 file changed, 66 deletions(-)

Comments

David Jander Nov. 11, 2022, 9:50 a.m. UTC | #1
On Thu, 10 Nov 2022 21:30:32 -0300
Fabio Estevam <festevam@gmail.com> wrote:

> This reverts commit 07e7593877882fbd07c55b26b7dcf88760449323.
> 
> Since commit 07e759387788 ("spi: spi-imx: add PIO polling support") it is
> no longer possible to do successful SPI NOR programming on
> imx8mm-kontron-n801x-som, even though the copy operation returns no errors
> to the user.
> 
> After programming the SPI NOR flash from Linux, the board fails to
> boot from SPI NOR, leading to an unbootable system.
> 
> Revert it to fix the regression.

Thanks for letting us know of this bug. I just tested latest mainline on an
i.MX6Q board I have with SPI NOR flash, and it seems to work fine without this
revert, so I guess there is something different about your situation that
breaks this. I'd like to know what it is and fix it rather than revert this
patch which is very valuable for a lot of applications.

Could you help me find out more about what is happening?
Are we talking about arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi?
What are the specific symptoms (besides not booting)? What is the content of
NOR flash after erasing and programming a new image? Is it empty or are
contents just incorrect or randomly corrupted? Does erase work?
Are you using SDMA? If so, what firmware version?
Do you still have problems if you set polling_limit_us to 0?
Do you still have problems if you set polling_limit_us to a very big number,
let's say 1000000?

The effect of this patch is that it will cause short SPI transfers to have a
lot less latency than without this patch, so could it be that we are looking
at a timing related bug in the MTD driver, or some other timing issue?
Your SPI clock is 80MHz, but the datasheet of the MACRONIX MX25R1635F
specifies a maximum clock of 33MHz. Is your NOR flash chip capable of this
clock-rate?

> Cc: stable@kernel.org
> Fixes: 07e759387788 ("spi: spi-imx: add PIO polling support")
> Signed-off-by: Fabio Estevam <festevam@gmail.com>
> ---
>  drivers/spi/spi-imx.c | 66 -------------------------------------------
>  1 file changed, 66 deletions(-)
> 
> diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
> index a4bda03d3a8e..b946d98a8d66 100644
> --- a/drivers/spi/spi-imx.c
> +++ b/drivers/spi/spi-imx.c
> @@ -31,12 +31,6 @@ static bool use_dma = true;
>  module_param(use_dma, bool, 0644);
>  MODULE_PARM_DESC(use_dma, "Enable usage of DMA when available (default)");
>  
> -/* define polling limits */
> -static unsigned int polling_limit_us = 30;
> -module_param(polling_limit_us, uint, 0664);
> -MODULE_PARM_DESC(polling_limit_us,
> -		 "time in us to run a transfer in polling mode\n");
> -
>  #define MXC_RPM_TIMEOUT		2000 /* 2000ms */
>  
>  #define MXC_CSPIRXDATA		0x00
> @@ -1490,54 +1484,6 @@ static int spi_imx_pio_transfer(struct spi_device *spi,
>  	return 0;
>  }
>  
> -static int spi_imx_poll_transfer(struct spi_device *spi,
> -				 struct spi_transfer *transfer)
> -{
> -	struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
> -	unsigned long timeout;
> -
> -	spi_imx->tx_buf = transfer->tx_buf;
> -	spi_imx->rx_buf = transfer->rx_buf;
> -	spi_imx->count = transfer->len;
> -	spi_imx->txfifo = 0;
> -	spi_imx->remainder = 0;
> -
> -	/* fill in the fifo before timeout calculations if we are
> -	 * interrupted here, then the data is getting transferred by
> -	 * the HW while we are interrupted
> -	 */
> -	spi_imx_push(spi_imx);
> -
> -	timeout = spi_imx_calculate_timeout(spi_imx, transfer->len) + jiffies;
> -	while (spi_imx->txfifo) {
> -		/* RX */
> -		while (spi_imx->txfifo &&
> -		       spi_imx->devtype_data->rx_available(spi_imx)) {
> -			spi_imx->rx(spi_imx);
> -			spi_imx->txfifo--;
> -		}
> -
> -		/* TX */
> -		if (spi_imx->count) {
> -			spi_imx_push(spi_imx);
> -			continue;
> -		}
> -
> -		if (spi_imx->txfifo &&
> -		    time_after(jiffies, timeout)) {
> -
> -			dev_err_ratelimited(&spi->dev,
> -					    "timeout period reached: jiffies: %lu- falling back to interrupt mode\n",
> -					    jiffies - timeout);
> -
> -			/* fall back to interrupt mode */
> -			return spi_imx_pio_transfer(spi, transfer);
> -		}
> -	}
> -
> -	return 0;
> -}
> -
>  static int spi_imx_pio_transfer_slave(struct spi_device *spi,
>  				      struct spi_transfer *transfer)
>  {
> @@ -1587,7 +1533,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
>  				struct spi_transfer *transfer)
>  {
>  	struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
> -	unsigned long hz_per_byte, byte_limit;
>  
>  	spi_imx_setupxfer(spi, transfer);
>  	transfer->effective_speed_hz = spi_imx->spi_bus_clk;
> @@ -1599,17 +1544,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
>  	if (spi_imx->slave_mode)
>  		return spi_imx_pio_transfer_slave(spi, transfer);
>  
> -	/*
> -	 * Calculate the estimated time in us the transfer runs. Find
> -	 * the number of Hz per byte per polling limit.
> -	 */
> -	hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
> -	byte_limit = hz_per_byte ? transfer->effective_speed_hz / hz_per_byte : 1;
> -
> -	/* run in polling mode for short transfers */
> -	if (transfer->len < byte_limit)
> -		return spi_imx_poll_transfer(spi, transfer);
> -
>  	if (spi_imx->usedma)
>  		return spi_imx_dma_transfer(spi_imx, transfer);
>  

Best regards,
Fabio Estevam Nov. 11, 2022, 11:33 a.m. UTC | #2
Hi David,

On Fri, Nov 11, 2022 at 6:50 AM David Jander <david@protonic.nl> wrote:

> The effect of this patch is that it will cause short SPI transfers to have a
> lot less latency than without this patch, so could it be that we are looking
> at a timing related bug in the MTD driver, or some other timing issue?
> Your SPI clock is 80MHz, but the datasheet of the MACRONIX MX25R1635F
> specifies a maximum clock of 33MHz. Is your NOR flash chip capable of this
> clock-rate?

Thanks for your suggestions.

I have tried passing spi-max-frequency = <33000000>, and I don't see
the failure anymore.

Looking at the MX25R1635F datasheet the maximum SPI frequency is:

80MHz: when L/H bit is 1 - High Performance mode.
33MHz: when L/H bit is 0 - Ultra Low Power mode.

"L/H switch bit The Low Power / High Performance bit is a volatile bit.
User can change the value of L/H switch bit to keep Ultra Low Power
mode or High Performance mode.
Please check Ordering Information for the L/H Switch default support"

Frieder,

Could it be that the MX25R1635F part that is on the Kontron imx8mm SoM has the
Ultra Low Power Mode selected by default?

If that's the case, I can send a patch that changes the
spi-max-frequency from 80MHz to 33MHz.

Thanks
David Jander Nov. 11, 2022, 12:59 p.m. UTC | #3
Dear Fabio,

On Fri, 11 Nov 2022 08:33:03 -0300
Fabio Estevam <festevam@gmail.com> wrote:

> Hi David,
> 
> On Fri, Nov 11, 2022 at 6:50 AM David Jander <david@protonic.nl> wrote:
> 
> > The effect of this patch is that it will cause short SPI transfers to have a
> > lot less latency than without this patch, so could it be that we are looking
> > at a timing related bug in the MTD driver, or some other timing issue?
> > Your SPI clock is 80MHz, but the datasheet of the MACRONIX MX25R1635F
> > specifies a maximum clock of 33MHz. Is your NOR flash chip capable of this
> > clock-rate?  
> 
> Thanks for your suggestions.
> 
> I have tried passing spi-max-frequency = <33000000>, and I don't see
> the failure anymore.
> 
> Looking at the MX25R1635F datasheet the maximum SPI frequency is:
> 
> 80MHz: when L/H bit is 1 - High Performance mode.
> 33MHz: when L/H bit is 0 - Ultra Low Power mode.
> 
> "L/H switch bit The Low Power / High Performance bit is a volatile bit.
> User can change the value of L/H switch bit to keep Ultra Low Power
> mode or High Performance mode.
> Please check Ordering Information for the L/H Switch default support"

Oh, my bad, sorry. I didn't read far enough into the DS. I just wanted to point
out that AFAIK, if you use a clock higher than 33MHz, you probably also need
"m25p,fast-read" in the DT:

https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/mtd/spi-nor/core.c#n2550

Not sure if that can cause the issues you are seeing though.

Best regards,
Frieder Schrempf Nov. 14, 2022, 8:30 a.m. UTC | #4
Hi Fabio, hi David,

On 11.11.22 13:59, David Jander wrote:
> 
> Dear Fabio,
> 
> On Fri, 11 Nov 2022 08:33:03 -0300
> Fabio Estevam <festevam@gmail.com> wrote:
> 
>> Hi David,
>>
>> On Fri, Nov 11, 2022 at 6:50 AM David Jander <david@protonic.nl> wrote:
>>
>>> The effect of this patch is that it will cause short SPI transfers to have a
>>> lot less latency than without this patch, so could it be that we are looking
>>> at a timing related bug in the MTD driver, or some other timing issue?

This was also my first suspicion when I originally discovered a similar
(or probably the same) issue on 5.19 with wrong data being read from the
SPI NOR. Reducing the clock to 40 MHz fixed the read in my case.

>>> Your SPI clock is 80MHz, but the datasheet of the MACRONIX MX25R1635F
>>> specifies a maximum clock of 33MHz. Is your NOR flash chip capable of this
>>> clock-rate?  
>>
>> Thanks for your suggestions.
>>
>> I have tried passing spi-max-frequency = <33000000>, and I don't see
>> the failure anymore.
>>
>> Looking at the MX25R1635F datasheet the maximum SPI frequency is:
>>
>> 80MHz: when L/H bit is 1 - High Performance mode.
>> 33MHz: when L/H bit is 0 - Ultra Low Power mode.
>>
>> "L/H switch bit The Low Power / High Performance bit is a volatile bit.
>> User can change the value of L/H switch bit to keep Ultra Low Power
>> mode or High Performance mode.
>> Please check Ordering Information for the L/H Switch default support"
> 
> Oh, my bad, sorry. I didn't read far enough into the DS. I just wanted to point
> out that AFAIK, if you use a clock higher than 33MHz, you probably also need
> "m25p,fast-read" in the DT:

I already tried changing the pin configuration for the SPI pins (slew
rate, drive strength), setting spi-[rx/tx]-delay-us and enabling
m25p,fast-read. Unfortunately, none of this worked.

According to our hardware department we are using the MX25R1635FZUIH0
which has "High Performance Mode" enabled by default and should be
capable of using a 80 MHz clock.

As far as I know Fabio also discovered that disabling SDMA also fixes
the problem.

I guess I will try to repeat some tests on latest master and see if
there is anything that makes things work again without reducing the
clock. If anyone has some more ideas of how to fix this properly, please
let me know. If nothing else helps we could also reduce the SPI clock.

Thanks
Frieder
Mark Brown Nov. 14, 2022, 11:29 a.m. UTC | #5
On Mon, Nov 14, 2022 at 09:30:26AM +0100, Frieder Schrempf wrote:

> As far as I know Fabio also discovered that disabling SDMA also fixes
> the problem.

> I guess I will try to repeat some tests on latest master and see if
> there is anything that makes things work again without reducing the
> clock. If anyone has some more ideas of how to fix this properly, please
> let me know. If nothing else helps we could also reduce the SPI clock.

It sounds like the commit can stay and that everyone is happy
that the issue is that the commit made things run faster and
exposed some other misconfiguration for these systems?
Fabio Estevam Nov. 14, 2022, 1:01 p.m. UTC | #6
Hi Frieder,

On Mon, Nov 14, 2022 at 5:30 AM Frieder Schrempf
<frieder.schrempf@kontron.de> wrote:

> According to our hardware department we are using the MX25R1635FZUIH0
> which has "High Performance Mode" enabled by default and should be
> capable of using a 80 MHz clock.

Thanks for the confirmation.

> As far as I know Fabio also discovered that disabling SDMA also fixes
> the problem.

Correct, if I disable DMA, the SPI NOR can read and write at 80MHz successfully.
Frieder Schrempf Nov. 15, 2022, 10:51 a.m. UTC | #7
On 14.11.22 12:29, Mark Brown wrote:
> On Mon, Nov 14, 2022 at 09:30:26AM +0100, Frieder Schrempf wrote:
> 
>> As far as I know Fabio also discovered that disabling SDMA also fixes
>> the problem.
> 
>> I guess I will try to repeat some tests on latest master and see if
>> there is anything that makes things work again without reducing the
>> clock. If anyone has some more ideas of how to fix this properly, please
>> let me know. If nothing else helps we could also reduce the SPI clock.
> 
> It sounds like the commit can stay and that everyone is happy
> that the issue is that the commit made things run faster and
> exposed some other misconfiguration for these systems?

Honestly I'm not really sure how to proceed.

My first impression was to keep the PIO polling support with its
benefits if there's just this single issue with the SPI NOR on our board
and assuming that the performance improvements uncovered a bug somewhere
else. But at the moment I'm not quite sure this is really the case.

I did another test on v6.1-rc5 and disabling either PIO polling
(spi-imx.polling_limit_us=0) or DMA (spi-imx.use_dma=0), or both of them
makes reading the SPI NOR work again.

Looking at spi_imx_transfer_one() this means that doing PIO polling
transfers for short messages combined with normal PIO transfers for
longer messages works. The same for DMA transfers only (short and long
messages).

The problematic case seems to be having PIO polling transfers for short
messages and DMA transfers for long messages probably interleaved in
some way. And this sounds more like a problem that is not really
specific to our SPI NOR use case and as I said so far I didn't find a
way to fix it other than reducing the SPI clock significantly.

David, Marc, do you have any idea if this could be a general problem
with the PIO polling implementation?
Frieder Schrempf Nov. 15, 2022, 12:09 p.m. UTC | #8
On 15.11.22 11:51, Frieder Schrempf wrote:
> On 14.11.22 12:29, Mark Brown wrote:
>> On Mon, Nov 14, 2022 at 09:30:26AM +0100, Frieder Schrempf wrote:
>>
>>> As far as I know Fabio also discovered that disabling SDMA also fixes
>>> the problem.
>>
>>> I guess I will try to repeat some tests on latest master and see if
>>> there is anything that makes things work again without reducing the
>>> clock. If anyone has some more ideas of how to fix this properly, please
>>> let me know. If nothing else helps we could also reduce the SPI clock.
>>
>> It sounds like the commit can stay and that everyone is happy
>> that the issue is that the commit made things run faster and
>> exposed some other misconfiguration for these systems?
> 
> Honestly I'm not really sure how to proceed.
> 
> My first impression was to keep the PIO polling support with its
> benefits if there's just this single issue with the SPI NOR on our board
> and assuming that the performance improvements uncovered a bug somewhere
> else. But at the moment I'm not quite sure this is really the case.
> 
> I did another test on v6.1-rc5 and disabling either PIO polling
> (spi-imx.polling_limit_us=0) or DMA (spi-imx.use_dma=0), or both of them
> makes reading the SPI NOR work again.
> 
> Looking at spi_imx_transfer_one() this means that doing PIO polling
> transfers for short messages combined with normal PIO transfers for
> longer messages works. The same for DMA transfers only (short and long
> messages).
> 
> The problematic case seems to be having PIO polling transfers for short
> messages and DMA transfers for long messages probably interleaved in
> some way. And this sounds more like a problem that is not really
> specific to our SPI NOR use case and as I said so far I didn't find a
> way to fix it other than reducing the SPI clock significantly.
> 
> David, Marc, do you have any idea if this could be a general problem
> with the PIO polling implementation?

Further debugging reveals, that for polling_limit_us=30 the calculated
byte_limit (8104) is well beyond the maximum size of the data transfers
from the SPI NOR (4096 bytes). So I don't see any DMA transfers being
triggered which makes it even more strange why disabling DMA resolves
the issue.
Marc Kleine-Budde Nov. 15, 2022, 12:55 p.m. UTC | #9
On 15.11.2022 11:51:53, Frieder Schrempf wrote:
> On 14.11.22 12:29, Mark Brown wrote:
> > On Mon, Nov 14, 2022 at 09:30:26AM +0100, Frieder Schrempf wrote:
> > 
> >> As far as I know Fabio also discovered that disabling SDMA also fixes
> >> the problem.
> > 
> >> I guess I will try to repeat some tests on latest master and see if
> >> there is anything that makes things work again without reducing the
> >> clock. If anyone has some more ideas of how to fix this properly, please
> >> let me know. If nothing else helps we could also reduce the SPI clock.
> > 
> > It sounds like the commit can stay and that everyone is happy
> > that the issue is that the commit made things run faster and
> > exposed some other misconfiguration for these systems?
> 
> Honestly I'm not really sure how to proceed.
> 
> My first impression was to keep the PIO polling support with its
> benefits if there's just this single issue with the SPI NOR on our board
> and assuming that the performance improvements uncovered a bug somewhere
> else. But at the moment I'm not quite sure this is really the case.
> 
> I did another test on v6.1-rc5 and disabling either PIO polling
> (spi-imx.polling_limit_us=0) or DMA (spi-imx.use_dma=0), or both of them
> makes reading the SPI NOR work again.

That was a good hint, I think I've found something.

Can you check if this fixes your problem? Just a quick hack; a proper
solution needs some more love.

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 30d82cc7300b..76021b9bb445 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1270,9 +1270,22 @@ static int spi_imx_setupxfer(struct spi_device *spi,
                spi_imx->dynamic_burst = 0;
        }
 
-       if (spi_imx_can_dma(spi_imx->controller, spi, t))
-               spi_imx->usedma = true;
-       else
+       if (spi_imx_can_dma(spi_imx->controller, spi, t)) {
+               unsigned long hz_per_byte, byte_limit;
+
+               /*
+                * Calculate the estimated time in us the transfer runs. Find
+                * the number of Hz per byte per polling limit.
+                */
+               hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
+               byte_limit = hz_per_byte ? t->effective_speed_hz / hz_per_byte : 1;
+
+               /* run in polling mode for short transfers */
+               if (t->len < byte_limit)
+                       spi_imx->usedma = false;
+               else
+                       spi_imx->usedma = true;
+       } else
                spi_imx->usedma = false;
 
        spi_imx->rx_only = ((t->tx_buf == NULL)
@@ -1597,8 +1610,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
        struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
        unsigned long hz_per_byte, byte_limit;
 
-       spi_imx_setupxfer(spi, transfer);
        transfer->effective_speed_hz = spi_imx->spi_bus_clk;
+       spi_imx_setupxfer(spi, transfer);
 
        /* flush rxfifo before transfer */
        while (spi_imx->devtype_data->rx_available(spi_imx))

Marc
Frieder Schrempf Nov. 15, 2022, 2:46 p.m. UTC | #10
Hi Marc,

On 15.11.22 13:55, Marc Kleine-Budde wrote:
> On 15.11.2022 11:51:53, Frieder Schrempf wrote:
>> On 14.11.22 12:29, Mark Brown wrote:
>>> On Mon, Nov 14, 2022 at 09:30:26AM +0100, Frieder Schrempf wrote:
>>>
>>>> As far as I know Fabio also discovered that disabling SDMA also fixes
>>>> the problem.
>>>
>>>> I guess I will try to repeat some tests on latest master and see if
>>>> there is anything that makes things work again without reducing the
>>>> clock. If anyone has some more ideas of how to fix this properly, please
>>>> let me know. If nothing else helps we could also reduce the SPI clock.
>>>
>>> It sounds like the commit can stay and that everyone is happy
>>> that the issue is that the commit made things run faster and
>>> exposed some other misconfiguration for these systems?
>>
>> Honestly I'm not really sure how to proceed.
>>
>> My first impression was to keep the PIO polling support with its
>> benefits if there's just this single issue with the SPI NOR on our board
>> and assuming that the performance improvements uncovered a bug somewhere
>> else. But at the moment I'm not quite sure this is really the case.
>>
>> I did another test on v6.1-rc5 and disabling either PIO polling
>> (spi-imx.polling_limit_us=0) or DMA (spi-imx.use_dma=0), or both of them
>> makes reading the SPI NOR work again.
> 
> That was a good hint, I think I've found something.
> 
> Can you check if this fixes your problem? Just a quick hack; a proper
> solution needs some more love.
> 
> diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
> index 30d82cc7300b..76021b9bb445 100644
> --- a/drivers/spi/spi-imx.c
> +++ b/drivers/spi/spi-imx.c
> @@ -1270,9 +1270,22 @@ static int spi_imx_setupxfer(struct spi_device *spi,
>                 spi_imx->dynamic_burst = 0;
>         }
>  
> -       if (spi_imx_can_dma(spi_imx->controller, spi, t))
> -               spi_imx->usedma = true;
> -       else
> +       if (spi_imx_can_dma(spi_imx->controller, spi, t)) {
> +               unsigned long hz_per_byte, byte_limit;
> +
> +               /*
> +                * Calculate the estimated time in us the transfer runs. Find
> +                * the number of Hz per byte per polling limit.
> +                */
> +               hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
> +               byte_limit = hz_per_byte ? t->effective_speed_hz / hz_per_byte : 1;
> +
> +               /* run in polling mode for short transfers */
> +               if (t->len < byte_limit)
> +                       spi_imx->usedma = false;
> +               else
> +                       spi_imx->usedma = true;
> +       } else
>                 spi_imx->usedma = false;
>  
>         spi_imx->rx_only = ((t->tx_buf == NULL)
> @@ -1597,8 +1610,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
>         struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
>         unsigned long hz_per_byte, byte_limit;
>  
> -       spi_imx_setupxfer(spi, transfer);
>         transfer->effective_speed_hz = spi_imx->spi_bus_clk;
> +       spi_imx_setupxfer(spi, transfer);
>  
>         /* flush rxfifo before transfer */
>         while (spi_imx->devtype_data->rx_available(spi_imx))
> 

Thanks for the patch, but unfortunately this doesn't help. I did some
more debugging and it looks like there are two problems.

In my case on i.MX8MM the SPI is fed by a 50 MHz peripheral clock.
Requesting 80 MHz for the SPI NOR triggers the fspi > fin condition in
mx51_ecspi_clkdiv() [1] which in turn leaves *fres uninitialized causing
spi_imx->spi_bus_clk to be set to an arbitrary/random value in
mx51_ecspi_prepare_transfer() [2].

This in turn messes up the calculation for the PIO polling byte limit.
In my case the limit was usually somewhere around 8000 bytes, so the
4096 byte SPI NOR messages get transferred via PIO polling.

Having large and inefficient polling transfers shouldn't be a problem
or lead to corrupted data, but I suspect that it doesn't work because
the transfer size exceeds the FIFO size in this case.

If my conclusions are correct there are two fixes required (though for
my use case each one of them alone is enough to make things work):

1. Make sure spi_bus_clk is correct even if the requested bus clock
exceeds the input clock.

2. Limit byte_limit for PIO polling calculation to a maximum of
fifo_size, so we don't try to poll for transfers that don't fit into the
FIFO.

Both fixes are quite simple (see diff below) and if you agree I will
send them as formal patches.

Thanks
Frieder

[1]
https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L447
[2]
https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L650

--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -445,7 +445,7 @@ static unsigned int mx51_ecspi_clkdiv(struct
spi_imx_data *spi_imx,
        unsigned int fin = spi_imx->spi_clk;

        if (unlikely(fspi > fin))
-               return 0;
+               fspi = fin;

        post = fls(fin) - fls(fspi);
        if (fin > fspi << post)
@@ -1613,6 +1613,7 @@ static int spi_imx_transfer_one(struct
spi_controller *controller,
         */
        hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) /
polling_limit_us : 0;
        byte_limit = hz_per_byte ? transfer->effective_speed_hz /
hz_per_byte : 1;
+       byte_limit = min(byte_limit, spi_imx->devtype_data->fifo_size);

        /* run in polling mode for short transfers */
        if (transfer->len < byte_limit)
Fabio Estevam Nov. 15, 2022, 3:45 p.m. UTC | #11
Hi Frieder,

On Tue, Nov 15, 2022 at 11:46 AM Frieder Schrempf
<frieder.schrempf@kontron.de> wrote:

> Both fixes are quite simple (see diff below) and if you agree I will
> send them as formal patches.
>
> Thanks
> Frieder
>
> [1]
> https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L447
> [2]
> https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L650
>
> --- a/drivers/spi/spi-imx.c
> +++ b/drivers/spi/spi-imx.c
> @@ -445,7 +445,7 @@ static unsigned int mx51_ecspi_clkdiv(struct
> spi_imx_data *spi_imx,
>         unsigned int fin = spi_imx->spi_clk;
>
>         if (unlikely(fspi > fin))
> -               return 0;
> +               fspi = fin;
>
>         post = fls(fin) - fls(fspi);
>         if (fin > fspi << post)
> @@ -1613,6 +1613,7 @@ static int spi_imx_transfer_one(struct
> spi_controller *controller,
>          */
>         hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) /
> polling_limit_us : 0;
>         byte_limit = hz_per_byte ? transfer->effective_speed_hz /
> hz_per_byte : 1;
> +       byte_limit = min(byte_limit, spi_imx->devtype_data->fifo_size);

Here, I had to add a cast to avoid a build warning:

byte_limit = min(byte_limit, (unsigned long)spi_imx->devtype_data->fifo_size);

SPI NOR programming worked fine with your proposed changes, thanks:

Tested-by: Fabio Estevam <festevam@gmail.com>

On Tue, Nov 15, 2022 at 11:46 AM Frieder Schrempf
<frieder.schrempf@kontron.de> wrote:
>
> Hi Marc,
>
> On 15.11.22 13:55, Marc Kleine-Budde wrote:
> > On 15.11.2022 11:51:53, Frieder Schrempf wrote:
> >> On 14.11.22 12:29, Mark Brown wrote:
> >>> On Mon, Nov 14, 2022 at 09:30:26AM +0100, Frieder Schrempf wrote:
> >>>
> >>>> As far as I know Fabio also discovered that disabling SDMA also fixes
> >>>> the problem.
> >>>
> >>>> I guess I will try to repeat some tests on latest master and see if
> >>>> there is anything that makes things work again without reducing the
> >>>> clock. If anyone has some more ideas of how to fix this properly, please
> >>>> let me know. If nothing else helps we could also reduce the SPI clock.
> >>>
> >>> It sounds like the commit can stay and that everyone is happy
> >>> that the issue is that the the commit made things run faster and
> >>> exposed some other misconfiguration for these systems?
> >>
> >> Honestly I'm not really sure how to proceed.
> >>
> >> My first impression was to keep the PIO polling support with its
> >> benefits if there's just this single issue with the SPI NOR on our board
> >> and assuming that the performance improvements uncovered a bug somewhere
> >> else. But at the moment I'm not quite sure this is really the case.
> >>
> >> I did another test on v6.1-rc5 and disabling either PIO polling
> >> (spi-imx.polling_limit_us=0) or DMA (spi-imx.use_dma=0), or both of them
> >> makes reading the SPI NOR work again.
> >
> > That was a good hint, I think I've found something.
> >
> > Can you check if this fixes your problem? Just a quick hack to, a proper
> > solution needs some more love.
> >
> > diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
> > index 30d82cc7300b..76021b9bb445 100644
> > --- a/drivers/spi/spi-imx.c
> > +++ b/drivers/spi/spi-imx.c
> > @@ -1270,9 +1270,22 @@ static int spi_imx_setupxfer(struct spi_device *spi,
> >                 spi_imx->dynamic_burst = 0;
> >         }
> >
> > -       if (spi_imx_can_dma(spi_imx->controller, spi, t))
> > -               spi_imx->usedma = true;
> > -       else
> > +       if (spi_imx_can_dma(spi_imx->controller, spi, t)) {
> > +               unsigned long hz_per_byte, byte_limit;
> > +
> > +               /*
> > +                * Calculate the estimated time in us the transfer runs. Find
> > +                * the number of Hz per byte per polling limit.
> > +                */
> > +               hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
> > +               byte_limit = hz_per_byte ? t->effective_speed_hz / hz_per_byte : 1;
> > +
> > +               /* run in polling mode for short transfers */
> > +               if (t->len < byte_limit)
> > +                       spi_imx->usedma = false;
> > +               else
> > +                       spi_imx->usedma = true;
> > +       } else
> >                 spi_imx->usedma = false;
> >
> >         spi_imx->rx_only = ((t->tx_buf == NULL)
> > @@ -1597,8 +1610,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
> >         struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
> >         unsigned long hz_per_byte, byte_limit;
> >
> > -       spi_imx_setupxfer(spi, transfer);
> >         transfer->effective_speed_hz = spi_imx->spi_bus_clk;
> > +       spi_imx_setupxfer(spi, transfer);
> >
> >         /* flush rxfifo before transfer */
> >         while (spi_imx->devtype_data->rx_available(spi_imx))
> >
>
> Thanks for the patch, but unfortunately this doesn't help. I did some
> more debugging and it looks like there are two problems.
>
> In my case on i.MX8MM the SPI is fed by a 50 MHz peripheral clock.
> Requesting 80 MHz for the SPI NOR triggers the fspi > fin condition in
> mx51_ecspi_clkdiv() [1] which in turn leaves *fres uninitialized causing
> spi_imx->spi_bus_clk to be set to an arbitrary/random value in
> mx51_ecspi_prepare_transfer() [2].
>
> This in turn messes up the calculation for the PIO polling byte limit.
> In my case the limit was usually somewhere around 8000 bytes, so the
> 4096 byte SPI NOR messages get transferred via PIO polling.
>
> Having large and inefficient polling transfers shouldn't be a problem
> and lead to corrupted data, but I suspect that it doesn't work because
> the transfer size exceeds the FIFO size in this case.
>
> If my conclusions are correct there are two fixes required (though for
> my use case each one of the alone is enough to make things work):
>
> 1. Make sure spi_bus_clk is correct even if the requested bus clock
> exceeds the input clock.
>
> 2. Limit byte_limit for PIO polling calculation to a maximum of
> fifo_size, so we don't try to poll for transfers that don't fit into the
> FIFO.
>
> Both fixes are quite simple (see diff below) and if you agree I will
> send them as formal patches.
>
> Thanks
> Frieder
>
> [1]
> https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L447
> [2]
> https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L650
>
> --- a/drivers/spi/spi-imx.c
> +++ b/drivers/spi/spi-imx.c
> @@ -445,7 +445,7 @@ static unsigned int mx51_ecspi_clkdiv(struct
> spi_imx_data *spi_imx,
>         unsigned int fin = spi_imx->spi_clk;
>
>         if (unlikely(fspi > fin))
> -               return 0;
> +               fspi = fin;
>
>         post = fls(fin) - fls(fspi);
>         if (fin > fspi << post)
> @@ -1613,6 +1613,7 @@ static int spi_imx_transfer_one(struct
> spi_controller *controller,
>          */
>         hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) /
> polling_limit_us : 0;
>         byte_limit = hz_per_byte ? transfer->effective_speed_hz /
> hz_per_byte : 1;
> +       byte_limit = min(byte_limit, spi_imx->devtype_data->fifo_size);
>
>         /* run in polling mode for short transfers */
>         if (transfer->len < byte_limit)
Marc Kleine-Budde Nov. 15, 2022, 3:46 p.m. UTC | #12
On 15.11.2022 15:46:28, Frieder Schrempf wrote:
> Hi Marc,
> 
> On 15.11.22 13:55, Marc Kleine-Budde wrote:
> > On 15.11.2022 11:51:53, Frieder Schrempf wrote:
> >> On 14.11.22 12:29, Mark Brown wrote:
> >>> On Mon, Nov 14, 2022 at 09:30:26AM +0100, Frieder Schrempf wrote:
> >>>
> >>>> As far as I know Fabio also discovered that disabling SDMA also fixes
> >>>> the problem.
> >>>
> >>>> I guess I will try to repeat some tests on latest master and see if
> >>>> there is anything that makes things work again without reducing the
> >>>> clock. If anyone has some more ideas of how to fix this properly, please
> >>>> let me know. If nothing else helps we could also reduce the SPI clock.
> >>>
> >>> It sounds like the commit can stay and that everyone is happy
> >>> that the issue is that the the commit made things run faster and
> >>> exposed some other misconfiguration for these systems?
> >>
> >> Honestly I'm not really sure how to proceed.
> >>
> >> My first impression was to keep the PIO polling support with its
> >> benefits if there's just this single issue with the SPI NOR on our board
> >> and assuming that the performance improvements uncovered a bug somewhere
> >> else. But at the moment I'm not quite sure this is really the case.
> >>
> >> I did another test on v6.1-rc5 and disabling either PIO polling
> >> (spi-imx.polling_limit_us=0) or DMA (spi-imx.use_dma=0), or both of them
> >> makes reading the SPI NOR work again.
> > 
> > That was a good hint, I think I've found something.
> > 
> > Can you check if this fixes your problem? Just a quick hack to, a proper
> > solution needs some more love.
> > 
> > diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
> > index 30d82cc7300b..76021b9bb445 100644
> > --- a/drivers/spi/spi-imx.c
> > +++ b/drivers/spi/spi-imx.c
> > @@ -1270,9 +1270,22 @@ static int spi_imx_setupxfer(struct spi_device *spi,
> >                 spi_imx->dynamic_burst = 0;
> >         }
> >  
> > -       if (spi_imx_can_dma(spi_imx->controller, spi, t))
> > -               spi_imx->usedma = true;
> > -       else
> > +       if (spi_imx_can_dma(spi_imx->controller, spi, t)) {
> > +               unsigned long hz_per_byte, byte_limit;
> > +
> > +               /*
> > +                * Calculate the estimated time in us the transfer runs. Find
> > +                * the number of Hz per byte per polling limit.
> > +                */
> > +               hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
> > +               byte_limit = hz_per_byte ? t->effective_speed_hz / hz_per_byte : 1;
> > +
> > +               /* run in polling mode for short transfers */
> > +               if (t->len < byte_limit)
> > +                       spi_imx->usedma = false;
> > +               else
> > +                       spi_imx->usedma = true;
> > +       } else
> >                 spi_imx->usedma = false;
> >  
> >         spi_imx->rx_only = ((t->tx_buf == NULL)
> > @@ -1597,8 +1610,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
> >         struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
> >         unsigned long hz_per_byte, byte_limit;
> >  
> > -       spi_imx_setupxfer(spi, transfer);
> >         transfer->effective_speed_hz = spi_imx->spi_bus_clk;
> > +       spi_imx_setupxfer(spi, transfer);
> >  
> >         /* flush rxfifo before transfer */
> >         while (spi_imx->devtype_data->rx_available(spi_imx))
> > 
> 
> Thanks for the patch, but unfortunately this doesn't help. I did some
> more debugging and it looks like there are two problems.
> 
> In my case on i.MX8MM the SPI is fed by a 50 MHz peripheral clock.
> Requesting 80 MHz for the SPI NOR triggers the fspi > fin condition in
> mx51_ecspi_clkdiv() [1] which in turn leaves *fres uninitialized causing
> spi_imx->spi_bus_clk to be set to an arbitrary/random value in
> mx51_ecspi_prepare_transfer() [2].
> 
> This in turn messes up the calculation for the PIO polling byte limit.
> In my case the limit was usually somewhere around 8000 bytes, so the
> 4096 byte SPI NOR messages get transferred via PIO polling.
> 
> Having large and inefficient polling transfers shouldn't be a problem
> and lead to corrupted data, but I suspect that it doesn't work because
> the transfer size exceeds the FIFO size in this case.

No - exceeding the FIFO size is not a problem. If you limit polling to
FIFO size you effectively disable it for your use case.

> If my conclusions are correct there are two fixes required (though for
> my use case each one of the alone is enough to make things work):
> 
> 1. Make sure spi_bus_clk is correct even if the requested bus clock
> exceeds the input clock.

With a proper clock rate, the default 30µs limit results in no
polling for you.

> 2. Limit byte_limit for PIO polling calculation to a maximum of
> fifo_size, so we don't try to poll for transfers that don't fit into the
> FIFO.

One of the performance benefits of polling is that you don't get all the
IRQs needed for refilling the FIFO, please keep this as is.

> Both fixes are quite simple (see diff below) and if you agree I will
> send them as formal patches.

Please just the first one. But let's fix polling support, too. I'll send
a patch.

> 
> Thanks
> Frieder
> 
> [1]
> https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L447
> [2]
> https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L650
> 
> --- a/drivers/spi/spi-imx.c
> +++ b/drivers/spi/spi-imx.c
> @@ -445,7 +445,7 @@ static unsigned int mx51_ecspi_clkdiv(struct
> spi_imx_data *spi_imx,
>         unsigned int fin = spi_imx->spi_clk;
> 
>         if (unlikely(fspi > fin))
> -               return 0;
> +               fspi = fin;

This should equal:
fspi = min(fspi, fin);

Good catch! Please make this a separate patch and send it upstream!

Can you enable debugging and post the output of the dev_dbg() in that
function?

> 
>         post = fls(fin) - fls(fspi);
>         if (fin > fspi << post)
> @@ -1613,6 +1613,7 @@ static int spi_imx_transfer_one(struct
> spi_controller *controller,
>          */
>         hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) /
> polling_limit_us : 0;
>         byte_limit = hz_per_byte ? transfer->effective_speed_hz /
> hz_per_byte : 1;
> +       byte_limit = min(byte_limit, spi_imx->devtype_data->fifo_size);
> 
>         /* run in polling mode for short transfers */
>         if (transfer->len < byte_limit)
> 

Marc
Frieder Schrempf Nov. 15, 2022, 4:05 p.m. UTC | #13
On 15.11.22 16:46, Marc Kleine-Budde wrote:
> On 15.11.2022 15:46:28, Frieder Schrempf wrote:
>> Hi Marc,
>>
>> On 15.11.22 13:55, Marc Kleine-Budde wrote:
>>> On 15.11.2022 11:51:53, Frieder Schrempf wrote:
>>>> On 14.11.22 12:29, Mark Brown wrote:
>>>>> On Mon, Nov 14, 2022 at 09:30:26AM +0100, Frieder Schrempf wrote:
>>>>>
>>>>>> As far as I know Fabio also discovered that disabling SDMA also fixes
>>>>>> the problem.
>>>>>
>>>>>> I guess I will try to repeat some tests on latest master and see if
>>>>>> there is anything that makes things work again without reducing the
>>>>>> clock. If anyone has some more ideas of how to fix this properly, please
>>>>>> let me know. If nothing else helps we could also reduce the SPI clock.
>>>>>
>>>>> It sounds like the commit can stay and that everyone is happy
>>>>> that the issue is that the the commit made things run faster and
>>>>> exposed some other misconfiguration for these systems?
>>>>
>>>> Honestly I'm not really sure how to proceed.
>>>>
>>>> My first impression was to keep the PIO polling support with its
>>>> benefits if there's just this single issue with the SPI NOR on our board
>>>> and assuming that the performance improvements uncovered a bug somewhere
>>>> else. But at the moment I'm not quite sure this is really the case.
>>>>
>>>> I did another test on v6.1-rc5 and disabling either PIO polling
>>>> (spi-imx.polling_limit_us=0) or DMA (spi-imx.use_dma=0), or both of them
>>>> makes reading the SPI NOR work again.
>>>
>>> That was a good hint, I think I've found something.
>>>
>>> Can you check if this fixes your problem? Just a quick hack to, a proper
>>> solution needs some more love.
>>>
>>> diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
>>> index 30d82cc7300b..76021b9bb445 100644
>>> --- a/drivers/spi/spi-imx.c
>>> +++ b/drivers/spi/spi-imx.c
>>> @@ -1270,9 +1270,22 @@ static int spi_imx_setupxfer(struct spi_device *spi,
>>>                 spi_imx->dynamic_burst = 0;
>>>         }
>>>  
>>> -       if (spi_imx_can_dma(spi_imx->controller, spi, t))
>>> -               spi_imx->usedma = true;
>>> -       else
>>> +       if (spi_imx_can_dma(spi_imx->controller, spi, t)) {
>>> +               unsigned long hz_per_byte, byte_limit;
>>> +
>>> +               /*
>>> +                * Calculate the estimated time in us the transfer runs. Find
>>> +                * the number of Hz per byte per polling limit.
>>> +                */
>>> +               hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
>>> +               byte_limit = hz_per_byte ? t->effective_speed_hz / hz_per_byte : 1;
>>> +
>>> +               /* run in polling mode for short transfers */
>>> +               if (t->len < byte_limit)
>>> +                       spi_imx->usedma = false;
>>> +               else
>>> +                       spi_imx->usedma = true;
>>> +       } else
>>>                 spi_imx->usedma = false;
>>>  
>>>         spi_imx->rx_only = ((t->tx_buf == NULL)
>>> @@ -1597,8 +1610,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
>>>         struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
>>>         unsigned long hz_per_byte, byte_limit;
>>>  
>>> -       spi_imx_setupxfer(spi, transfer);
>>>         transfer->effective_speed_hz = spi_imx->spi_bus_clk;
>>> +       spi_imx_setupxfer(spi, transfer);
>>>  
>>>         /* flush rxfifo before transfer */
>>>         while (spi_imx->devtype_data->rx_available(spi_imx))
>>>
>>
>> Thanks for the patch, but unfortunately this doesn't help. I did some
>> more debugging and it looks like there are two problems.
>>
>> In my case on i.MX8MM the SPI is fed by a 50 MHz peripheral clock.
>> Requesting 80 MHz for the SPI NOR triggers the fspi > fin condition in
>> mx51_ecspi_clkdiv() [1] which in turn leaves *fres uninitialized causing
>> spi_imx->spi_bus_clk to be set to an arbitrary/random value in
>> mx51_ecspi_prepare_transfer() [2].
>>
>> This in turn messes up the calculation for the PIO polling byte limit.
>> In my case the limit was usually somewhere around 8000 bytes, so the
>> 4096 byte SPI NOR messages get transferred via PIO polling.
>>
>> Having large and inefficient polling transfers shouldn't be a problem
>> and lead to corrupted data, but I suspect that it doesn't work because
>> the transfer size exceeds the FIFO size in this case.
> 
> no - exceeding the FIFO size it not a problem. If you limit polling to
> FIFO size you effectively disable it for your use case.
> 
>> If my conclusions are correct there are two fixes required (though for
>> my use case each one of the alone is enough to make things work):
>>
>> 1. Make sure spi_bus_clk is correct even if the requested bus clock
>> exceeds the input clock.
> 
> With a proper clock rate, and the default 30µs limit results in no
> polling for you.
> 
>> 2. Limit byte_limit for PIO polling calculation to a maximum of
>> fifo_size, so we don't try to poll for transfers that don't fit into the
>> FIFO.
> 
> One of the performance benefits of polling is that you don't get all the
> IRQs needed for refilling the FIFO, please keep this as is.
> 
>> Both fixes are quite simple (see diff below) and if you agree I will
>> send them as formal patches.
> 
> Please just the first one. But let's fix polling support, too. I'll send
> a patch.

In the meantime I already found out that the second point and my fix are
wrong. Thanks for clarifying.

> 
>>
>> Thanks
>> Frieder
>>
>> [1]
>> https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L447
>> [2]
>> https://elixir.bootlin.com/linux/latest/source/drivers/spi/spi-imx.c#L650
>>
>> --- a/drivers/spi/spi-imx.c
>> +++ b/drivers/spi/spi-imx.c
>> @@ -445,7 +445,7 @@ static unsigned int mx51_ecspi_clkdiv(struct
>> spi_imx_data *spi_imx,
>>         unsigned int fin = spi_imx->spi_clk;
>>
>>         if (unlikely(fspi > fin))
>> -               return 0;
>> +               fspi = fin;
> 
> This should equal:
> fspi = min(fspi, fin);

Of course!

> 
> Good catch! Please make this a separate patch and send it upstream!

Will do.

> Can you enable debugging and post the output of the dev_dbg() in that
> function.

Without the fix there's no output as the dev_dbg() is skipped, with the
fix there is this in my case:

spi_imx 30820000.spi: mx51_ecspi_clkdiv: fin: 50000000, fspi: 50000000,
post: 0, pre: 0
Marc Kleine-Budde Nov. 15, 2022, 4:52 p.m. UTC | #14
On 15.11.2022 15:46:28, Frieder Schrempf wrote:
> > diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
> > index 30d82cc7300b..76021b9bb445 100644
> > --- a/drivers/spi/spi-imx.c
> > +++ b/drivers/spi/spi-imx.c
> > @@ -1270,9 +1270,22 @@ static int spi_imx_setupxfer(struct spi_device *spi,
> >                 spi_imx->dynamic_burst = 0;
> >         }
> >  
> > -       if (spi_imx_can_dma(spi_imx->controller, spi, t))
> > -               spi_imx->usedma = true;
> > -       else
> > +       if (spi_imx_can_dma(spi_imx->controller, spi, t)) {
> > +               unsigned long hz_per_byte, byte_limit;
> > +
> > +               /*
> > +                * Calculate the estimated time in us the transfer runs. Find
> > +                * the number of Hz per byte per polling limit.
> > +                */
> > +               hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
> > +               byte_limit = hz_per_byte ? t->effective_speed_hz / hz_per_byte : 1;
> > +
> > +               /* run in polling mode for short transfers */
> > +               if (t->len < byte_limit)
> > +                       spi_imx->usedma = false;
> > +               else
> > +                       spi_imx->usedma = true;
> > +       } else
> >                 spi_imx->usedma = false;
> >  
> >         spi_imx->rx_only = ((t->tx_buf == NULL)
> > @@ -1597,8 +1610,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
> >         struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
> >         unsigned long hz_per_byte, byte_limit;
> >  
> > -       spi_imx_setupxfer(spi, transfer);
> >         transfer->effective_speed_hz = spi_imx->spi_bus_clk;
> > +       spi_imx_setupxfer(spi, transfer);
> >  
> >         /* flush rxfifo before transfer */
> >         while (spi_imx->devtype_data->rx_available(spi_imx))
> > 
> 
> Thanks for the patch, but unfortunately this doesn't help. I did some
> more debugging and it looks like there are two problems.

Can you try this one?

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 30d82cc7300b..d45da1d0ac1d 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1607,6 +1607,13 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
        if (spi_imx->slave_mode)
                return spi_imx_pio_transfer_slave(spi, transfer);
 
+       /*
+        * If we decided in spi_imx_can_dma() that we want to do a DMA
+        * transfer, the message has already been mapped, so we have
+        * to do the DMA transfer now.
+        */
+       if (spi_imx->usedma)
+               return spi_imx_dma_transfer(spi_imx, transfer);
        /*
         * Calculate the estimated time in us the transfer runs. Find
         * the number of Hz per byte per polling limit.
@@ -1618,9 +1625,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
        if (transfer->len < byte_limit)
                return spi_imx_poll_transfer(spi, transfer);
 
-       if (spi_imx->usedma)
-               return spi_imx_dma_transfer(spi_imx, transfer);
-
        return spi_imx_pio_transfer(spi, transfer);
 }

The problem is: once we decide on DMA in spi_imx_can_dma(), the SPI framework
maps the message and then calls spi_imx_transfer_one(). We cannot
operate with the CPU on memory that is mapped to the DMA engine.

This should fix the problem without any additional patches.

regards,
Marc
Frieder Schrempf Nov. 15, 2022, 5:11 p.m. UTC | #15
On 15.11.22 17:52, Marc Kleine-Budde wrote:
> On 15.11.2022 15:46:28, Frieder Schrempf wrote:
>>> diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
>>> index 30d82cc7300b..76021b9bb445 100644
>>> --- a/drivers/spi/spi-imx.c
>>> +++ b/drivers/spi/spi-imx.c
>>> @@ -1270,9 +1270,22 @@ static int spi_imx_setupxfer(struct spi_device *spi,
>>>                 spi_imx->dynamic_burst = 0;
>>>         }
>>>  
>>> -       if (spi_imx_can_dma(spi_imx->controller, spi, t))
>>> -               spi_imx->usedma = true;
>>> -       else
>>> +       if (spi_imx_can_dma(spi_imx->controller, spi, t)) {
>>> +               unsigned long hz_per_byte, byte_limit;
>>> +
>>> +               /*
>>> +                * Calculate the estimated time in us the transfer runs. Find
>>> +                * the number of Hz per byte per polling limit.
>>> +                */
>>> +               hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
>>> +               byte_limit = hz_per_byte ? t->effective_speed_hz / hz_per_byte : 1;
>>> +
>>> +               /* run in polling mode for short transfers */
>>> +               if (t->len < byte_limit)
>>> +                       spi_imx->usedma = false;
>>> +               else
>>> +                       spi_imx->usedma = true;
>>> +       } else
>>>                 spi_imx->usedma = false;
>>>  
>>>         spi_imx->rx_only = ((t->tx_buf == NULL)
>>> @@ -1597,8 +1610,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
>>>         struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
>>>         unsigned long hz_per_byte, byte_limit;
>>>  
>>> -       spi_imx_setupxfer(spi, transfer);
>>>         transfer->effective_speed_hz = spi_imx->spi_bus_clk;
>>> +       spi_imx_setupxfer(spi, transfer);
>>>  
>>>         /* flush rxfifo before transfer */
>>>         while (spi_imx->devtype_data->rx_available(spi_imx))
>>>
>>
>> Thanks for the patch, but unfortunately this doesn't help. I did some
>> more debugging and it looks like there are two problems.
> 
> Can you try this one?
> 
> diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
> index 30d82cc7300b..d45da1d0ac1d 100644
> --- a/drivers/spi/spi-imx.c
> +++ b/drivers/spi/spi-imx.c
> @@ -1607,6 +1607,13 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
>         if (spi_imx->slave_mode)
>                 return spi_imx_pio_transfer_slave(spi, transfer);
>  
> +       /*
> +        * If we decided in spi_imx_can_dma() that we want to do a DMA
> +        * transfer, the message has already been mapped, so we have
> +        * to do the DMA transfer now.
> +        */
> +       if (spi_imx->usedma)
> +               return spi_imx_dma_transfer(spi_imx, transfer);
>         /*
>          * Calculate the estimated time in us the transfer runs. Find
>          * the number of Hz per byte per polling limit.
> @@ -1618,9 +1625,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
>         if (transfer->len < byte_limit)
>                 return spi_imx_poll_transfer(spi, transfer);
>  
> -       if (spi_imx->usedma)
> -               return spi_imx_dma_transfer(spi_imx, transfer);
> -
>         return spi_imx_pio_transfer(spi, transfer);
>  }
> 
> The problem is: we decide on DMA in spi_imx_can_dma() the SPI frameworks
> maps the message, and then calls spi_imx_transfer_one(). We cannot
> operate with the CPU in the memory mapped to the DMA engine.
> 
> This should fix the problem without any additional patches.

This does fix the issue. My previous patch had the same result, as
spi_imx_can_dma() returns false if transfer size is below fifo_size. But
of course this is the correct fix with the correct explanation.
Fabio Estevam Nov. 15, 2022, 5:17 p.m. UTC | #16
Hi Marc,

On Tue, Nov 15, 2022 at 1:52 PM Marc Kleine-Budde <mkl@pengutronix.de> wrote:

> Can you try this one?

It works for me too, thanks:

Tested-by: Fabio Estevam <festevam@gmail.com>
Marc Kleine-Budde Nov. 15, 2022, 6:01 p.m. UTC | #17
On 15.11.2022 18:11:07, Frieder Schrempf wrote:
> On 15.11.22 17:52, Marc Kleine-Budde wrote:
> > On 15.11.2022 15:46:28, Frieder Schrempf wrote:
> >>> diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
> >>> index 30d82cc7300b..76021b9bb445 100644
> >>> --- a/drivers/spi/spi-imx.c
> >>> +++ b/drivers/spi/spi-imx.c
> >>> @@ -1270,9 +1270,22 @@ static int spi_imx_setupxfer(struct spi_device *spi,
> >>>                 spi_imx->dynamic_burst = 0;
> >>>         }
> >>>  
> >>> -       if (spi_imx_can_dma(spi_imx->controller, spi, t))
> >>> -               spi_imx->usedma = true;
> >>> -       else
> >>> +       if (spi_imx_can_dma(spi_imx->controller, spi, t)) {
> >>> +               unsigned long hz_per_byte, byte_limit;
> >>> +
> >>> +               /*
> >>> +                * Calculate the estimated time in us the transfer runs. Find
> >>> +                * the number of Hz per byte per polling limit.
> >>> +                */
> >>> +               hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
> >>> +               byte_limit = hz_per_byte ? t->effective_speed_hz / hz_per_byte : 1;
> >>> +
> >>> +               /* run in polling mode for short transfers */
> >>> +               if (t->len < byte_limit)
> >>> +                       spi_imx->usedma = false;
> >>> +               else
> >>> +                       spi_imx->usedma = true;
> >>> +       } else
> >>>                 spi_imx->usedma = false;
> >>>  
> >>>         spi_imx->rx_only = ((t->tx_buf == NULL)
> >>> @@ -1597,8 +1610,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
> >>>         struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
> >>>         unsigned long hz_per_byte, byte_limit;
> >>>  
> >>> -       spi_imx_setupxfer(spi, transfer);
> >>>         transfer->effective_speed_hz = spi_imx->spi_bus_clk;
> >>> +       spi_imx_setupxfer(spi, transfer);
> >>>  
> >>>         /* flush rxfifo before transfer */
> >>>         while (spi_imx->devtype_data->rx_available(spi_imx))
> >>>
> >>
> >> Thanks for the patch, but unfortunately this doesn't help. I did some
> >> more debugging and it looks like there are two problems.
> > 
> > Can you try this one?
> > 
> > diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
> > index 30d82cc7300b..d45da1d0ac1d 100644
> > --- a/drivers/spi/spi-imx.c
> > +++ b/drivers/spi/spi-imx.c
> > @@ -1607,6 +1607,13 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
> >         if (spi_imx->slave_mode)
> >                 return spi_imx_pio_transfer_slave(spi, transfer);
> >  
> > +       /*
> > +        * If we decided in spi_imx_can_dma() that we want to do a DMA
> > +        * transfer, the message has already been mapped, so we have
> > +        * to do the DMA transfer now.
> > +        */
> > +       if (spi_imx->usedma)
> > +               return spi_imx_dma_transfer(spi_imx, transfer);
> >         /*
> >          * Calculate the estimated time in us the transfer runs. Find
> >          * the number of Hz per byte per polling limit.
> > @@ -1618,9 +1625,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
> >         if (transfer->len < byte_limit)
> >                 return spi_imx_poll_transfer(spi, transfer);
> >  
> > -       if (spi_imx->usedma)
> > -               return spi_imx_dma_transfer(spi_imx, transfer);
> > -
> >         return spi_imx_pio_transfer(spi, transfer);
> >  }
> > 
> > The problem is: we decide on DMA in spi_imx_can_dma() the SPI frameworks
> > maps the message, and then calls spi_imx_transfer_one(). We cannot
> > operate with the CPU in the memory mapped to the DMA engine.
> > 
> > This should fix the problem without any additional patches.
> 
> This does fix the issue.

\o/

> My previous patch had the same result, as
> spi_imx_can_dma() returns false if transfer size is below fifo_size. But
> of course this is the correct fix with the correct explanation.

Ok, but I don't want to limit polling on <= FIFO sized transfers :)

Marc
diff mbox series

Patch

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index a4bda03d3a8e..b946d98a8d66 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -31,12 +31,6 @@  static bool use_dma = true;
 module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "Enable usage of DMA when available (default)");
 
-/* define polling limits */
-static unsigned int polling_limit_us = 30;
-module_param(polling_limit_us, uint, 0664);
-MODULE_PARM_DESC(polling_limit_us,
-		 "time in us to run a transfer in polling mode\n");
-
 #define MXC_RPM_TIMEOUT		2000 /* 2000ms */
 
 #define MXC_CSPIRXDATA		0x00
@@ -1490,54 +1484,6 @@  static int spi_imx_pio_transfer(struct spi_device *spi,
 	return 0;
 }
 
-static int spi_imx_poll_transfer(struct spi_device *spi,
-				 struct spi_transfer *transfer)
-{
-	struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
-	unsigned long timeout;
-
-	spi_imx->tx_buf = transfer->tx_buf;
-	spi_imx->rx_buf = transfer->rx_buf;
-	spi_imx->count = transfer->len;
-	spi_imx->txfifo = 0;
-	spi_imx->remainder = 0;
-
-	/* fill in the fifo before timeout calculations if we are
-	 * interrupted here, then the data is getting transferred by
-	 * the HW while we are interrupted
-	 */
-	spi_imx_push(spi_imx);
-
-	timeout = spi_imx_calculate_timeout(spi_imx, transfer->len) + jiffies;
-	while (spi_imx->txfifo) {
-		/* RX */
-		while (spi_imx->txfifo &&
-		       spi_imx->devtype_data->rx_available(spi_imx)) {
-			spi_imx->rx(spi_imx);
-			spi_imx->txfifo--;
-		}
-
-		/* TX */
-		if (spi_imx->count) {
-			spi_imx_push(spi_imx);
-			continue;
-		}
-
-		if (spi_imx->txfifo &&
-		    time_after(jiffies, timeout)) {
-
-			dev_err_ratelimited(&spi->dev,
-					    "timeout period reached: jiffies: %lu- falling back to interrupt mode\n",
-					    jiffies - timeout);
-
-			/* fall back to interrupt mode */
-			return spi_imx_pio_transfer(spi, transfer);
-		}
-	}
-
-	return 0;
-}
-
 static int spi_imx_pio_transfer_slave(struct spi_device *spi,
 				      struct spi_transfer *transfer)
 {
@@ -1587,7 +1533,6 @@  static int spi_imx_transfer_one(struct spi_controller *controller,
 				struct spi_transfer *transfer)
 {
 	struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
-	unsigned long hz_per_byte, byte_limit;
 
 	spi_imx_setupxfer(spi, transfer);
 	transfer->effective_speed_hz = spi_imx->spi_bus_clk;
@@ -1599,17 +1544,6 @@  static int spi_imx_transfer_one(struct spi_controller *controller,
 	if (spi_imx->slave_mode)
 		return spi_imx_pio_transfer_slave(spi, transfer);
 
-	/*
-	 * Calculate the estimated time in us the transfer runs. Find
-	 * the number of Hz per byte per polling limit.
-	 */
-	hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
-	byte_limit = hz_per_byte ? transfer->effective_speed_hz / hz_per_byte : 1;
-
-	/* run in polling mode for short transfers */
-	if (transfer->len < byte_limit)
-		return spi_imx_poll_transfer(spi, transfer);
-
 	if (spi_imx->usedma)
 		return spi_imx_dma_transfer(spi_imx, transfer);