diff mbox

fsl_ssi.c: Roberto's problem: ssi hangs after some number of samples

Message ID 563A253C.7080709@tekno-soft.it (mailing list archive)
State New, archived
Headers show

Commit Message

Roberto Fichera Nov. 4, 2015, 3:33 p.m. UTC
On 11/03/2015 10:26 PM, Caleb Crome wrote:
>>> or SDMA have missed the request signals from SSI.
>> This is my current thought. However, since the SSI is not operating at such a
>> high
>> rate and Caleb's problem seems to be heading toward a solution, I think
>> there is something
>> else I'm missing.
> Is it possible that the event type below in the reference manual
> section 55.10.5 is happening?  It looks like the SDMA script is
> supposed to deal with it.  Perhaps there's a bug in the script?
>
> 55.10.5 External DMA Requests Mirror (SDMACORE_EVENTS)
> NOTE
> This register is very useful in the case of DMA requests that are
> active when a peripheral FIFO level is above the programmed
> watermark. The activation of the DMA request (rising edge) is
> detected by the SDMA logic and it can enable one or several
> channels. One of the channels accesses the peripheral and reads
> or writes a number of data that matches the watermark level
> (for example, if the watermark is four words, the channel reads
> or writes four words).
> If the channel is effectively executed long after the DMA
> request was received, reading or writing the watermark number
> of data may not be sufficient to reset the DMA request (for
> example, if the FIFO watermark is four and at the channel
> execution it already contains nine pieces of data). This means
> no new rising edge may be detected by the SDMA, although
> there still remains transfers to perform. Therefore, if the
> channel were terminated at that time, it would not be restarted,
> causing potential overrun or underrun of the peripheral.
> The proposed mechanism is for the channel to check this
> register after it has performed the "watermark" number of
> accesses to the peripheral. If the bit for the DMA request that
> triggers this channel is set, it means there is still another
> watermark number of data to transfer. This goes on until the bit
> is cleared. The same script can be used for multiple channels
> that require this behavior. The script can determine its channel
> number from the CCR register and infer the corresponding
> DMA request bit to check. It needs a reference table that is
> coherent with the request-channel matrix that the ARM
> platform programmed.
>

Maybe this is the cause! I've explored a bit what's going on with my DMA stall and
I've found that both RX and TX channels are getting an error, at least this is what
the EVTERR register is reporting. See the log below:

root@voneus-domus-imx6sx:~# cat /proc/domus_ssi_stats
SSI TDM Info:
        IPG clk=66000000
        SSI baudclk=8192000
        ssi_phy=0x02028000
        irq=21
        fifo_depth=10
        tdm_frame_rate=8000
        tdm_slots=32 (real 2)
        tdm_word_size=8
        tdm_slots_enabled=00000000000000000000000000000011
        clk_frequency=2048000
        clock_running=yes
        DMA=yes
        Dual FIFO=no
        *RX DMA frame count=36795*
        RX DMA addr=0x9ef0e000
        RX DMA buffer len=16
        *TX DMA frame count=36795*
        TX DMA addr=0x9ee49000
        TX DMA buffer len=16

SSI Registers:
        ssi_scr=0x0000109f
        ssi_sier=0x00500504
        ssi_stcr=0x000002e8
        ssi_srcr=0x00000288
        ssi_stccr=0x00007f01
        ssi_srccr=0x00007f01
        ssi_sfcsr=0x00aaf0aa
        ssi_stmsk=0xfffffffc
        ssi_srmsk=0xfffffffc

SDMA RX channel:
SDMA channel 4 status
        SDMA_H_STATSTOP=0x00000000
        SDMA_H_START=0x00000000
        SDMA_H_EVTOVR=0x00000001
        SDMA_H_EVTPEND=0x0000001a
        SDMA_H_EVTERR=0x00000000
        SDMA_H_DSPOVR=0xffffffff
        SDMA_H_HOSTOVR=0x00000000
        SDMA_H_INTR=0x00000000
        SDMA_H_INTRMSK=0x00000018
        SDMACORE_EVENTS=0x00000000
        SDMACORE_EVENTS2=0x00000000

SDMA EVTERR channel counters:
*        003 = 1**
**        004 = 1*

SDMA TX channel:
SDMA channel 3 status
        SDMA_H_STATSTOP=0x00000000
        SDMA_H_START=0x00000000
        SDMA_H_EVTOVR=0x00000001
        SDMA_H_EVTPEND=0x0000001a
        SDMA_H_EVTERR=0x00000000
        SDMA_H_DSPOVR=0xffffffff
        SDMA_H_HOSTOVR=0x00000000
        SDMA_H_INTR=0x00000000
        SDMA_H_INTRMSK=0x00000018
        SDMACORE_EVENTS=0x00000000
        SDMACORE_EVENTS2=0x00000000

SDMA EVTERR channel counters:
*        003 = 1**
**        004 = 1*

root@voneus-domus-imx6sx:~# cat /proc/interrupts
           CPU0
 16:      39485       GPC  55 Edge      i.MX Timer Tick
 20:       2310       GPC  26 Edge      2020000.serial
 21:          0       GPC  46 Edge      ssi-tdm
 39:          0  gpio-mxc  12 Edge      si3217x-irq
123:          0  gpio-mxc  28 Edge      2194000.usdhc cd
266:          0       GPC  49 Edge      imx_thermal
271:          0       GPC  19 Edge      rtc alarm
*277:      36800       GPC   2 Edge      sdma*
278:          0       GPC  43 Edge      2184000.usb
279:       2994       GPC  23 Edge      mmc0
280:        245       GPC  25 Edge      mmc1
283:          9       GPC 109 Edge      21e4000.qspi
284:          0       GPC  27 Edge      21e8000.serial
287:      50274       GPC  18 Edge      228c000.ecspi
IPI0:          0  CPU wakeup interrupts
IPI1:          0  Timer broadcast interrupts
IPI2:          0  Rescheduling interrupts
IPI3:          0  Function call interrupts
IPI4:          0  Single function call interrupts
IPI5:          0  CPU stop interrupts
IPI6:          1  IRQ work interrupts
IPI7:          0  completion interrupts
Err:          0

This is the relevant part of the dmesg

[  993.283774] dahdi: Version:
[  993.317161] dahdi: Telephony Interface Registered on major 196
[  997.013802] si3217x_audmux_probe: AUDMUX base is 0xa0b10000
*[  999.313971] sdma_disable_channel: Disabling EVTERR for channel 3**
**[  999.320620] sdma_disable_channel: Disabling EVTERR for channel 4*
[  999.326866] si3217x_ssi_probe: SSI base is 0xa0b20000 clock rate is 2048000Hz, TDM Frame rate 8000Hz, channels 32
having 8 bits word length
[ 1002.733512] si3217x_probe: SPI setup mode 3, 8 bits/w, 10000000 Hz max
[ 1002.740962] RX: prepare for the DMA.
*[ 1002.744960] sdma_enable_channel: Enabling EVTERR for channel 4*
[ 1002.752305] TX: prepare for the DMA.
*[ 1002.756111] sdma_enable_channel: Enabling EVTERR for channel 3*
[ 1002.762739] si3217x_ssi_set_clock: BIT_CLK=8192000, IPGCLK=66000000, PM=1
[ 1003.278453] Si3217x: isVerifiedProslic : chan(0) REG PCMTXHI VAL = 00
[ 1003.285624] Si3217x: isVerifiedProslic : Not a VDAA chan(0) REG PCMMODE VAL = 05
[ 1003.306492] SLIC verification OK
[ 1003.310461] SPI ret=0, MSTRSTAT=0x1f
[ 1003.314068]  PCLK_VALID       = 1
[ 1003.317115]  FS_VALID         = 1
[ 1003.319850]  FS_DETECT        = 1
[ 1003.322661]  PLL_LOCK         = 1
[ 1003.325385]  SRAM_CLR         = 1
[ 1003.328181]  PCLK_FAULT       = 0
[ 1003.331083]  FS_FAULT         = 0
[ 1003.333807]  PLL_FAULT        = 0
[ 1003.341485] Si3217x: isVerifiedProslic : chan(0) REG PCMTXHI VAL = 00
[ 1003.349140] Si3217x: isVerifiedProslic : Not a VDAA chan(0) REG PCMMODE VAL = 05
[ 1003.356601] Si3217x: Channel 0 : Type = PROSLIC
[ 1003.362536] Si3217x: isVerifiedProslic : chan(1) REG PCMTXHI VAL = 40
[ 1003.369863] Si3217x: Channel 1 : Type = DAA
[ 1003.374358] si3217x: Channel 0 : Type = 26
[ 1003.378515] si3217x: Channel 0 : Rev  = 1
[ 1003.385623] Si3217x: loading patch: 12102012
[ 1007.624105] Si3217x: Channel 0 : VBAT Up = 62.754 v
[ 1008.556990] Si3217x: PCMStart
[ 1008.560526] Channel 0: FXS model Si32178
[ 1008.567449] Channel 1: FXO model Si32919 rev A
[ 1008.591259] Found: Quadplay FXS/FXO Card

Basically, for every DMA channel attached to an SSI peripheral I enable the corresponding EVTERR bit
for that channel, in order to detect whether a DMA overflow condition occurs.

With the patch below I'm able to see the error happening. Most likely the stall happens just after
the EVTERR notifies the ISR of the problem. At this point the DMA simply stalls due to some problem, most
likely because the SSI FIFO is in an overflow or underflow condition. I will add the code to dump the SSI
registers once EVTERR is triggered.

I think that at this point we should, in theory, restart the DMA channel. However, how can this be fixed,
and why is it happening?

Comments

Roberto Fichera Nov. 4, 2015, 4:53 p.m. UTC | #1
On 11/04/2015 04:33 PM, Roberto Fichera wrote:
> On 11/03/2015 10:26 PM, Caleb Crome wrote:
>>>> or SDMA have missed the request signals from SSI.
>>> This is my current thought. However since the SSI is not operating at so
>>> high
>>> rate and the Cabel's problem seems going to a solution then I think
>>> there is something
>>> else I'm missing.
>> Is it possible that the event type below in the reference manual
>> section 55.10.5 is happening?  It looks like the SDMA script is
>> supposed to deal with it.  Perhaps there's a bug in the script?
>>
>> 55.10.5 External DMA Requests Mirror (SDMACORE_EVENTS)
>> NOTE
>> This register is very useful in the case of DMA requests that are
>> active when a peripheral FIFO level is above the programmed
>> watermark. The activation of the DMA request (rising edge) is
>> detected by the SDMA logic and it can enable one or several
>> channels. One of the channels accesses the peripheral and reads
>> or writes a number of data that matches the watermark level
>> (for example, if the watermark is four words, the channel reads
>> or writes four words).
>> If the channel is effectively executed long after the DMA
>> request was received, reading or writing the watermark number
>> of data may not be sufficient to reset the DMA request (for
>> example, if the FIFO watermark is four and at the channel
>> execution it already contains nine pieces of data). This means
>> no new rising edge may be detected by the SDMA, although
>> there still remains transfers to perform. Therefore, if the
>> channel were terminated at that time, it would not be restarted,
>> causing potential overrun or underrun of the peripheral.
>> The proposed mechanism is for the channel to check this
>> register after it has performed the "watermark" number of
>> accesses to the peripheral. If the bit for the DMA request that
>> triggers this channel is set, it means there is still another
>> watermark number of data to transfer. This goes on until the bit
>> is cleared. The same script can be used for multiple channels
>> that require this behavior. The script can determine its channel
>> number from the CCR register and infer the corresponding
>> DMA request bit to check. It needs a reference table that is
>> coherent with the request-channel matrix that the ARM
>> platform programmed.
>>
> Maybe this is the cause! I've explored a bit what's going on with my DMA stall and
> I've found that both RX and TX channels are getting an error, at least this is what
> the EVTERR register is reporting. See the log below:
>
> root@voneus-domus-imx6sx:~# cat /proc/domus_ssi_stats
> SSI TDM Info:
>         IPG clk=66000000
>         SSI baudclk=8192000
>         ssi_phy=0x02028000
>         irq=21
>         fifo_depth=10
>         tdm_frame_rate=8000
>         tdm_slots=32 (real 2)
>         tdm_word_size=8
>         tdm_slots_enabled=00000000000000000000000000000011
>         clk_frequency=2048000
>         clock_running=yes
>         DMA=yes
>         Dual FIFO=no
>         *RX DMA frame count=36795*
>         RX DMA addr=0x9ef0e000
>         RX DMA buffer len=16
>         *TX DMA frame count=36795*
>         TX DMA addr=0x9ee49000
>         TX DMA buffer len=16
>
> SSI Registers:
>         ssi_scr=0x0000109f
>         ssi_sier=0x00500504
>         ssi_stcr=0x000002e8
>         ssi_srcr=0x00000288
>         ssi_stccr=0x00007f01
>         ssi_srccr=0x00007f01
>         ssi_sfcsr=0x00aaf0aa
>         ssi_stmsk=0xfffffffc
>         ssi_srmsk=0xfffffffc
>
> SDMA RX channel:
> SDMA channel 4 status
>         SDMA_H_STATSTOP=0x00000000
>         SDMA_H_START=0x00000000
>         SDMA_H_EVTOVR=0x00000001
>         SDMA_H_EVTPEND=0x0000001a
>         SDMA_H_EVTERR=0x00000000
>         SDMA_H_DSPOVR=0xffffffff
>         SDMA_H_HOSTOVR=0x00000000
>         SDMA_H_INTR=0x00000000
>         SDMA_H_INTRMSK=0x00000018
>         SDMACORE_EVENTS=0x00000000
>         SDMACORE_EVENTS2=0x00000000
>
> SDMA EVTERR channel counters:
> *        003 = 1**
> **        004 = 1*
>
> SDMA TX channel:
> SDMA channel 3 status
>         SDMA_H_STATSTOP=0x00000000
>         SDMA_H_START=0x00000000
>         SDMA_H_EVTOVR=0x00000001
>         SDMA_H_EVTPEND=0x0000001a
>         SDMA_H_EVTERR=0x00000000
>         SDMA_H_DSPOVR=0xffffffff
>         SDMA_H_HOSTOVR=0x00000000
>         SDMA_H_INTR=0x00000000
>         SDMA_H_INTRMSK=0x00000018
>         SDMACORE_EVENTS=0x00000000
>         SDMACORE_EVENTS2=0x00000000
>
> SDMA EVTERR channel counters:
> *        003 = 1**
> **        004 = 1*
>
> root@voneus-domus-imx6sx:~# cat /proc/interrupts
>            CPU0
>  16:      39485       GPC  55 Edge      i.MX Timer Tick
>  20:       2310       GPC  26 Edge      2020000.serial
>  21:          0       GPC  46 Edge      ssi-tdm
>  39:          0  gpio-mxc  12 Edge      si3217x-irq
> 123:          0  gpio-mxc  28 Edge      2194000.usdhc cd
> 266:          0       GPC  49 Edge      imx_thermal
> 271:          0       GPC  19 Edge      rtc alarm
> *277:      36800       GPC   2 Edge      sdma*
> 278:          0       GPC  43 Edge      2184000.usb
> 279:       2994       GPC  23 Edge      mmc0
> 280:        245       GPC  25 Edge      mmc1
> 283:          9       GPC 109 Edge      21e4000.qspi
> 284:          0       GPC  27 Edge      21e8000.serial
> 287:      50274       GPC  18 Edge      228c000.ecspi
> IPI0:          0  CPU wakeup interrupts
> IPI1:          0  Timer broadcast interrupts
> IPI2:          0  Rescheduling interrupts
> IPI3:          0  Function call interrupts
> IPI4:          0  Single function call interrupts
> IPI5:          0  CPU stop interrupts
> IPI6:          1  IRQ work interrupts
> IPI7:          0  completion interrupts
> Err:          0
>
> This is the relevant part of the dmesg
>
> [  993.283774] dahdi: Version:
> [  993.317161] dahdi: Telephony Interface Registered on major 196
> [  997.013802] si3217x_audmux_probe: AUDMUX base is 0xa0b10000
> *[  999.313971] sdma_disable_channel: Disabling EVTERR for channel 3**
> **[  999.320620] sdma_disable_channel: Disabling EVTERR for channel 4*
> [  999.326866] si3217x_ssi_probe: SSI base is 0xa0b20000 clock rate is 2048000Hz, TDM Frame rate 8000Hz, channels 32
> having 8 bits word length
> [ 1002.733512] si3217x_probe: SPI setup mode 3, 8 bits/w, 10000000 Hz max
> [ 1002.740962] RX: prepare for the DMA.
> *[ 1002.744960] sdma_enable_channel: Enabling EVTERR for channel 4*
> [ 1002.752305] TX: prepare for the DMA.
> *[ 1002.756111] sdma_enable_channel: Enabling EVTERR for channel 3*
> [ 1002.762739] si3217x_ssi_set_clock: BIT_CLK=8192000, IPGCLK=66000000, PM=1
> [ 1003.278453] Si3217x: isVerifiedProslic : chan(0) REG PCMTXHI VAL = 00
> [ 1003.285624] Si3217x: isVerifiedProslic : Not a VDAA chan(0) REG PCMMODE VAL = 05
> [ 1003.306492] SLIC verification OK
> [ 1003.310461] SPI ret=0, MSTRSTAT=0x1f
> [ 1003.314068]  PCLK_VALID       = 1
> [ 1003.317115]  FS_VALID         = 1
> [ 1003.319850]  FS_DETECT        = 1
> [ 1003.322661]  PLL_LOCK         = 1
> [ 1003.325385]  SRAM_CLR         = 1
> [ 1003.328181]  PCLK_FAULT       = 0
> [ 1003.331083]  FS_FAULT         = 0
> [ 1003.333807]  PLL_FAULT        = 0
> [ 1003.341485] Si3217x: isVerifiedProslic : chan(0) REG PCMTXHI VAL = 00
> [ 1003.349140] Si3217x: isVerifiedProslic : Not a VDAA chan(0) REG PCMMODE VAL = 05
> [ 1003.356601] Si3217x: Channel 0 : Type = PROSLIC
> [ 1003.362536] Si3217x: isVerifiedProslic : chan(1) REG PCMTXHI VAL = 40
> [ 1003.369863] Si3217x: Channel 1 : Type = DAA
> [ 1003.374358] si3217x: Channel 0 : Type = 26
> [ 1003.378515] si3217x: Channel 0 : Rev  = 1
> [ 1003.385623] Si3217x: loading patch: 12102012
> [ 1007.624105] Si3217x: Channel 0 : VBAT Up = 62.754 v
> [ 1008.556990] Si3217x: PCMStart
> [ 1008.560526] Channel 0: FXS model Si32178
> [ 1008.567449] Channel 1: FXO model Si32919 rev A
> [ 1008.591259] Found: Quadplay FXS/FXO Card
>
> Basically for every DMA channel attached to a SSI peripheral I will enable the corresponding EVTERR bit
> for the given channel in order to detect if a DMA overflow condition might happen or not.
>
> With the patch below I'm able to see the error happening. And more likely it happen just just afterwards
> the EVTERR notify the problem to the ISR. At this point the DMA simply stalls due to some problems, most
> likely because the SSI FIFO is in overflow or underflow condition. I will do add the code to dump the SSI
> registers once EVTERR is triggered.

After applying the changes to dump the SSI registers once the ISR is triggered by EVTERR, the situation is
the following:

[   57.426204] SSI Registers:
[   57.428955]  ssi_scr=0x0000109f
[   57.432117]  ssi_sier=0x00500504
[   57.435361]  ssi_stcr=0x000002e8
[   57.438603]  ssi_srcr=0x00000288
[   57.441845]  ssi_stccr=0x00007f01
[   57.445175]  ssi_srccr=0x00007f01
[   57.448504]  ssi_sfcsr=0x00aaf0aa
[   57.451833]  ssi_stmsk=0xfffffffc
[   57.455164]  ssi_srmsk=0xfffffffc

Both TX and RX FIFO watermarks are set to maxburst + 2 = 10 in this run.
The SFCSR register reports the TX FIFO empty and the RX FIFO full, and SIER is asserting
the flags associated with this condition.

So why is this happening, and why is the DMA transfer not triggered? Might increasing
the DMA priority solve this problem?
Caleb Crome Nov. 4, 2015, 5:41 p.m. UTC | #2
On Wed, Nov 4, 2015 at 8:53 AM, Roberto Fichera <kernel@tekno-soft.it> wrote:
> On 11/04/2015 04:33 PM, Roberto Fichera wrote:
>>         *RX DMA frame count=36795*
>>         RX DMA addr=0x9ef0e000
>>         RX DMA buffer len=16
>>         *TX DMA frame count=36795*
>>         TX DMA addr=0x9ee49000
>>         TX DMA buffer len=16


I notice your DMA buffer length is very short, which would require
very frequent interrupts to service, right?   Can you increase your
DMA buffer length by increasing the period size?  Maybe that would
help reduce the likelihood of a failure.  Although it would be good to
be able to recover from a failure gracefully.
-Caleb
Roberto Fichera Nov. 4, 2015, 5:52 p.m. UTC | #3
On 11/04/2015 06:41 PM, Caleb Crome wrote:
> On Wed, Nov 4, 2015 at 8:53 AM, Roberto Fichera <kernel@tekno-soft.it> wrote:
>> On 11/04/2015 04:33 PM, Roberto Fichera wrote:
>>>         *RX DMA frame count=36795*
>>>         RX DMA addr=0x9ef0e000
>>>         RX DMA buffer len=16
>>>         *TX DMA frame count=36795*
>>>         TX DMA addr=0x9ee49000
>>>         TX DMA buffer len=16
>
> I notice your DMA buffer length is very short, which would require
> very frequency interrupts to service, right?   Can you increase your
> DMA buffer length by increasing the period size?  Maybe that would
> help reduce the likelyhood of a failure.  

Not sure if this can help! SSI TDM is set to 32 slots but only 2 are unmasked (enabled). So every frame only
2 slots are pushed/pulled from the FIFOs. maxburst is set to pull a block of 8 elements from the
FIFOs; the cyclic DMA API calls this the period. So the full DMA buffer is filled in (buffer size / period) DMA iterations.

> Although it would be good to
> be able to recover from a failure gracefully.

Yep! I will try to restart the DMA channels after a failure to see whether this works or not.

> -Caleb
> _______________________________________________
> Alsa-devel mailing list
> Alsa-devel@alsa-project.org
> http://mailman.alsa-project.org/mailman/listinfo/alsa-devel
>
Nicolin Chen Nov. 4, 2015, 5:58 p.m. UTC | #4
On Wed, Nov 04, 2015 at 04:33:16PM +0100, Roberto Fichera wrote:
 
> With the patch below I'm able to see the error happening. And more likely it happen just just afterwards
> the EVTERR notify the problem to the ISR. At this point the DMA simply stalls due to some problems, most
> likely because the SSI FIFO is in overflow or underflow condition. I will do add the code to dump the SSI

No, SSI FIFO under/overflow were caused by the DMA stall as their
channels got error out -- SSI was still consuming the TX FIFO and
filling the RX FIFO while DMA didn't move the data at all so SSI
FIFOs got under/overflowed.

> I think that at this point we should in theory restart the DMA channel, but however how to fix this and
> why this is happening?

According to Reference Manual:
----

1) The CHNERR[i] bit is set when a DMA request that triggers channel
i is received through the corresponding input pins and the EP[i]
bit is already set;

2) Externally triggered channel pending flag EP[i] is set by the
scheduler when the channel was activated by a DMA request. It can
be cleared by the ith channel script.

----

It looks like your system made another DMA request while the SDMA
was still in the middle of the transaction for the same channel.
I guess you should find a way to make less frequent DMA requests,
making higher watermarks and larger burst size for example.
Roberto Fichera Nov. 4, 2015, 6:09 p.m. UTC | #5
On 11/04/2015 06:58 PM, Nicolin Chen wrote:
> On Wed, Nov 04, 2015 at 04:33:16PM +0100, Roberto Fichera wrote:
>  
>> With the patch below I'm able to see the error happening. And more likely it happen just just afterwards
>> the EVTERR notify the problem to the ISR. At this point the DMA simply stalls due to some problems, most
>> likely because the SSI FIFO is in overflow or underflow condition. I will do add the code to dump the SSI
> No, SSI FIFO under/overflow were caused by the DMA stall as their
> channels got error out -- SSI was still consuming the TX FIFO and
> filling the RX FIFO while DMA didn't move the data at all so SSI
> FIFOs got under/overflowed.

Yeah! The SSI is correctly reporting this problem because DMA is stalled for some reason.

>
>> I think that at this point we should in theory restart the DMA channel, but however how to fix this and
>> why this is happening?
> According to Reference Manual:
> ----
>
> 1) The CHNERR[i] bit is set when a DMA request that triggers channel
> i is received through the corresponding input pins and the EP[i]
> bit is already set;
>
> 2) Externally triggered channel pending flag EP[i] is set by the
> scheduler when the channel was activated by a DMA request. It can
> be cleared by the ith channel script.
>
> ----
>
> It looks like your system made another DMA request while the SDMA
> was still in the middle of the transaction for the same channel.

I don't know, because this is something I cannot control. I'm just setting up two cyclic DMA
transfers, one for TX and one for RX, before setting both RDMAE and TDMAE, and nothing else. The rest
is done by the SDMA ISR.

> I guess you should find a way to make less frequent DMA requests,
> making higher watermarks and larger burst size for example.
>
I can try to increase the FIFO watermark up to 15 elements, but after that
I don't have more choices. The problem is that the TDM has to run continuously
because the SLIC needs it. I can eventually enable and disable the DMA requests
depending on the active channels as an optimization, but the question remains: why is the
DMA transfer not triggered?
Nicolin Chen Nov. 4, 2015, 6:11 p.m. UTC | #6
On Wed, Nov 04, 2015 at 06:52:12PM +0100, Roberto Fichera wrote:

> > I notice your DMA buffer length is very short, which would require
> > very frequency interrupts to service, right?   Can you increase your
> > DMA buffer length by increasing the period size?  Maybe that would
> > help reduce the likelyhood of a failure.  
> 
> Not sure if this can help! SSI TDM is set to 32 slots but only 2 are masked. So every frame only
> 2 slots are pushed/pulled from the FIFOs. maxburst is set to pull a block of 8 elements from the
> FIFOs, DMA cyclic call it period. So the full DMA buffer is filled in buffer size / period DMA iterations.

A smaller period size may cause DMA buffer over/underrun -- ALSA
over/underrun even though it seems that you didn't report any.

And apparently SDMA would get more interrupt/callbacks. I would
suggest a larger period size and buffer size although I cannot
tie your problem with the size. But you may try.
Nicolin Chen Nov. 4, 2015, 6:18 p.m. UTC | #7
On Wed, Nov 04, 2015 at 07:09:14PM +0100, Roberto Fichera wrote:

> I can try to increase the FIFO watermark up to 15 elements, but after that
> I don't have more choices. The problem is that the TDM has to run continuously
> because the SLIC need it. I can eventually enable and disable the DMA requests
> depending by the active channels for optimization, but the problem still,why the
> DMA transfer is not triggered?
 
There's nothing you can do from SSI side. If you don't clear the
Channel Error bit, the channel might be still hanging there until
you clear the bit or reset the channel. You may read SDMA chapter
for details.
Roberto Fichera Nov. 4, 2015, 9:47 p.m. UTC | #8
On 11/04/2015 07:11 PM, Nicolin Chen wrote:
> On Wed, Nov 04, 2015 at 06:52:12PM +0100, Roberto Fichera wrote:
>
>>> I notice your DMA buffer length is very short, which would require
>>> very frequency interrupts to service, right?   Can you increase your
>>> DMA buffer length by increasing the period size?  Maybe that would
>>> help reduce the likelyhood of a failure.  
>> Not sure if this can help! SSI TDM is set to 32 slots but only 2 are masked. So every frame only
>> 2 slots are pushed/pulled from the FIFOs. maxburst is set to pull a block of 8 elements from the
>> FIFOs, DMA cyclic call it period. So the full DMA buffer is filled in buffer size / period DMA iterations.
> A smaller period size may course DMA buffer over/underrun -- ALSA
> over/underrun even though it seems that you didn't report any.
>
> And apparently SDMA would get more interrupt/callbacks. I would
> suggest a larger period size and buffer size although I cannot
> tie your problem with the size. But you may try.

Do you mean something like 2KB buffer and a period of 64 or 128 frames?
SDMA is supposed to wait the FIFO capacity
to complete a period, right?
Roberto Fichera Nov. 4, 2015, 9:48 p.m. UTC | #9
On 11/04/2015 07:18 PM, Nicolin Chen wrote:
> On Wed, Nov 04, 2015 at 07:09:14PM +0100, Roberto Fichera wrote:
>
>> I can try to increase the FIFO watermark up to 15 elements, but after that
>> I don't have more choices. The problem is that the TDM has to run continuously
>> because the SLIC need it. I can eventually enable and disable the DMA requests
>> depending by the active channels for optimization, but the problem still,why the
>> DMA transfer is not triggered?
>  
> There's nothing you can do from SSI side. If you don't clear the
> Channel Error bit, the channel might be still hanging there until
> you clear the bit or reset the channel. You may read SDMA chapter
> for details.

Ok! Will have a look tomorrow morning.
Roberto Fichera Nov. 5, 2015, 10:03 a.m. UTC | #10
On 11/04/2015 07:11 PM, Nicolin Chen wrote:
> On Wed, Nov 04, 2015 at 06:52:12PM +0100, Roberto Fichera wrote:
>
>>> I notice your DMA buffer length is very short, which would require
>>> very frequency interrupts to service, right?   Can you increase your
>>> DMA buffer length by increasing the period size?  Maybe that would
>>> help reduce the likelyhood of a failure.  
>> Not sure if this can help! SSI TDM is set to 32 slots but only 2 are masked. So every frame only
>> 2 slots are pushed/pulled from the FIFOs. maxburst is set to pull a block of 8 elements from the
>> FIFOs, DMA cyclic call it period. So the full DMA buffer is filled in buffer size / period DMA iterations.
> A smaller period size may course DMA buffer over/underrun -- ALSA
> over/underrun even though it seems that you didn't report any.
>
> And apparently SDMA would get more interrupt/callbacks. I would
> suggest a larger period size and buffer size although I cannot
> tie your problem with the size. But you may try.
>

Following your suggestion, I've increased the buffer size to 2K and set the period to fifo_length - 2 (13),
with that I'm now running substantially smooth except 3 EVTERR on RX DMA over 4 million of interrupts.

Thanks Nicolin! I'm quite happy now!
Fabio Estevam Nov. 5, 2015, 11:30 a.m. UTC | #11
On Thu, Nov 5, 2015 at 8:03 AM, Roberto Fichera <kernel@tekno-soft.it> wrote:

> Following your suggestion, I've increased the buffer size to 2K and set the period to fifo_length - 2 (13),
> with that I'm now running substantially smooth except 3 EVTERR on RX DMA over 4 million of interrupts.
>
> Thanks Nicolin! I'm quite happy now!

That's good progress, Roberto.

It would be nice if you and Caleb could post the patches to the mailing list.
Roberto Fichera Nov. 5, 2015, 11:48 a.m. UTC | #12
On 11/05/2015 12:30 PM, Fabio Estevam wrote:
> On Thu, Nov 5, 2015 at 8:03 AM, Roberto Fichera <kernel@tekno-soft.it> wrote:
>
>> Following your suggestion, I've increased the buffer size to 2K and set the period to fifo_length - 2 (13),
>> with that I'm now running substantially smooth except 3 EVTERR on RX DMA over 4 million of interrupts.
>>
>> Thanks Nicolin! I'm quite happy now!
> That's good progress, Roberto.
>
> It would be nice if you and Caleb could post the patches to the mailing list.
>

Indeed! Now the TDM is stable. I've also found the reason for the EVTERRs, which was related to some stale
code I had used to enable and disable both the RDMAE and TDMAE bits to try to reset the transfers.
Once I removed that code, everything looks OK.

Regarding patches, well, from my side there is nothing special compared to the original fsl_ssi.c code.
I'm basically running against a very skinny fsl_ssi.c version. I've just set up a somewhat larger DMA buffer, from
16 bytes to 2K, and have now reduced the DMA period to 8 because I'm most comfortable with that size; it simplifies
exchanging samples with the DAHDI subsystem within my DMA callbacks.

In a few words, my problem was due to a DMA buffer that was too small.

What might eventually be interesting to have are the INTRMASK and EVTERR DMA settings to trigger on DMA-related
errors, but I guess this needs to be discussed elsewhere.
diff mbox

Patch

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 9d375bc..b9faf2d 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -50,6 +50,8 @@ 
 
 /* SDMA registers */
 #define SDMA_H_C0PTR        0x000
+#define SDMACORE_EVENTS        0x005
+#define SDMACORE_EVENTS2    0x01f
 #define SDMA_H_INTR        0x004
 #define SDMA_H_STATSTOP        0x008
 #define SDMA_H_START        0x00c
@@ -385,6 +387,7 @@  struct sdma_engine {
     const struct sdma_driver_data    *drvdata;
     u32                spba_start_addr;
     u32                spba_end_addr;
+        u32                             evterrchannel[MAX_DMA_CHANNELS];
 };
 
 static struct sdma_driver_data sdma_imx31 = {
@@ -562,7 +565,17 @@  static int sdma_config_ownership(struct sdma_channel *sdmac,
 
 static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
 {
+        struct sdma_channel *sdmac = &sdma->channel[channel];
+
     writel(BIT(channel), sdma->regs + SDMA_H_START);
+
+        if ( sdmac->peripheral_type == IMX_DMATYPE_SSI_SP )
+        {
+            u32 msk = readl(sdma->regs + SDMA_H_INTRMSK);
+
+            writel(msk | BIT(channel), sdma->regs + SDMA_H_INTRMSK);
+            printk("%s: Enabling EVTERR for channel %d\n", __func__, channel);
+        }
 }
 
 /*
@@ -725,12 +738,26 @@  static irqreturn_t sdma_int_handler(int irq, void *dev_id)
 {
     struct sdma_engine *sdma = dev_id;
     unsigned long stat;
+        u32 evterr;
+
+        /* read the EVTERR register */
+        evterr = readl_relaxed(sdma->regs + SDMA_H_EVTERR);
+        if ( evterr )
+        {
+           int bitnr;
+
+           for_each_set_bit(bitnr, &evterr, sizeof(u32) * BITS_PER_BYTE)
+                sdma->evterrchannel[bitnr]++;
+        }
 
     stat = readl_relaxed(sdma->regs + SDMA_H_INTR);
     /* not interested in channel 0 interrupts */
     stat &= ~1;
     writel_relaxed(stat, sdma->regs + SDMA_H_INTR);
 
+        /* we are interested only to channels not in error status */
+        stat &= ~evterr;
+
     while (stat) {
         int channel = fls(stat) - 1;
         struct sdma_channel *sdmac = &sdma->channel[channel];
@@ -908,6 +935,14 @@  static int sdma_disable_channel(struct dma_chan *chan)
     writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
     sdmac->status = DMA_ERROR;
 
+        if ( sdmac->peripheral_type == IMX_DMATYPE_SSI_SP )
+        {
+            u32 msk = readl(sdma->regs + SDMA_H_INTRMSK);
+
+            writel(msk & ~(BIT(channel)), sdma->regs + SDMA_H_INTRMSK);
+            printk("%s: Disabling EVTERR for channel %d\n", __func__, channel);
+        }
+
     return 0;
 }
 
@@ -1650,6 +1685,44 @@  static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec,
     return dma_request_channel(mask, sdma_filter_fn, &data);
 }
 
+#include <linux/seq_file.h>
+
+#define SDMA_SHOW_REG(reg) \
+    do { \
+                u32 _val = readl(sdma->regs + reg); \
+        seq_printf(s, "\t" #reg "=0x%08lx\n", _val); \
+    } while (0)
+
+void sdma_show_chan_status(struct seq_file *s, struct dma_chan *chan)
+{
+    struct sdma_channel *sdmac = to_sdma_chan(chan);
+    struct sdma_engine *sdma = sdmac->sdma;
+        int i;
+
+        seq_printf(s, "SDMA channel %d status\n", sdmac->channel);
+        SDMA_SHOW_REG(SDMA_H_STATSTOP);
+        SDMA_SHOW_REG(SDMA_H_START);
+        SDMA_SHOW_REG(SDMA_H_EVTOVR);
+        SDMA_SHOW_REG(SDMA_H_EVTPEND);
+        SDMA_SHOW_REG(SDMA_H_EVTERR);
+        SDMA_SHOW_REG(SDMA_H_DSPOVR);
+        SDMA_SHOW_REG(SDMA_H_HOSTOVR);
+        SDMA_SHOW_REG(SDMA_H_INTR);
+        SDMA_SHOW_REG(SDMA_H_INTRMSK);
+        SDMA_SHOW_REG(SDMACORE_EVENTS);
+        SDMA_SHOW_REG(SDMACORE_EVENTS2);
+
+        seq_printf(s, "\nSDMA EVTERR channel counters:\n");
+        for(i=0; i < MAX_DMA_CHANNELS; i++)
+        {
+            if (sdma->evterrchannel[i])
+            {
+                seq_printf(s, "\t%03d = %lu\n", i, sdma->evterrchannel[i]);
+            }
+        }
+}
+EXPORT_SYMBOL_GPL(sdma_show_chan_status);
+
 static int sdma_probe(struct platform_device *pdev)
 {
     const struct of_device_id *of_id =