diff mbox series

[v2,1/2] dmaengine: xilinx: dpdma: Fix race condition in vsync IRQ

Message ID 20240228042124.3074044-2-vishal.sagar@amd.com (mailing list archive)
State Changes Requested
Headers show
Series Xilinx DPDMA fixes and cyclic dma mode support | expand

Commit Message

Sagar, Vishal Feb. 28, 2024, 4:21 a.m. UTC
From: Neel Gandhi <neel.gandhi@xilinx.com>

The vchan_next_desc() function, called from
xilinx_dpdma_chan_queue_transfer(), must be called with
virt_dma_chan.lock held. This isn't correctly handled in all code paths,
resulting in a race condition between the .device_issue_pending()
handler and the IRQ handler which causes DMA to randomly stop. Fix it by
taking the lock around xilinx_dpdma_chan_queue_transfer() calls that are
missing it.

Signed-off-by: Neel Gandhi <neel.gandhi@amd.com>
Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Vishal Sagar <vishal.sagar@amd.com>

Link: https://lore.kernel.org/all/20220122121407.11467-1-neel.gandhi@xilinx.com
---
 drivers/dma/xilinx/xilinx_dpdma.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Comments

Tomi Valkeinen March 12, 2024, 8:33 a.m. UTC | #1
Hi,

On 28/02/2024 06:21, Vishal Sagar wrote:
> From: Neel Gandhi <neel.gandhi@xilinx.com>
> 
> The vchan_next_desc() function, called from
> xilinx_dpdma_chan_queue_transfer(), must be called with
> virt_dma_chan.lock held. This isn't correctly handled in all code paths,
> resulting in a race condition between the .device_issue_pending()
> handler and the IRQ handler which causes DMA to randomly stop. Fix it by
> taking the lock around xilinx_dpdma_chan_queue_transfer() calls that are
> missing it.
> 
> Signed-off-by: Neel Gandhi <neel.gandhi@amd.com>
> Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
> Signed-off-by: Vishal Sagar <vishal.sagar@amd.com>
> 
> Link: https://lore.kernel.org/all/20220122121407.11467-1-neel.gandhi@xilinx.com
> ---
>   drivers/dma/xilinx/xilinx_dpdma.c | 10 ++++++++--
>   1 file changed, 8 insertions(+), 2 deletions(-)

This fixes a lockdep warning:

WARNING: CPU: 1 PID: 466 at drivers/dma/xilinx/xilinx_dpdma.c:834

Afaics, this issue has been around since the initial commit, in v5.10, 
and the fix applies on top of v5.10. I have tested this on v6.2, which 
is where the DP support was added to the board I have.

So I think you can add:

Fixes: 7cbb0c63de3f ("dmaengine: xilinx: dpdma: Add the Xilinx DisplayPort DMA engine driver")

  Tomi

> diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
> index b82815e64d24..28d9af8f00f0 100644
> --- a/drivers/dma/xilinx/xilinx_dpdma.c
> +++ b/drivers/dma/xilinx/xilinx_dpdma.c
> @@ -1097,12 +1097,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct  xilinx_dpdma_chan *chan)
>   	 * Complete the active descriptor, if any, promote the pending
>   	 * descriptor to active, and queue the next transfer, if any.
>   	 */
> +	spin_lock(&chan->vchan.lock);
>   	if (chan->desc.active)
>   		vchan_cookie_complete(&chan->desc.active->vdesc);
>   	chan->desc.active = pending;
>   	chan->desc.pending = NULL;
>   
>   	xilinx_dpdma_chan_queue_transfer(chan);
> +	spin_unlock(&chan->vchan.lock);
>   
>   out:
>   	spin_unlock_irqrestore(&chan->lock, flags);
> @@ -1264,10 +1266,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan)
>   	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
>   	unsigned long flags;
>   
> -	spin_lock_irqsave(&chan->vchan.lock, flags);
> +	spin_lock_irqsave(&chan->lock, flags);
> +	spin_lock(&chan->vchan.lock);
>   	if (vchan_issue_pending(&chan->vchan))
>   		xilinx_dpdma_chan_queue_transfer(chan);
> -	spin_unlock_irqrestore(&chan->vchan.lock, flags);
> +	spin_unlock(&chan->vchan.lock);
> +	spin_unlock_irqrestore(&chan->lock, flags);
>   }
>   
>   static int xilinx_dpdma_config(struct dma_chan *dchan,
> @@ -1495,7 +1499,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t)
>   		    XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id);
>   
>   	spin_lock_irqsave(&chan->lock, flags);
> +	spin_lock(&chan->vchan.lock);
>   	xilinx_dpdma_chan_queue_transfer(chan);
> +	spin_unlock(&chan->vchan.lock);
>   	spin_unlock_irqrestore(&chan->lock, flags);
>   }
>
Sagar, Vishal March 12, 2024, 3:56 p.m. UTC | #2
[AMD Official Use Only - General]

Hi Tomi,

> -----Original Message-----
> From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
> Sent: Tuesday, March 12, 2024 1:34 AM
> To: Sagar, Vishal <vishal.sagar@amd.com>;
> laurent.pinchart@ideasonboard.com; vkoul@kernel.org
> Cc: Simek, Michal <michal.simek@amd.com>; dmaengine@vger.kernel.org;
> linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org;
> Allagadapa, Varunkumar <varunkumar.allagadapa@amd.com>
> Subject: Re: [PATCH v2 1/2] dmaengine: xilinx: dpdma: Fix race condition in
> vsync IRQ
>
> Hi,
>
> On 28/02/2024 06:21, Vishal Sagar wrote:
> > From: Neel Gandhi <neel.gandhi@xilinx.com>
> >
> > The vchan_next_desc() function, called from
> > xilinx_dpdma_chan_queue_transfer(), must be called with
> > virt_dma_chan.lock held. This isn't correctly handled in all code paths,
> > resulting in a race condition between the .device_issue_pending()
> > handler and the IRQ handler which causes DMA to randomly stop. Fix it by
> > taking the lock around xilinx_dpdma_chan_queue_transfer() calls that are
> > missing it.
> >
> > Signed-off-by: Neel Gandhi <neel.gandhi@amd.com>
> > Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
> > Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
> > Signed-off-by: Vishal Sagar <vishal.sagar@amd.com>
> >
> > Link: https://lore.kernel.org/all/20220122121407.11467-1-neel.gandhi@xilinx.com
> > ---
> >   drivers/dma/xilinx/xilinx_dpdma.c | 10 ++++++++--
> >   1 file changed, 8 insertions(+), 2 deletions(-)
>
> This fixes a lockdep warning:
>
> WARNING: CPU: 1 PID: 466 at drivers/dma/xilinx/xilinx_dpdma.c:834
>
> Afaics, this issue has been around since the initial commit, in v5.10,
> and the fix applies on top of v5.10. I have tested this on v6.2, which
> is where the DP support was added to the board I have.
>
> So I think you can add:
>
> Fixes: 7cbb0c63de3f ("dmaengine: xilinx: dpdma: Add the Xilinx DisplayPort DMA engine driver")
>
>   Tomi
>

<snip>

Thanks for going through the patch.
I will add this to the commit message and resend v3.
I am still waiting for more reviews to happen.

Regards
Vishal Sagar
Sean Anderson March 12, 2024, 5:46 p.m. UTC | #3
Hi Vishal,

On 2/27/24 23:21, Vishal Sagar wrote:
> From: Neel Gandhi <neel.gandhi@xilinx.com>
> 
> The vchan_next_desc() function, called from
> xilinx_dpdma_chan_queue_transfer(), must be called with
> virt_dma_chan.lock held. This isn't correctly handled in all code paths,
> resulting in a race condition between the .device_issue_pending()
> handler and the IRQ handler which causes DMA to randomly stop. Fix it by
> taking the lock around xilinx_dpdma_chan_queue_transfer() calls that are
> missing it.
> 
> Signed-off-by: Neel Gandhi <neel.gandhi@amd.com>
> Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
> Signed-off-by: Vishal Sagar <vishal.sagar@amd.com>
> 
> Link: https://lore.kernel.org/all/20220122121407.11467-1-neel.gandhi@xilinx.com
> ---
>  drivers/dma/xilinx/xilinx_dpdma.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
> index b82815e64d24..28d9af8f00f0 100644
> --- a/drivers/dma/xilinx/xilinx_dpdma.c
> +++ b/drivers/dma/xilinx/xilinx_dpdma.c
> @@ -1097,12 +1097,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct  xilinx_dpdma_chan *chan)
>          * Complete the active descriptor, if any, promote the pending
>          * descriptor to active, and queue the next transfer, if any.
>          */
> +       spin_lock(&chan->vchan.lock);
>         if (chan->desc.active)
>                 vchan_cookie_complete(&chan->desc.active->vdesc);
>         chan->desc.active = pending;
>         chan->desc.pending = NULL;
> 
>         xilinx_dpdma_chan_queue_transfer(chan);
> +       spin_unlock(&chan->vchan.lock);
> 
>  out:
>         spin_unlock_irqrestore(&chan->lock, flags);
> @@ -1264,10 +1266,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan)
>         struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
>         unsigned long flags;
> 
> -       spin_lock_irqsave(&chan->vchan.lock, flags);
> +       spin_lock_irqsave(&chan->lock, flags);
> +       spin_lock(&chan->vchan.lock);
>         if (vchan_issue_pending(&chan->vchan))
>                 xilinx_dpdma_chan_queue_transfer(chan);
> -       spin_unlock_irqrestore(&chan->vchan.lock, flags);
> +       spin_unlock(&chan->vchan.lock);
> +       spin_unlock_irqrestore(&chan->lock, flags);
>  }
> 
>  static int xilinx_dpdma_config(struct dma_chan *dchan,
> @@ -1495,7 +1499,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t)
>                     XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id);
> 
>         spin_lock_irqsave(&chan->lock, flags);
> +       spin_lock(&chan->vchan.lock);
>         xilinx_dpdma_chan_queue_transfer(chan);
> +       spin_unlock(&chan->vchan.lock);
>         spin_unlock_irqrestore(&chan->lock, flags);
>  }

I also ran into this issue and came up with the same fix [1].

Reviewed-by: Sean Anderson <sean.anderson@linux.dev>

[1] https://lore.kernel.org/dmaengine/20240308210034.3634938-2-sean.anderson@linux.dev/
Tomi Valkeinen March 27, 2024, 12:32 p.m. UTC | #4
On 28/02/2024 06:21, Vishal Sagar wrote:
> From: Neel Gandhi <neel.gandhi@xilinx.com>
> 
> The vchan_next_desc() function, called from
> xilinx_dpdma_chan_queue_transfer(), must be called with
> virt_dma_chan.lock held. This isn't correctly handled in all code paths,
> resulting in a race condition between the .device_issue_pending()
> handler and the IRQ handler which causes DMA to randomly stop. Fix it by
> taking the lock around xilinx_dpdma_chan_queue_transfer() calls that are
> missing it.
> 
> Signed-off-by: Neel Gandhi <neel.gandhi@amd.com>
> Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
> Signed-off-by: Vishal Sagar <vishal.sagar@amd.com>

Sean posted an almost identical, but very slightly better, patch for this,
so I think we can pick that one instead.

  Tomi

> 
> Link: https://lore.kernel.org/all/20220122121407.11467-1-neel.gandhi@xilinx.com
> ---
>   drivers/dma/xilinx/xilinx_dpdma.c | 10 ++++++++--
>   1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
> index b82815e64d24..28d9af8f00f0 100644
> --- a/drivers/dma/xilinx/xilinx_dpdma.c
> +++ b/drivers/dma/xilinx/xilinx_dpdma.c
> @@ -1097,12 +1097,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct  xilinx_dpdma_chan *chan)
>   	 * Complete the active descriptor, if any, promote the pending
>   	 * descriptor to active, and queue the next transfer, if any.
>   	 */
> +	spin_lock(&chan->vchan.lock);
>   	if (chan->desc.active)
>   		vchan_cookie_complete(&chan->desc.active->vdesc);
>   	chan->desc.active = pending;
>   	chan->desc.pending = NULL;
>   
>   	xilinx_dpdma_chan_queue_transfer(chan);
> +	spin_unlock(&chan->vchan.lock);
>   
>   out:
>   	spin_unlock_irqrestore(&chan->lock, flags);
> @@ -1264,10 +1266,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan)
>   	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
>   	unsigned long flags;
>   
> -	spin_lock_irqsave(&chan->vchan.lock, flags);
> +	spin_lock_irqsave(&chan->lock, flags);
> +	spin_lock(&chan->vchan.lock);
>   	if (vchan_issue_pending(&chan->vchan))
>   		xilinx_dpdma_chan_queue_transfer(chan);
> -	spin_unlock_irqrestore(&chan->vchan.lock, flags);
> +	spin_unlock(&chan->vchan.lock);
> +	spin_unlock_irqrestore(&chan->lock, flags);
>   }
>   
>   static int xilinx_dpdma_config(struct dma_chan *dchan,
> @@ -1495,7 +1499,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t)
>   		    XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id);
>   
>   	spin_lock_irqsave(&chan->lock, flags);
> +	spin_lock(&chan->vchan.lock);
>   	xilinx_dpdma_chan_queue_transfer(chan);
> +	spin_unlock(&chan->vchan.lock);
>   	spin_unlock_irqrestore(&chan->lock, flags);
>   }
>
diff mbox series

Patch

diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
index b82815e64d24..28d9af8f00f0 100644
--- a/drivers/dma/xilinx/xilinx_dpdma.c
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -1097,12 +1097,14 @@  static void xilinx_dpdma_chan_vsync_irq(struct  xilinx_dpdma_chan *chan)
 	 * Complete the active descriptor, if any, promote the pending
 	 * descriptor to active, and queue the next transfer, if any.
 	 */
+	spin_lock(&chan->vchan.lock);
 	if (chan->desc.active)
 		vchan_cookie_complete(&chan->desc.active->vdesc);
 	chan->desc.active = pending;
 	chan->desc.pending = NULL;
 
 	xilinx_dpdma_chan_queue_transfer(chan);
+	spin_unlock(&chan->vchan.lock);
 
 out:
 	spin_unlock_irqrestore(&chan->lock, flags);
@@ -1264,10 +1266,12 @@  static void xilinx_dpdma_issue_pending(struct dma_chan *dchan)
 	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
 	unsigned long flags;
 
-	spin_lock_irqsave(&chan->vchan.lock, flags);
+	spin_lock_irqsave(&chan->lock, flags);
+	spin_lock(&chan->vchan.lock);
 	if (vchan_issue_pending(&chan->vchan))
 		xilinx_dpdma_chan_queue_transfer(chan);
-	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	spin_unlock(&chan->vchan.lock);
+	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
 static int xilinx_dpdma_config(struct dma_chan *dchan,
@@ -1495,7 +1499,9 @@  static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t)
 		    XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id);
 
 	spin_lock_irqsave(&chan->lock, flags);
+	spin_lock(&chan->vchan.lock);
 	xilinx_dpdma_chan_queue_transfer(chan);
+	spin_unlock(&chan->vchan.lock);
 	spin_unlock_irqrestore(&chan->lock, flags);
 }