
[v2,2/3] dmaengine: xilinx_dma: Fix bug in multiple frame stores scenario in vdma

Message ID 1482483135-14767-3-git-send-email-appanad@xilinx.com (mailing list archive)
State New, archived

Commit Message

Appana Durga Kedareswara rao Dec. 23, 2016, 8:52 a.m. UTC
When VDMA is configured for more than one frame in the h/w,
for example when the h/w is configured for n frames and the user
submits n frames and triggers the DMA using the issue_pending API,
the current driver flow submits only one frame at a time, but it
should submit all n frames at once, since the h/w is configured
for n frames.

This patch fixes this issue.
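
For reference, the client-side flow being described is roughly the
following (illustrative sketch only, not part of this patch; a typical
dmaengine client, error handling omitted):

	/* Queue n frames, then kick the channel once. */
	for (i = 0; i < n; i++) {
		struct dma_async_tx_descriptor *tx;

		tx = dmaengine_prep_interleaved_dma(chan, &xt[i], flags);
		dmaengine_submit(tx);
	}
	dma_async_issue_pending(chan);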

Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
---
Changes for v2:
---> Fixed race conditions in the driver as suggested by Jose Abreu
---> Fixed unnecessary if/else checks in vdma_start_transfer
     as suggested by Laurent Pinchart.

 drivers/dma/xilinx/xilinx_dma.c | 54 +++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 23 deletions(-)

Comments

Jose Abreu Dec. 23, 2016, 3:36 p.m. UTC | #1
Hi Kedar,


On 23-12-2016 08:52, Kedareswara rao Appana wrote:
> When VDMA is configured for more than one frame in the h/w,
> for example when the h/w is configured for n frames and the user
> submits n frames and triggers the DMA using the issue_pending API,
> the current driver flow submits only one frame at a time, but it
> should submit all n frames at once, since the h/w is configured
> for n frames.
>
> This patch fixes this issue.
>
> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> ---
> Changes for v2:
> ---> Fixed race conditions in the driver as suggested by Jose Abreu
> ---> Fixed unnecessary if/else checks in vdma_start_transfer
>      as suggested by Laurent Pinchart.
>
>  drivers/dma/xilinx/xilinx_dma.c | 54 +++++++++++++++++++++++------------------
>  1 file changed, 31 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
> index be7eb41..cf9edd8 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -1052,25 +1052,38 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
>  	if (chan->has_sg) {
>  		dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
>  				tail_segment->phys);
> +		list_splice_tail_init(&chan->pending_list, &chan->active_list);
> +		chan->desc_pendingcount = 0;
>  	} else {
>  		struct xilinx_vdma_tx_segment *segment, *last = NULL;
> -		int i = 0;
> +		int i = 0, j = 0;
>  
>  		if (chan->desc_submitcount < chan->num_frms)
>  			i = chan->desc_submitcount;
>  
> -		list_for_each_entry(segment, &desc->segments, node) {
> -			if (chan->ext_addr)
> -				vdma_desc_write_64(chan,
> -					XILINX_VDMA_REG_START_ADDRESS_64(i++),
> -					segment->hw.buf_addr,
> -					segment->hw.buf_addr_msb);
> -			else
> -				vdma_desc_write(chan,
> -					XILINX_VDMA_REG_START_ADDRESS(i++),
> -					segment->hw.buf_addr);
> -
> -			last = segment;
> +		for (j = 0; j < chan->num_frms; ) {
> +			list_for_each_entry(segment, &desc->segments, node) {
> +				if (chan->ext_addr)
> +					vdma_desc_write_64(chan,
> +					  XILINX_VDMA_REG_START_ADDRESS_64(i++),
> +					  segment->hw.buf_addr,
> +					  segment->hw.buf_addr_msb);
> +				else
> +					vdma_desc_write(chan,
> +					    XILINX_VDMA_REG_START_ADDRESS(i++),
> +					    segment->hw.buf_addr);
> +
> +				last = segment;
> +			}
> +			list_del(&desc->node);
> +			list_add_tail(&desc->node, &chan->active_list);
> +			j++;
> +			if (list_empty(&chan->pending_list) ||
> +			    (i == chan->num_frms))
> +				break;
> +			desc = list_first_entry(&chan->pending_list,
> +						struct xilinx_dma_tx_descriptor,
> +						node);
>  		}
>  
>  		if (!last)
> @@ -1081,20 +1094,14 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
>  		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
>  				last->hw.stride);
>  		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);

What if we reach here and j < num_frms? It can happen because if
the pending list is empty the loop breaks. Then VDMA will start
with framebuffers having address 0x0. You should only program
vsize when j > num_frms or when we have already previously set
the framebuffers (i.e. when submitcount has already been
incremented num_frms at least once).
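
In pseudo-code, the kind of guard I have in mind is roughly the
following (untested sketch; 'frmbufs_initialized' is a made-up flag,
not an existing field, meaning every framebuffer address has been
programmed at least once):

		/* Only kick VSIZE once every framebuffer has a valid
		 * address, either from this call (i reached num_frms)
		 * or from a previous one.
		 */
		if (i == chan->num_frms || chan->frmbufs_initialized)
			vdma_desc_write(chan, XILINX_DMA_REG_VSIZE,
					last->hw.vsize);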

Best regards,
Jose Miguel Abreu

> -	}
>  
> -	chan->idle = false;
> -	if (!chan->has_sg) {
> -		list_del(&desc->node);
> -		list_add_tail(&desc->node, &chan->active_list);
> -		chan->desc_submitcount++;
> -		chan->desc_pendingcount--;
> +		chan->desc_submitcount += j;
> +		chan->desc_pendingcount -= j;
>  		if (chan->desc_submitcount == chan->num_frms)
>  			chan->desc_submitcount = 0;
> -	} else {
> -		list_splice_tail_init(&chan->pending_list, &chan->active_list);
> -		chan->desc_pendingcount = 0;
>  	}
> +
> +	chan->idle = false;
>  }
>  
>  /**
> @@ -1342,6 +1349,7 @@ static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
>  
>  	chan->err = false;
>  	chan->idle = true;
> +	chan->desc_submitcount = 0;
>  
>  	return err;
>  }
Appana Durga Kedareswara rao Jan. 2, 2017, 11:46 a.m. UTC | #2
Hi Jose Miguel Abreu,
	
	Thanks for the review....
Sorry for the delay in the reply; please see comments inline...

> >  	if (chan->has_sg) {
> >  		dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
> >  				tail_segment->phys);
> > +		list_splice_tail_init(&chan->pending_list, &chan->active_list);
> > +		chan->desc_pendingcount = 0;
> >  	} else {
> >  		struct xilinx_vdma_tx_segment *segment, *last = NULL;
> > -		int i = 0;
> > +		int i = 0, j = 0;
> >
> >  		if (chan->desc_submitcount < chan->num_frms)
> >  			i = chan->desc_submitcount;
> >
> > -		list_for_each_entry(segment, &desc->segments, node) {
> > -			if (chan->ext_addr)
> > -				vdma_desc_write_64(chan,
> > -					XILINX_VDMA_REG_START_ADDRESS_64(i++),
> > -					segment->hw.buf_addr,
> > -					segment->hw.buf_addr_msb);
> > -			else
> > -				vdma_desc_write(chan,
> > -					XILINX_VDMA_REG_START_ADDRESS(i++),
> > -					segment->hw.buf_addr);
> > -
> > -			last = segment;
> > +		for (j = 0; j < chan->num_frms; ) {
> > +			list_for_each_entry(segment, &desc->segments, node) {
> > +				if (chan->ext_addr)
> > +					vdma_desc_write_64(chan,
> > +					  XILINX_VDMA_REG_START_ADDRESS_64(i++),
> > +					  segment->hw.buf_addr,
> > +					  segment->hw.buf_addr_msb);
> > +				else
> > +					vdma_desc_write(chan,
> > +					    XILINX_VDMA_REG_START_ADDRESS(i++),
> > +					    segment->hw.buf_addr);
> > +
> > +				last = segment;
> > +			}
> > +			list_del(&desc->node);
> > +			list_add_tail(&desc->node, &chan->active_list);
> > +			j++;
> > +			if (list_empty(&chan->pending_list) ||
> > +			    (i == chan->num_frms))
> > +				break;
> > +			desc = list_first_entry(&chan->pending_list,
> > +						struct xilinx_dma_tx_descriptor,
> > +						node);
> >  		}
> >
> >  		if (!last)
> > @@ -1081,20 +1094,14 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
> >  		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
> >  				last->hw.stride);
> >  		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
> 
> What if we reach here and j < num_frms? It can happen because if
> the pending list is empty the loop breaks. Then VDMA will start
> with framebuffers having address 0x0. You should only program
> vsize when j > num_frms or when we have already previously set
> the framebuffers (i.e. when submitcount has already been
> incremented num_frms at least once).

In case of j < num_frms the VDMA won't start with a frame buffer having address 0,
as we program the VDMA buffer addresses based only on the desc_submitcount value, i.e. i.

Code snippet in the driver doing this:
                         list_for_each_entry(segment, &desc->segments, node) {
                                if (chan->ext_addr)
                                        vdma_desc_write_64(chan,
                                          XILINX_VDMA_REG_START_ADDRESS_64(i++),
                                          segment->hw.buf_addr,
                                          segment->hw.buf_addr_msb);
                                else
                                        vdma_desc_write(chan,
                                            XILINX_VDMA_REG_START_ADDRESS(i++),
                                            segment->hw.buf_addr);

                                last = segment;
                        } 

Initially desc_submitcount will be zero.
Let's assume the h/w is configured for 10 frames and the user submits only 5 frames
and triggers the VDMA h/w using issue_pending; in this case desc_submitcount will be 5.
desc_submitcount will become zero again only when the user submits more frames than
the h/w is capable of, or submits exactly as many frames as the h/w is capable of.
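
To make the counting concrete, my understanding of the flow with this
patch applied is:

    issue_pending #1 (5 descriptors pending):
        desc_submitcount = 0, so i starts at 0;
        addresses are written to START_ADDRESS(0)..START_ADDRESS(4);
        desc_submitcount becomes 5, desc_pendingcount becomes 0.
    issue_pending #2 (5 more descriptors pending):
        desc_submitcount = 5, so i starts at 5;
        addresses are written to START_ADDRESS(5)..START_ADDRESS(9);
        desc_submitcount reaches 10 == num_frms and wraps back to 0.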

If my understanding is wrong, could you please elaborate on the race condition you are talking about above?
	
Regards,
Kedar.

> 
> Best regards,
> Jose Miguel Abreu
>
Jose Abreu Jan. 2, 2017, 1:34 p.m. UTC | #3
Hi Kedar,


On 02-01-2017 11:46, Appana Durga Kedareswara Rao wrote:
> Hi Jose Miguel Abreu,
> 	
> 	Thanks for the review....
> Sorry for the delay in the reply please see comments inline...
>
>>

[snip]

>> What if we reach here and j < num_frms? It can happen because if
>> the pending list is empty the loop breaks. Then VDMA will start
>> with framebuffers having address 0x0. You should only program
>> vsize when j > num_frms or when we have already previously set
>> the framebuffers (i.e. when submitcount has already been
>> incremented num_frms at least once).
> In case of j < num_frms VDMA won't start the frame buffer having address 0
> As we are programming the VDMA buffer address based on the desc_submitcount value only i.e. i.
>
> Code snippet in the driver doing this:
>                          list_for_each_entry(segment, &desc->segments, node) {
>                                 if (chan->ext_addr)
>                                         vdma_desc_write_64(chan,
>                                           XILINX_VDMA_REG_START_ADDRESS_64(i++),
>                                           segment->hw.buf_addr,
>                                           segment->hw.buf_addr_msb);
>                                 else
>                                         vdma_desc_write(chan,
>                                             XILINX_VDMA_REG_START_ADDRESS(i++),
>                                             segment->hw.buf_addr);
>
>                                 last = segment;
>                         } 
>
> Initially desc_submitcount will be zero.
> Let's assume h/w is configured for 10 frames and user submitted only 5 frames.
> And triggered the VDMA h/w using issue_pending in this case desc_submitcount will be 5.
> desc_submitcount will become zero again only when
> User submits more frames than h/w capable or user submit frames up to h/w capable.
>
> If my understanding is wrong could you please elaborate the race condition that you are talking above?

I just noticed there is a write to XILINX_DMA_REG_FRMSTORE which,
according to the description in the VDMA databook, allows modifying
the total number of framebuffers.

Does it correct this situation? Let's assume the VDMA has 10
framebuffers in HW and the user only submits 5 descriptors. Then vsize
will be programmed and the VDMA will start. The VDMA will start with
fb 1, then 2, ... until 5, and all is OK. But after fb 5 the VDMA will
jump to fb 6; this fb will have no address, because the user only
submitted 5 addresses, so the VDMA will try to write to location 0x0
of system memory (if using the S2MM channel).

If so then there is no race condition, but the HW image that I
have does not have this register enabled so I was getting this
result (memory corruption because not all framebuffers had
addresses set).
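
If that register is usable on your hardware, I imagine the non-SG
start path could clamp the frame-store count to what was actually
submitted, something like the following (untested sketch; the exact
accessor and placement are guesses on my part):

		/* If fewer frames than num_frms were submitted, tell the
		 * VDMA to cycle only through the framebuffers that
		 * actually have addresses programmed.
		 */
		if (chan->desc_pendingcount &&
		    chan->desc_pendingcount < chan->num_frms)
			dma_ctrl_write(chan, XILINX_DMA_REG_FRMSTORE,
				       chan->desc_pendingcount);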

Best regards,
Jose Miguel Abreu

> 	
> Regards,
> Kedar.
>
>> Best regards,
>> Jose Miguel Abreu
>>
Appana Durga Kedareswara rao Jan. 2, 2017, 4 p.m. UTC | #4
Hi Jose Miguel Abreu,

	Thanks for the review...

[snip]...
> 
> I just noticed there is a write to XILINX_DMA_REG_FRMSTORE which, by the
> description in the VDMA databook, allows to modify the total number of
> framebuffers.
> 
> Does it correct this situation: Lets assume VDMA has 10 framebuffers in HW and
> user only submits 5 descriptors. Then vsize will be programmed and VDMA will
> start. The VDMA will start in fb 1, then 2, ... until 5 its all ok. But then after the fb
> 5 the VDMA will jump to fb 6, this fb will have no address because the user only
> submitted 5 addresses so VDMA will try to write to location 0x0 of system mem
> (if using S2MM channel). ?
> 
> If so then there is no race condition, but the HW image that I have does not
> have this register enabled so I was getting this result (memory corruption
> because not all framebuffers had addresses set).

Thanks for the explanation.
Agreed, the issue that you mentioned won't occur when the XILINX_DMA_REG_FRMSTORE
register (C_ENABLE_DEBUG_INFO_5 and C_ENABLE_DEBUG_INFO_13) is enabled in the IP.
But this register doesn't get enabled with the default IP configuration
(C_ENABLE_DEBUG_INFO_5 and C_ENABLE_DEBUG_INFO_13 are not set).

When the user has not enabled XILINX_DMA_REG_FRMSTORE in the h/w and submits fewer
frames than the h/w is capable of, the solution I am thinking of is to throw an error
in the driver saying to either enable the num-frame-store feature in the IP or submit
as many frames as the h/w is capable of. What do you think?
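
Roughly something like this in the non-SG start path (just a sketch;
'has_frmstore' is a hypothetical capability flag, not an existing
field):

		/* Refuse to start if not all framebuffers would get an
		 * address and the frame-store register cannot be used
		 * to reduce the count.
		 */
		if (!chan->has_frmstore &&
		    chan->desc_pendingcount < chan->num_frms) {
			dev_err(chan->dev,
				"enable the frame store feature in the IP or submit %d frames\n",
				chan->num_frms);
			return;
		}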

Regards,
Kedar.

> 
> Best regards,
> Jose Miguel Abreu
> 
> >
> > Regards,
> > Kedar.
> >
> >> Best regards,
> >> Jose Miguel Abreu
> >>
Jose Abreu Jan. 3, 2017, 1:51 p.m. UTC | #5
Hi Kedar,


On 02-01-2017 16:00, Appana Durga Kedareswara Rao wrote:
> Hi Jose Miguel Abreu,
>
> 	Thanks for the review...
>
> [snip]...
>> I just noticed there is a write to XILINX_DMA_REG_FRMSTORE which, by the
>> description in the VDMA databook, allows to modify the total number of
>> framebuffers.
>>
>> Does it correct this situation: Lets assume VDMA has 10 framebuffers in HW and
>> user only submits 5 descriptors. Then vsize will be programmed and VDMA will
>> start. The VDMA will start in fb 1, then 2, ... until 5 its all ok. But then after the fb
>> 5 the VDMA will jump to fb 6, this fb will have no address because the user only
>> submitted 5 addresses so VDMA will try to write to location 0x0 of system mem
>> (if using S2MM channel). ?
>>
>> If so then there is no race condition, but the HW image that I have does not
>> have this register enabled so I was getting this result (memory corruption
>> because not all framebuffers had addresses set).
> Thanks for the explanation. 
> Agree the issue that you mentioned won't come when XILINX_DMA_REG_FRMSTORE
> (C_ENABLE_DEBUG_INFO_5 and C_ENABLE_DEBUG_INFO_13) Register is enabled in the IP.
> But this register won't get enabled with the default IP configuration (C_ENABLE_DEBUG_INFO_5 and C_ENABLE_DEBUG_INFO_13).
>
> When user is not enabled XILINX_DMA_REG_FRMSTORE in the h/w and submits frames less than h/w capable.
> The solution that I am thinking is to throw an error in the driver saying that either enable the
> num frame store feature in the IP or submit the frames up to h/w capable what do you think???

Sounds fine by me.

Best regards,
Jose Miguel Abreu

>
> Regards,
> Kedar.
>
>> Best regards,
>> Jose Miguel Abreu
>>
>>> Regards,
>>> Kedar.
>>>
>>>> Best regards,
>>>> Jose Miguel Abreu
>>>>
Appana Durga Kedareswara rao Jan. 4, 2017, 6:57 a.m. UTC | #6
Hi Jose Miguel Abreu,

	Thanks for the review...

> >> If so then there is no race condition, but the HW image that I have
> >> does not have this register enabled so I was getting this result
> >> (memory corruption because not all framebuffers had addresses set).
> > Thanks for the explanation.
> > Agree the issue that you mentioned won't come when XILINX_DMA_REG_FRMSTORE
> > (C_ENABLE_DEBUG_INFO_5 and C_ENABLE_DEBUG_INFO_13) Register is enabled in the IP.
> > But this register won't get enabled with the default IP configuration
> > (C_ENABLE_DEBUG_INFO_5 and C_ENABLE_DEBUG_INFO_13).
> >
> > When user is not enabled XILINX_DMA_REG_FRMSTORE in the h/w and submits frames
> > less than h/w capable.
> > The solution that I am thinking is to throw an error in the driver saying that
> > either enable the num frame store feature in the IP or submit the frames up to
> > h/w capable what do you think???
> 
> Sounds fine by me.

Thanks, posted the v3 series; please review when you have some time...

Regards,
Kedar.

> 
> Best regards,
> Jose Miguel Abreu
> 
> >
> > Regards,
> > Kedar.
> >
> >> Best regards,
> >> Jose Miguel Abreu
> >>
> >>> Regards,
> >>> Kedar.
> >>>
> >>>> Best regards,
> >>>> Jose Miguel Abreu
> >>>>

Patch

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index be7eb41..cf9edd8 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -1052,25 +1052,38 @@  static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 	if (chan->has_sg) {
 		dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
 				tail_segment->phys);
+		list_splice_tail_init(&chan->pending_list, &chan->active_list);
+		chan->desc_pendingcount = 0;
 	} else {
 		struct xilinx_vdma_tx_segment *segment, *last = NULL;
-		int i = 0;
+		int i = 0, j = 0;
 
 		if (chan->desc_submitcount < chan->num_frms)
 			i = chan->desc_submitcount;
 
-		list_for_each_entry(segment, &desc->segments, node) {
-			if (chan->ext_addr)
-				vdma_desc_write_64(chan,
-					XILINX_VDMA_REG_START_ADDRESS_64(i++),
-					segment->hw.buf_addr,
-					segment->hw.buf_addr_msb);
-			else
-				vdma_desc_write(chan,
-					XILINX_VDMA_REG_START_ADDRESS(i++),
-					segment->hw.buf_addr);
-
-			last = segment;
+		for (j = 0; j < chan->num_frms; ) {
+			list_for_each_entry(segment, &desc->segments, node) {
+				if (chan->ext_addr)
+					vdma_desc_write_64(chan,
+					  XILINX_VDMA_REG_START_ADDRESS_64(i++),
+					  segment->hw.buf_addr,
+					  segment->hw.buf_addr_msb);
+				else
+					vdma_desc_write(chan,
+					    XILINX_VDMA_REG_START_ADDRESS(i++),
+					    segment->hw.buf_addr);
+
+				last = segment;
+			}
+			list_del(&desc->node);
+			list_add_tail(&desc->node, &chan->active_list);
+			j++;
+			if (list_empty(&chan->pending_list) ||
+			    (i == chan->num_frms))
+				break;
+			desc = list_first_entry(&chan->pending_list,
+						struct xilinx_dma_tx_descriptor,
+						node);
 		}
 
 		if (!last)
@@ -1081,20 +1094,14 @@  static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
 				last->hw.stride);
 		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
-	}
 
-	chan->idle = false;
-	if (!chan->has_sg) {
-		list_del(&desc->node);
-		list_add_tail(&desc->node, &chan->active_list);
-		chan->desc_submitcount++;
-		chan->desc_pendingcount--;
+		chan->desc_submitcount += j;
+		chan->desc_pendingcount -= j;
 		if (chan->desc_submitcount == chan->num_frms)
 			chan->desc_submitcount = 0;
-	} else {
-		list_splice_tail_init(&chan->pending_list, &chan->active_list);
-		chan->desc_pendingcount = 0;
 	}
+
+	chan->idle = false;
 }
 
 /**
@@ -1342,6 +1349,7 @@  static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
 
 	chan->err = false;
 	chan->idle = true;
+	chan->desc_submitcount = 0;
 
 	return err;
 }