
[2/3] dmaengine: xilinx_dma: Fix bug in multiple frame stores scenario in vdma

Message ID 1481814682-31780-3-git-send-email-appanad@xilinx.com (mailing list archive)
State New, archived

Commit Message

Appana Durga Kedareswara rao Dec. 15, 2016, 3:11 p.m. UTC
When VDMA is configured for more than one frame in the h/w,
for example when the h/w is configured for n frames and the user
submits n frames and triggers the DMA using the issue_pending API,
the current driver flow submits only one frame at a time, but it
should submit all n frames at once since the h/w is configured
for n frames.

This patch fixes this issue.

Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
---
 drivers/dma/xilinx/xilinx_dma.c | 43 +++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

Comments

Jose Abreu Dec. 15, 2016, 4:10 p.m. UTC | #1
Hi Kedar,


On 15-12-2016 15:11, Kedareswara rao Appana wrote:
> When VDMA is configured for more than one frame in the h/w
> for example h/w is configured for n number of frames and user
> Submits n number of frames and triggered the DMA using issue_pending API.
> In the current driver flow we are submitting one frame at a time
> but we should submit all the n number of frames at one time as the h/w
> Is configured for n number of frames.
>
> This patch fixes this issue.
>
> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> ---
>  drivers/dma/xilinx/xilinx_dma.c | 43 +++++++++++++++++++++++++----------------
>  1 file changed, 26 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
> index 736c2a3..4f3fa94 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -1087,23 +1087,33 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
>  				tail_segment->phys);
>  	} else {
>  		struct xilinx_vdma_tx_segment *segment, *last = NULL;
> -		int i = 0;
> +		int i = 0, j = 0;
>  
>  		if (chan->desc_submitcount < chan->num_frms)
>  			i = chan->desc_submitcount;
>  
> -		list_for_each_entry(segment, &desc->segments, node) {
> -			if (chan->ext_addr)
> -				vdma_desc_write_64(chan,
> -					XILINX_VDMA_REG_START_ADDRESS_64(i++),
> -					segment->hw.buf_addr,
> -					segment->hw.buf_addr_msb);
> -			else
> -				vdma_desc_write(chan,
> -					XILINX_VDMA_REG_START_ADDRESS(i++),
> -					segment->hw.buf_addr);
> -
> -			last = segment;
> +		for (j = 0; j < chan->num_frms; ) {
> +			list_for_each_entry(segment, &desc->segments, node) {
> +				if (chan->ext_addr)
> +					vdma_desc_write_64(chan,
> +					  XILINX_VDMA_REG_START_ADDRESS_64(i++),
> +					  segment->hw.buf_addr,
> +					  segment->hw.buf_addr_msb);
> +				else
> +					vdma_desc_write(chan,
> +					    XILINX_VDMA_REG_START_ADDRESS(i++),
> +					    segment->hw.buf_addr);
> +
> +				last = segment;

Hmm, is it possible to submit more than one segment? If so, then
i and j will get out of sync.

> +			}
> +			list_del(&desc->node);
> +			list_add_tail(&desc->node, &chan->active_list);
> +			j++;

But if i is non-zero and the pending_list has more than num_frms
entries, then i will not wrap around as it should and will write to
an invalid framebuffer location, right?

> +			if (list_empty(&chan->pending_list))
> +				break;
> +			desc = list_first_entry(&chan->pending_list,
> +						struct xilinx_dma_tx_descriptor,
> +						node);
>  		}
>  
>  		if (!last)
> @@ -1114,14 +1124,13 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
>  		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
>  				last->hw.stride);
>  		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);

Maybe a check that all framebuffers contain valid addresses
should be done before programming vsize so that VDMA does not try
to write to invalid addresses.

> +
> +		chan->desc_submitcount += j;
> +		chan->desc_pendingcount -= j;
>  	}
>  
>  	chan->idle = false;
>  	if (!chan->has_sg) {
> -		list_del(&desc->node);
> -		list_add_tail(&desc->node, &chan->active_list);
> -		chan->desc_submitcount++;
> -		chan->desc_pendingcount--;
>  		if (chan->desc_submitcount == chan->num_frms)
>  			chan->desc_submitcount = 0;

"desc_submitcount >= chan->num_frms would be safer here.

>  	} else {

Best regards,
Jose Miguel Abreu
Appana Durga Kedareswara rao Dec. 15, 2016, 7:09 p.m. UTC | #2
Hi Jose Miguel Abreu,

	Thanks for the review....

> > -			last = segment;
> > +		for (j = 0; j < chan->num_frms; ) {
> > +			list_for_each_entry(segment, &desc->segments, node)
> {
> > +				if (chan->ext_addr)
> > +					vdma_desc_write_64(chan,
> > +
> XILINX_VDMA_REG_START_ADDRESS_64(i++),
> > +					  segment->hw.buf_addr,
> > +					  segment->hw.buf_addr_msb);
> > +				else
> > +					vdma_desc_write(chan,
> > +
> XILINX_VDMA_REG_START_ADDRESS(i++),
> > +					    segment->hw.buf_addr);
> > +
> > +				last = segment;
> 
> Hmm, is it possible to submit more than one segment? If so, then i and j will get
> out of sync.

If the h/w is configured for more than one frame buffer and the user submits more than one frame buffer,
we can submit more than one frame/segment to the h/w, right?

> 
> > +			}
> > +			list_del(&desc->node);
> > +			list_add_tail(&desc->node, &chan->active_list);
> > +			j++;
> 
> But if i is non zero and pending_list has more than num_frms then i will not
> wrap-around as it should and will write to invalid framebuffer location, right?

Yep, will fix in v2...

	if (list_empty(&chan->pending_list) || (i == chan->num_frms))
		break;

Is the above condition sufficient?
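
Roughly, the submission loop from this patch would then become something
like this (only a sketch of the idea, not the final v2 code):

	for (j = 0; j < chan->num_frms; ) {
		list_for_each_entry(segment, &desc->segments, node) {
			if (chan->ext_addr)
				vdma_desc_write_64(chan,
					XILINX_VDMA_REG_START_ADDRESS_64(i++),
					segment->hw.buf_addr,
					segment->hw.buf_addr_msb);
			else
				vdma_desc_write(chan,
					XILINX_VDMA_REG_START_ADDRESS(i++),
					segment->hw.buf_addr);

			last = segment;
		}
		list_del(&desc->node);
		list_add_tail(&desc->node, &chan->active_list);
		j++;

		/* Stop once the pending list is drained or every h/w frame
		 * buffer slot has been programmed, so that i never indexes
		 * past the last start address register.
		 */
		if (list_empty(&chan->pending_list) || (i == chan->num_frms))
			break;

		desc = list_first_entry(&chan->pending_list,
					struct xilinx_dma_tx_descriptor,
					node);
	}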

> 
> > +			if (list_empty(&chan->pending_list))
> > +				break;
> > +			desc = list_first_entry(&chan->pending_list,
> > +						struct
> xilinx_dma_tx_descriptor,
> > +						node);
> >  		}
> >
> >  		if (!last)
> > @@ -1114,14 +1124,13 @@ static void xilinx_vdma_start_transfer(struct
> xilinx_dma_chan *chan)
> >  		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
> >  				last->hw.stride);
> >  		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last-
> >hw.vsize);
> 
> Maybe a check that all framebuffers contain valid addresses should be done
> before programming vsize so that VDMA does not try to write to invalid
> addresses.

Do we really need to check for a valid address?
I didn't get what you mean by an invalid address, could you please explain?
In the driver we are reading from the pending_list, which is updated by the
prep_interleaved_dma call, so we are assuming the user sends a proper address, right?

> 
> > +
> > +		chan->desc_submitcount += j;
> > +		chan->desc_pendingcount -= j;
> >  	}
> >
> >  	chan->idle = false;
> >  	if (!chan->has_sg) {
> > -		list_del(&desc->node);
> > -		list_add_tail(&desc->node, &chan->active_list);
> > -		chan->desc_submitcount++;
> > -		chan->desc_pendingcount--;
> >  		if (chan->desc_submitcount == chan->num_frms)
> >  			chan->desc_submitcount = 0;
> 
> "desc_submitcount >= chan->num_frms would be safer here.

Sure will fix in v2...

Regards,
Kedar.

> 
> >  	} else {
> 
> Best regards,
> Jose Miguel Abreu
> --
> To unsubscribe from this list: send the line "unsubscribe dmaengine" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
Jose Abreu Dec. 16, 2016, 10:11 a.m. UTC | #3
Hi Kedar,


On 15-12-2016 19:09, Appana Durga Kedareswara Rao wrote:
> Hi Jose Miguel Abreu,
>
> 	Thanks for the review....
>
>>> -			last = segment;
>>> +		for (j = 0; j < chan->num_frms; ) {
>>> +			list_for_each_entry(segment, &desc->segments, node)
>> {
>>> +				if (chan->ext_addr)
>>> +					vdma_desc_write_64(chan,
>>> +
>> XILINX_VDMA_REG_START_ADDRESS_64(i++),
>>> +					  segment->hw.buf_addr,
>>> +					  segment->hw.buf_addr_msb);
>>> +				else
>>> +					vdma_desc_write(chan,
>>> +
>> XILINX_VDMA_REG_START_ADDRESS(i++),
>>> +					    segment->hw.buf_addr);
>>> +
>>> +				last = segment;
>> Hmm, is it possible to submit more than one segment? If so, then i and j will get
>> out of sync.
> If h/w is configured for more than 1 frame buffer and user submits more than one frame buffer
> We can submit more than one frame/ segment to hw right??

I'm not sure. When I used the VDMA driver I always submitted only one
segment and multiple descriptors. But the problem is, for example:

If you have:
descriptor1 (2 segments)
descriptor2 (2 segments)

And you have 3 frame buffers in the HW.

Then:
1st frame buffer will have: descriptor1 -> segment1
2nd frame buffer will have: descriptor1 -> segment2
3rd frame buffer will have: descriptor2 -> segment1
but, 4th frame buffer will have: descriptor2 -> segment2 <----
INVALID because there are only 3 frame buffers

So, maybe a check inside the loop "list_for_each_entry(segment,
&desc->segments, node)" could be a nice to have.
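
Something like this inside the segments loop is what I had in mind
(untested sketch):

	list_for_each_entry(segment, &desc->segments, node) {
		/* Never program more start addresses than the number of
		 * frame buffers the h/w was built with, even if a
		 * descriptor carries extra segments.
		 */
		if (i == chan->num_frms)
			break;

		if (chan->ext_addr)
			vdma_desc_write_64(chan,
				XILINX_VDMA_REG_START_ADDRESS_64(i++),
				segment->hw.buf_addr,
				segment->hw.buf_addr_msb);
		else
			vdma_desc_write(chan,
				XILINX_VDMA_REG_START_ADDRESS(i++),
				segment->hw.buf_addr);

		last = segment;
	}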

>
>>> +			}
>>> +			list_del(&desc->node);
>>> +			list_add_tail(&desc->node, &chan->active_list);
>>> +			j++;
>> But if i is non zero and pending_list has more than num_frms then i will not
>> wrap-around as it should and will write to invalid framebuffer location, right?
> Yep will fix in v2...
> 		
> 	If (if (list_empty(&chan->pending_list)) || (i == chan->num_frms)
> 		break;
>
> Above condition is sufficient right???

Looks ok.

>
>>> +			if (list_empty(&chan->pending_list))
>>> +				break;
>>> +			desc = list_first_entry(&chan->pending_list,
>>> +						struct
>> xilinx_dma_tx_descriptor,
>>> +						node);
>>>  		}
>>>
>>>  		if (!last)
>>> @@ -1114,14 +1124,13 @@ static void xilinx_vdma_start_transfer(struct
>> xilinx_dma_chan *chan)
>>>  		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
>>>  				last->hw.stride);
>>>  		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last-
>>> hw.vsize);
>> Maybe a check that all framebuffers contain valid addresses should be done
>> before programming vsize so that VDMA does not try to write to invalid
>> addresses.
> Do we really need to check for valid address??? 
> I didn't get you what to do you mean by invalid address could you please explain???
> In the driver we are reading form the pending_list which will be updated by pep_interleaved_dma 
> Call so we are under assumption that user sends the proper address right???

What I mean by a valid address is to check that the i variable has
already been incremented by num_frms at least once since a VDMA
reset. This way you know that all the frame buffer addresses have
been programmed and are non-zero.
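
As a rough illustration (the flag below is only hypothetical, the driver
has no such field today), the channel could remember whether a full set
of addresses has been written since the last reset and only then program
VSIZE:

	/* Hypothetical flag: set once every frame buffer start address
	 * register has been written at least once since the last reset.
	 */
	if (chan->desc_submitcount + j >= chan->num_frms)
		chan->all_frms_programmed = true;

	/* Kick off the transfer (VSIZE write) only when every frame
	 * buffer holds a valid, non-zero address.
	 */
	if (chan->all_frms_programmed)
		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);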

Best regards,
Jose Miguel Abreu

>
>>> +
>>> +		chan->desc_submitcount += j;
>>> +		chan->desc_pendingcount -= j;
>>>  	}
>>>
>>>  	chan->idle = false;
>>>  	if (!chan->has_sg) {
>>> -		list_del(&desc->node);
>>> -		list_add_tail(&desc->node, &chan->active_list);
>>> -		chan->desc_submitcount++;
>>> -		chan->desc_pendingcount--;
>>>  		if (chan->desc_submitcount == chan->num_frms)
>>>  			chan->desc_submitcount = 0;
>> "desc_submitcount >= chan->num_frms would be safer here.
> Sure will fix in v2...
>
> Regards,
> Kedar.
>
>>>  	} else {
>> Best regards,
>> Jose Miguel Abreu
>> --
>> To unsubscribe from this list: send the line "unsubscribe dmaengine" in the body
>> of a message to majordomo@vger.kernel.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html
Laurent Pinchart Dec. 16, 2016, 3:54 p.m. UTC | #4
Hi Kedareswara,

Thank you for the patch.

On Thursday 15 Dec 2016 20:41:21 Kedareswara rao Appana wrote:
> When VDMA is configured for more than one frame in the h/w
> for example h/w is configured for n number of frames and user
> Submits n number of frames and triggered the DMA using issue_pending API.
> In the current driver flow we are submitting one frame at a time
> but we should submit all the n number of frames at one time as the h/w
> Is configured for n number of frames.
> 
> This patch fixes this issue.
> 
> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> ---
>  drivers/dma/xilinx/xilinx_dma.c | 43 ++++++++++++++++++++++----------------
>  1 file changed, 26 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/dma/xilinx/xilinx_dma.c
> b/drivers/dma/xilinx/xilinx_dma.c index 736c2a3..4f3fa94 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -1087,23 +1087,33 @@ static void xilinx_vdma_start_transfer(struct
> xilinx_dma_chan *chan)
> 		tail_segment->phys);
>  	} else {
>  		struct xilinx_vdma_tx_segment *segment, *last = NULL;
> -		int i = 0;
> +		int i = 0, j = 0;
> 
>  		if (chan->desc_submitcount < chan->num_frms)
>  			i = chan->desc_submitcount;

I don't get this. i seems to index into a segment start address array, but 
gets initialized with a variable documented as "Descriptor h/w submitted 
count". I'm not familiar with the hardware, but it makes no sense to me.

> -		list_for_each_entry(segment, &desc->segments, node) {
> -			if (chan->ext_addr)
> -				vdma_desc_write_64(chan,
> -					XILINX_VDMA_REG_START_ADDRESS_64(i++),
> -					segment->hw.buf_addr,
> -					segment->hw.buf_addr_msb);
> -			else
> -				vdma_desc_write(chan,
> -					XILINX_VDMA_REG_START_ADDRESS(i++),
> -					segment->hw.buf_addr);
> -
> -			last = segment;

Isn't it an issue to write the descriptors only after calling 
xilinx_dma_start() ?

> +		for (j = 0; j < chan->num_frms; ) {
> +			list_for_each_entry(segment, &desc->segments, node) {
> +				if (chan->ext_addr)
> +					vdma_desc_write_64(chan,
> +					  
XILINX_VDMA_REG_START_ADDRESS_64(i++),
> +					  segment->hw.buf_addr,
> +					  segment->hw.buf_addr_msb);
> +				else
> +					vdma_desc_write(chan,
> +					    
XILINX_VDMA_REG_START_ADDRESS(i++),
> +					    segment->hw.buf_addr);

I assume the size of the start address array to be limited by the hardware, 
but I don't see how this code prevents from overflowing this.

The whole function is very difficult to understand, it probably requires a 
rewrite.

> +				last = segment;
> +			}
> +			list_del(&desc->node);
> +			list_add_tail(&desc->node, &chan->active_list);
> +			j++;
> +			if (list_empty(&chan->pending_list))
> +				break;
> +			desc = list_first_entry(&chan->pending_list,
> +						struct 
xilinx_dma_tx_descriptor,
> +						node);
>  		}
> 
>  		if (!last)
> @@ -1114,14 +1124,13 @@ static void xilinx_vdma_start_transfer(struct
> xilinx_dma_chan *chan) vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
>  				last->hw.stride);
>  		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
> +
> +		chan->desc_submitcount += j;
> +		chan->desc_pendingcount -= j;
>  	}
> 
>  	chan->idle = false;
>  	if (!chan->has_sg) {
> -		list_del(&desc->node);
> -		list_add_tail(&desc->node, &chan->active_list);
> -		chan->desc_submitcount++;
> -		chan->desc_pendingcount--;
>  		if (chan->desc_submitcount == chan->num_frms)
>  			chan->desc_submitcount = 0;
>  	} else {

While at it, can you merge this into the previous if (chan->has_sg) { ... } 
else { ... } ? Having them separate is confusing.
Appana Durga Kedareswara rao Dec. 19, 2016, 3:40 p.m. UTC | #5
Hi Jose Miguel Abreu,

	Thanks for the review... 

> >
> >>> -			last = segment;
> >>> +		for (j = 0; j < chan->num_frms; ) {
> >>> +			list_for_each_entry(segment, &desc->segments, node)
> >> {
> >>> +				if (chan->ext_addr)
> >>> +					vdma_desc_write_64(chan,
> >>> +
> >> XILINX_VDMA_REG_START_ADDRESS_64(i++),
> >>> +					  segment->hw.buf_addr,
> >>> +					  segment->hw.buf_addr_msb);
> >>> +				else
> >>> +					vdma_desc_write(chan,
> >>> +
> >> XILINX_VDMA_REG_START_ADDRESS(i++),
> >>> +					    segment->hw.buf_addr);
> >>> +
> >>> +				last = segment;
> >> Hmm, is it possible to submit more than one segment? If so, then i
> >> and j will get out of sync.
> > If h/w is configured for more than 1 frame buffer and user submits
> > more than one frame buffer We can submit more than one frame/ segment to
> hw right??
> 
> I'm not sure. When I used VDMA driver I always submitted only one segment and
> multiple descriptors. But the problem is, for example:
> 
> If you have:
> descriptor1 (2 segments)
> descriptor2 (2 segments)
> 
> And you have 3 frame buffers in the HW.
> 
> Then:
> 1st frame buffer will have: descriptor1 -> segment1
> 2nd frame buffer will have: descriptor1 -> segment2
> 3rd frame buffer will have: descriptor2 -> segment1
> but, 4th frame buffer will have: descriptor2 -> segment2 <---- INVALID
> because there are only 3 frame buffers
> 
> So, maybe a check inside the loop "list_for_each_entry(segment, &desc-
> >segments, node)" could be a nice to have.

With the current driver flow the user can submit only 1 segment per descriptor,
that's why I didn't add a check in the list_for_each_entry for each descriptor...
Hope this clarifies your query...

> 
> >
> >>> +			}
> >>> +			list_del(&desc->node);
> >>> +			list_add_tail(&desc->node, &chan->active_list);
> >>> +			j++;
> >> But if i is non zero and pending_list has more than num_frms then i
> >> will not wrap-around as it should and will write to invalid framebuffer
> location, right?
> > Yep will fix in v2...
> >
> > 	If (if (list_empty(&chan->pending_list)) || (i == chan->num_frms)
> > 		break;
> >
> > Above condition is sufficient right???
> 
> Looks ok.

Thanks...

> >>> @@ -1114,14 +1124,13 @@ static void
> >>> xilinx_vdma_start_transfer(struct
> >> xilinx_dma_chan *chan)
> >>>  		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
> >>>  				last->hw.stride);
> >>>  		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last-
> hw.vsize);
> >> Maybe a check that all framebuffers contain valid addresses should be
> >> done before programming vsize so that VDMA does not try to write to
> >> invalid addresses.
> > Do we really need to check for valid address???
> > I didn't get you what to do you mean by invalid address could you please
> explain???
> > In the driver we are reading form the pending_list which will be
> > updated by pep_interleaved_dma Call so we are under assumption that user
> sends the proper address right???
> 
> What I mean by valid address is to check that i variable has already been
> incremented by num_frms at least once since a VDMA reset. This way you know
> that you have programmed all the addresses of the frame buffers with an
> address and they are non-zero.

Ok Sure will fix in v2...

Regards,
Kedar.

> 
> Best regards,
> Jose Miguel Abreu
>
Appana Durga Kedareswara rao Dec. 19, 2016, 3:41 p.m. UTC | #6
Hi Laurent Pinchart,

	Thanks for the review...

> > +		int i = 0, j = 0;
> >
> >  		if (chan->desc_submitcount < chan->num_frms)
> >  			i = chan->desc_submitcount;
> 
> I don't get this. i seems to index into a segment start address array, but gets
> initialized with a variable documented as "Descriptor h/w submitted count". I'm
> not familiar with the hardware, but it makes no sense to me.

Here i is the index of the h/w frame buffer (start address register).
For example, if the h/w is configured for 3 frame buffers and the user submits 4 descriptors,
then we need to submit only 3 frame buffers to the h/w, and the next descriptor will be
submitted once there is room, i.e. when a free buffer is available.

> 
> > -		list_for_each_entry(segment, &desc->segments, node) {
> > -			if (chan->ext_addr)
> > -				vdma_desc_write_64(chan,
> > -
> 	XILINX_VDMA_REG_START_ADDRESS_64(i++),
> > -					segment->hw.buf_addr,
> > -					segment->hw.buf_addr_msb);
> > -			else
> > -				vdma_desc_write(chan,
> > -
> 	XILINX_VDMA_REG_START_ADDRESS(i++),
> > -					segment->hw.buf_addr);
> > -
> > -			last = segment;
> 
> Isn't it an issue to write the descriptors only after calling
> xilinx_dma_start() ?

The h/w won't get started until the VSIZE register is written...
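
I.e. the programming order (simplified from xilinx_vdma_start_transfer)
is start addresses first, then the stride, and VSIZE last; only the
final VSIZE write actually kicks the h/w off:

	/* Program the frame buffer start addresses first ... */
	vdma_desc_write(chan, XILINX_VDMA_REG_START_ADDRESS(i),
			segment->hw.buf_addr);
	/* ... then the stride ... */
	vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE, last->hw.stride);
	/* ... and writing VSIZE last is what starts the VDMA transfer. */
	vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);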

> 
> > +		for (j = 0; j < chan->num_frms; ) {
> > +			list_for_each_entry(segment, &desc->segments, node)
> {
> > +				if (chan->ext_addr)
> > +					vdma_desc_write_64(chan,
> > +
> XILINX_VDMA_REG_START_ADDRESS_64(i++),
> > +					  segment->hw.buf_addr,
> > +					  segment->hw.buf_addr_msb);
> > +				else
> > +					vdma_desc_write(chan,
> > +
> XILINX_VDMA_REG_START_ADDRESS(i++),
> > +					    segment->hw.buf_addr);
> 
> I assume the size of the start address array to be limited by the hardware, but I
> don't see how this code prevents from overflowing this.
> 
> The whole function is very difficult to understand, it probably requires a rewrite.

Will fix it in v2...

> 
> > +				last = segment;
> > +			}
> > +			list_del(&desc->node);
> > +			list_add_tail(&desc->node, &chan->active_list);
> > +			j++;
> > +			if (list_empty(&chan->pending_list))
> > +				break;
> > +			desc = list_first_entry(&chan->pending_list,
> > +						struct
> xilinx_dma_tx_descriptor,
> > +						node);
> >  		}
> >  	if (!chan->has_sg) {
> > -		list_del(&desc->node);
> > -		list_add_tail(&desc->node, &chan->active_list);
> > -		chan->desc_submitcount++;
> > -		chan->desc_pendingcount--;
> >  		if (chan->desc_submitcount == chan->num_frms)
> >  			chan->desc_submitcount = 0;
> >  	} else {
> 
> While at it, can you merge this into the previous if (chan->has_sg) { ... } else { ... }
> ? Having them separate is confusing.

Ok sure will fix in v2...

Regards,
Kedar.

> 
> 
> --
> Regards,
> 
> Laurent Pinchart

Patch

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 736c2a3..4f3fa94 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -1087,23 +1087,33 @@  static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 				tail_segment->phys);
 	} else {
 		struct xilinx_vdma_tx_segment *segment, *last = NULL;
-		int i = 0;
+		int i = 0, j = 0;
 
 		if (chan->desc_submitcount < chan->num_frms)
 			i = chan->desc_submitcount;
 
-		list_for_each_entry(segment, &desc->segments, node) {
-			if (chan->ext_addr)
-				vdma_desc_write_64(chan,
-					XILINX_VDMA_REG_START_ADDRESS_64(i++),
-					segment->hw.buf_addr,
-					segment->hw.buf_addr_msb);
-			else
-				vdma_desc_write(chan,
-					XILINX_VDMA_REG_START_ADDRESS(i++),
-					segment->hw.buf_addr);
-
-			last = segment;
+		for (j = 0; j < chan->num_frms; ) {
+			list_for_each_entry(segment, &desc->segments, node) {
+				if (chan->ext_addr)
+					vdma_desc_write_64(chan,
+					  XILINX_VDMA_REG_START_ADDRESS_64(i++),
+					  segment->hw.buf_addr,
+					  segment->hw.buf_addr_msb);
+				else
+					vdma_desc_write(chan,
+					    XILINX_VDMA_REG_START_ADDRESS(i++),
+					    segment->hw.buf_addr);
+
+				last = segment;
+			}
+			list_del(&desc->node);
+			list_add_tail(&desc->node, &chan->active_list);
+			j++;
+			if (list_empty(&chan->pending_list))
+				break;
+			desc = list_first_entry(&chan->pending_list,
+						struct xilinx_dma_tx_descriptor,
+						node);
 		}
 
 		if (!last)
@@ -1114,14 +1124,13 @@  static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
 				last->hw.stride);
 		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
+
+		chan->desc_submitcount += j;
+		chan->desc_pendingcount -= j;
 	}
 
 	chan->idle = false;
 	if (!chan->has_sg) {
-		list_del(&desc->node);
-		list_add_tail(&desc->node, &chan->active_list);
-		chan->desc_submitcount++;
-		chan->desc_pendingcount--;
 		if (chan->desc_submitcount == chan->num_frms)
 			chan->desc_submitcount = 0;
 	} else {