[v3,2/6] dmaengine: Add interleaved cyclic transaction type
diff mbox series

Message ID 20200123022939.9739-3-laurent.pinchart@ideasonboard.com
State Changes Requested
Headers show
Series
  • dma: Add Xilinx ZynqMP DPDMA driver
Related show

Commit Message

Laurent Pinchart Jan. 23, 2020, 2:29 a.m. UTC
The new interleaved cyclic transaction type combines interleaved and
cycle transactions. It is designed for DMA engines that back display
controllers, where the same 2D frame needs to be output to the display
until a new frame is available.

Suggested-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
---
 drivers/dma/dmaengine.c   |  8 +++++++-
 include/linux/dmaengine.h | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

Comments

Peter Ujfalusi Jan. 23, 2020, 8:03 a.m. UTC | #1
Hi Laurent,

On 23/01/2020 4.29, Laurent Pinchart wrote:
> The new interleaved cyclic transaction type combines interleaved and
> cycle transactions. It is designed for DMA engines that back display
> controllers, where the same 2D frame needs to be output to the display
> until a new frame is available.
> 
> Suggested-by: Vinod Koul <vkoul@kernel.org>
> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> ---
>  drivers/dma/dmaengine.c   |  8 +++++++-
>  include/linux/dmaengine.h | 18 ++++++++++++++++++
>  2 files changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> index 03ac4b96117c..4ffb98a47f31 100644
> --- a/drivers/dma/dmaengine.c
> +++ b/drivers/dma/dmaengine.c
> @@ -981,7 +981,13 @@ int dma_async_device_register(struct dma_device *device)
>  			"DMA_INTERLEAVE");
>  		return -EIO;
>  	}
> -
> +	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
> +	    !device->device_prep_interleaved_cyclic) {
> +		dev_err(device->dev,
> +			"Device claims capability %s, but op is not defined\n",
> +			"DMA_INTERLEAVE_CYCLIC");
> +		return -EIO;
> +	}
>  
>  	if (!device->device_tx_status) {
>  		dev_err(device->dev, "Device tx_status is not defined\n");
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index 8fcdee1c0cf9..e9af3bf835cb 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -61,6 +61,7 @@ enum dma_transaction_type {
>  	DMA_SLAVE,
>  	DMA_CYCLIC,
>  	DMA_INTERLEAVE,
> +	DMA_INTERLEAVE_CYCLIC,
>  /* last transaction type for creation of the capabilities mask */
>  	DMA_TX_TYPE_END,
>  };
> @@ -701,6 +702,10 @@ struct dma_filter {
>   *	The function takes a buffer of size buf_len. The callback function will
>   *	be called after period_len bytes have been transferred.
>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> + *	repeated until a new transfer is issued. This transfer type is meant
> + *	for display.

I think capture (camera) is another potential beneficiary of this.

So you don't need to terminate the running interleaved_cyclic and start
a new one, but prepare and issue a new one, which would
terminate/replace the currently running cyclic interleaved DMA?

Can you also update the documentation at
Documentation/driver-api/dmaengine/client.rst

One more thing might be good to clarify for the interleaved_cyclic:
What is expected when DMA_PREP_INTERRUPT is set in the flags? The
client's callback is called for each completion of
dma_interleaved_template, right?

- Péter

>   * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
>   * @device_config: Pushes a new configuration to a channel, return 0 or an error
>   *	code
> @@ -785,6 +790,9 @@ struct dma_device {
>  	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
>  		struct dma_chan *chan, struct dma_interleaved_template *xt,
>  		unsigned long flags);
> +	struct dma_async_tx_descriptor *(*device_prep_interleaved_cyclic)(
> +		struct dma_chan *chan, struct dma_interleaved_template *xt,
> +		unsigned long flags);
>  	struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)(
>  		struct dma_chan *chan, dma_addr_t dst, u64 data,
>  		unsigned long flags);
> @@ -880,6 +888,16 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
>  	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
>  }
>  
> +static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_cyclic(
> +		struct dma_chan *chan, struct dma_interleaved_template *xt,
> +		unsigned long flags)
> +{
> +	if (!chan || !chan->device || !chan->device->device_prep_interleaved_cyclic)
> +		return NULL;
> +
> +	return chan->device->device_prep_interleaved_cyclic(chan, xt, flags);
> +}
> +
>  static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset(
>  		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
>  		unsigned long flags)
> 

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Vinod Koul Jan. 23, 2020, 8:43 a.m. UTC | #2
On 23-01-20, 10:03, Peter Ujfalusi wrote:
> Hi Laurent,
> 
> On 23/01/2020 4.29, Laurent Pinchart wrote:
> > The new interleaved cyclic transaction type combines interleaved and
> > cycle transactions. It is designed for DMA engines that back display
> > controllers, where the same 2D frame needs to be output to the display
> > until a new frame is available.
> > 
> > Suggested-by: Vinod Koul <vkoul@kernel.org>
> > Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> > ---
> >  drivers/dma/dmaengine.c   |  8 +++++++-
> >  include/linux/dmaengine.h | 18 ++++++++++++++++++
> >  2 files changed, 25 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> > index 03ac4b96117c..4ffb98a47f31 100644
> > --- a/drivers/dma/dmaengine.c
> > +++ b/drivers/dma/dmaengine.c
> > @@ -981,7 +981,13 @@ int dma_async_device_register(struct dma_device *device)
> >  			"DMA_INTERLEAVE");
> >  		return -EIO;
> >  	}
> > -
> > +	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
> > +	    !device->device_prep_interleaved_cyclic) {
> > +		dev_err(device->dev,
> > +			"Device claims capability %s, but op is not defined\n",
> > +			"DMA_INTERLEAVE_CYCLIC");
> > +		return -EIO;
> > +	}
> >  
> >  	if (!device->device_tx_status) {
> >  		dev_err(device->dev, "Device tx_status is not defined\n");
> > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> > index 8fcdee1c0cf9..e9af3bf835cb 100644
> > --- a/include/linux/dmaengine.h
> > +++ b/include/linux/dmaengine.h
> > @@ -61,6 +61,7 @@ enum dma_transaction_type {
> >  	DMA_SLAVE,
> >  	DMA_CYCLIC,
> >  	DMA_INTERLEAVE,
> > +	DMA_INTERLEAVE_CYCLIC,
> >  /* last transaction type for creation of the capabilities mask */
> >  	DMA_TX_TYPE_END,
> >  };
> > @@ -701,6 +702,10 @@ struct dma_filter {
> >   *	The function takes a buffer of size buf_len. The callback function will
> >   *	be called after period_len bytes have been transferred.
> >   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> > + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> > + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> > + *	repeated until a new transfer is issued. This transfer type is meant
> > + *	for display.
> 
> I think capture (camera) is another potential beneficiary of this.
> 
> So you don't need to terminate the running interleaved_cyclic and start
> a new one, but prepare and issue a new one, which would
> terminate/replace the currently running cyclic interleaved DMA?

Why not explicitly terminate the transfer and start when a new one is
issued. That can be common usage for audio and display..
Peter Ujfalusi Jan. 23, 2020, 8:51 a.m. UTC | #3
Vinod,

On 23/01/2020 10.43, Vinod Koul wrote:
> On 23-01-20, 10:03, Peter Ujfalusi wrote:
>> Hi Laurent,
>>
>> On 23/01/2020 4.29, Laurent Pinchart wrote:
>>> The new interleaved cyclic transaction type combines interleaved and
>>> cycle transactions. It is designed for DMA engines that back display
>>> controllers, where the same 2D frame needs to be output to the display
>>> until a new frame is available.
>>>
>>> Suggested-by: Vinod Koul <vkoul@kernel.org>
>>> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
>>> ---
>>>  drivers/dma/dmaengine.c   |  8 +++++++-
>>>  include/linux/dmaengine.h | 18 ++++++++++++++++++
>>>  2 files changed, 25 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
>>> index 03ac4b96117c..4ffb98a47f31 100644
>>> --- a/drivers/dma/dmaengine.c
>>> +++ b/drivers/dma/dmaengine.c
>>> @@ -981,7 +981,13 @@ int dma_async_device_register(struct dma_device *device)
>>>  			"DMA_INTERLEAVE");
>>>  		return -EIO;
>>>  	}
>>> -
>>> +	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
>>> +	    !device->device_prep_interleaved_cyclic) {
>>> +		dev_err(device->dev,
>>> +			"Device claims capability %s, but op is not defined\n",
>>> +			"DMA_INTERLEAVE_CYCLIC");
>>> +		return -EIO;
>>> +	}
>>>  
>>>  	if (!device->device_tx_status) {
>>>  		dev_err(device->dev, "Device tx_status is not defined\n");
>>> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
>>> index 8fcdee1c0cf9..e9af3bf835cb 100644
>>> --- a/include/linux/dmaengine.h
>>> +++ b/include/linux/dmaengine.h
>>> @@ -61,6 +61,7 @@ enum dma_transaction_type {
>>>  	DMA_SLAVE,
>>>  	DMA_CYCLIC,
>>>  	DMA_INTERLEAVE,
>>> +	DMA_INTERLEAVE_CYCLIC,
>>>  /* last transaction type for creation of the capabilities mask */
>>>  	DMA_TX_TYPE_END,
>>>  };
>>> @@ -701,6 +702,10 @@ struct dma_filter {
>>>   *	The function takes a buffer of size buf_len. The callback function will
>>>   *	be called after period_len bytes have been transferred.
>>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
>>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
>>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
>>> + *	repeated until a new transfer is issued. This transfer type is meant
>>> + *	for display.
>>
>> I think capture (camera) is another potential beneficiary of this.
>>
>> So you don't need to terminate the running interleaved_cyclic and start
>> a new one, but prepare and issue a new one, which would
>> terminate/replace the currently running cyclic interleaved DMA?
> 
> Why not explicitly terminate the transfer and start when a new one is
> issued. That can be common usage for audio and display..

Yes, this is what I'm asking. The cyclic transfer is running and in
order to start the new transfer, the previous should stop. But in cyclic
case it is not going to happen unless it is terminated.

When one would want to have different interleaved transfer the display
(or capture )IP needs to be reconfigured as well. The the would need to
be terminated anyways to avoid interpreting data in a wrong way.

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart Jan. 23, 2020, 12:23 p.m. UTC | #4
Hello,

On Thu, Jan 23, 2020 at 10:51:42AM +0200, Peter Ujfalusi wrote:
> On 23/01/2020 10.43, Vinod Koul wrote:
> > On 23-01-20, 10:03, Peter Ujfalusi wrote:
> >> On 23/01/2020 4.29, Laurent Pinchart wrote:
> >>> The new interleaved cyclic transaction type combines interleaved and
> >>> cycle transactions. It is designed for DMA engines that back display
> >>> controllers, where the same 2D frame needs to be output to the display
> >>> until a new frame is available.
> >>>
> >>> Suggested-by: Vinod Koul <vkoul@kernel.org>
> >>> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> >>> ---
> >>>  drivers/dma/dmaengine.c   |  8 +++++++-
> >>>  include/linux/dmaengine.h | 18 ++++++++++++++++++
> >>>  2 files changed, 25 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> >>> index 03ac4b96117c..4ffb98a47f31 100644
> >>> --- a/drivers/dma/dmaengine.c
> >>> +++ b/drivers/dma/dmaengine.c
> >>> @@ -981,7 +981,13 @@ int dma_async_device_register(struct dma_device *device)
> >>>  			"DMA_INTERLEAVE");
> >>>  		return -EIO;
> >>>  	}
> >>> -
> >>> +	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
> >>> +	    !device->device_prep_interleaved_cyclic) {
> >>> +		dev_err(device->dev,
> >>> +			"Device claims capability %s, but op is not defined\n",
> >>> +			"DMA_INTERLEAVE_CYCLIC");
> >>> +		return -EIO;
> >>> +	}
> >>>  
> >>>  	if (!device->device_tx_status) {
> >>>  		dev_err(device->dev, "Device tx_status is not defined\n");
> >>> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> >>> index 8fcdee1c0cf9..e9af3bf835cb 100644
> >>> --- a/include/linux/dmaengine.h
> >>> +++ b/include/linux/dmaengine.h
> >>> @@ -61,6 +61,7 @@ enum dma_transaction_type {
> >>>  	DMA_SLAVE,
> >>>  	DMA_CYCLIC,
> >>>  	DMA_INTERLEAVE,
> >>> +	DMA_INTERLEAVE_CYCLIC,
> >>>  /* last transaction type for creation of the capabilities mask */
> >>>  	DMA_TX_TYPE_END,
> >>>  };
> >>> @@ -701,6 +702,10 @@ struct dma_filter {
> >>>   *	The function takes a buffer of size buf_len. The callback function will
> >>>   *	be called after period_len bytes have been transferred.
> >>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> >>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> >>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> >>> + *	repeated until a new transfer is issued. This transfer type is meant
> >>> + *	for display.
> >>
> >> I think capture (camera) is another potential beneficiary of this.

Possibly, although in the camera case I'd rather have the hardware stop
if there's no more buffer. Requiring a buffer to always be present is
annoying from a userspace point of view. For display it's different, if
userspace doesn't submit a new frame, the same frame should keep being
displayed on the screen.

> >> So you don't need to terminate the running interleaved_cyclic and start
> >> a new one, but prepare and issue a new one, which would
> >> terminate/replace the currently running cyclic interleaved DMA?

Correct.

> > Why not explicitly terminate the transfer and start when a new one is
> > issued. That can be common usage for audio and display..
> 
> Yes, this is what I'm asking. The cyclic transfer is running and in
> order to start the new transfer, the previous should stop. But in cyclic
> case it is not going to happen unless it is terminated.
> 
> When one would want to have different interleaved transfer the display
> (or capture )IP needs to be reconfigured as well. The the would need to
> be terminated anyways to avoid interpreting data in a wrong way.

The use case here is not to switch to a new configuration, but to switch
to a new buffer. If the transfer had to be terminated manually first,
the DMA engine would potentially miss a frame, which is not acceptable.
We need an atomic way to switch to the next transfer.
Vinod Koul Jan. 24, 2020, 6:10 a.m. UTC | #5
Hi Laurent,

On 23-01-20, 14:23, Laurent Pinchart wrote:
> > >>> @@ -701,6 +702,10 @@ struct dma_filter {
> > >>>   *	The function takes a buffer of size buf_len. The callback function will
> > >>>   *	be called after period_len bytes have been transferred.
> > >>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> > >>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> > >>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> > >>> + *	repeated until a new transfer is issued. This transfer type is meant
> > >>> + *	for display.
> > >>
> > >> I think capture (camera) is another potential beneficiary of this.
> 
> Possibly, although in the camera case I'd rather have the hardware stop
> if there's no more buffer. Requiring a buffer to always be present is
> annoying from a userspace point of view. For display it's different, if
> userspace doesn't submit a new frame, the same frame should keep being
> displayed on the screen.
> 
> > >> So you don't need to terminate the running interleaved_cyclic and start
> > >> a new one, but prepare and issue a new one, which would
> > >> terminate/replace the currently running cyclic interleaved DMA?
> 
> Correct.
> 
> > > Why not explicitly terminate the transfer and start when a new one is
> > > issued. That can be common usage for audio and display..
> > 
> > Yes, this is what I'm asking. The cyclic transfer is running and in
> > order to start the new transfer, the previous should stop. But in cyclic
> > case it is not going to happen unless it is terminated.
> > 
> > When one would want to have different interleaved transfer the display
> > (or capture )IP needs to be reconfigured as well. The the would need to
> > be terminated anyways to avoid interpreting data in a wrong way.
> 
> The use case here is not to switch to a new configuration, but to switch
> to a new buffer. If the transfer had to be terminated manually first,
> the DMA engine would potentially miss a frame, which is not acceptable.
> We need an atomic way to switch to the next transfer.

So in this case you have, let's say a cyclic descriptor with N buffers
and they are cyclically capturing data and providing to client/user..

So why would you like to submit again...? Once whole capture has
completed you would terminate, right...

Sorry not able to wrap my head around why new submission is required and
if that is the case why previous one cant be terminated :)
Peter Ujfalusi Jan. 24, 2020, 7:20 a.m. UTC | #6
Hi Laurent,

On 23/01/2020 14.23, Laurent Pinchart wrote:
>>>> I think capture (camera) is another potential beneficiary of this.
> 
> Possibly, although in the camera case I'd rather have the hardware stop
> if there's no more buffer. Requiring a buffer to always be present is
> annoying from a userspace point of view. For display it's different, if
> userspace doesn't submit a new frame, the same frame should keep being
> displayed on the screen.
> 
>>>> So you don't need to terminate the running interleaved_cyclic and start
>>>> a new one, but prepare and issue a new one, which would
>>>> terminate/replace the currently running cyclic interleaved DMA?
> 
> Correct.
> 
>>> Why not explicitly terminate the transfer and start when a new one is
>>> issued. That can be common usage for audio and display..
>>
>> Yes, this is what I'm asking. The cyclic transfer is running and in
>> order to start the new transfer, the previous should stop. But in cyclic
>> case it is not going to happen unless it is terminated.
>>
>> When one would want to have different interleaved transfer the display
>> (or capture )IP needs to be reconfigured as well. The the would need to
>> be terminated anyways to avoid interpreting data in a wrong way.
> 
> The use case here is not to switch to a new configuration, but to switch
> to a new buffer. If the transfer had to be terminated manually first,
> the DMA engine would potentially miss a frame, which is not acceptable.
> We need an atomic way to switch to the next transfer.

You have a special hardware in hand, most DMAs can not just replace a
cyclic transfer in-flight and it also kind of violates the DMAengine
principles.
If cyclic transfer is started then it is expected to run forever until
it is terminated. Preparing and issuing a new transfer will not get
executed when there is already a cyclic transfer in flight as your only
option is to terminate_all, which will kill the running cyclic _and_
will discard the issued and pending transfers.

So the use case is page flip when you have multiple framebuffers and you
switch them to show the updated one, right?

There are things missing in DMAengine in API level for sure to do this,
imho.
The issue is that cyclic transfers will never complete, they run until
terminated, but you want to replace the currently executing one with a
another cyclic transfer without actually terminating the other.

It is like pause the 1st cyclic and continue with the 2nd one. Then at
some point you pause the 2nd one and restart the 1st one.
It is also crucial that the pause /switch happens when the executing one
finished the interleaved round and not in the middle somewhere, right?

If you:
desc_1 = dmaengine_prep_interleaved_cyclic(chan, );
cookie_1 = dmaengine_submit(desc_1);
desc_2 = dmaengine_prep_interleaved_cyclic(chan, );
cookie_2 = dmaengine_submit(desc_1);

/* cookie_1/desc_1 is started */
dma_async_issue_pending(chan);

/* When need to switch to cookie_2 */
dmaengine_cyclic_set_active_cookie(chan, cookie_2);
/*
 * cookie_1 execution is suspended after it finished the running
 * dma_interleaved_template or buffer in normal cyclic and cookie_2
 * is replacing it.
 */

/* Switch back to cookie_1 */
dmaengine_cyclic_set_active_cookie(chan, cookie_1);
/*
 * cookie_2 execution is suspended after it finished the running
 * dma_interleaved_template or buffer in normal cyclic and cookie_1
 * is replacing it.
 */

There should be a (yet another) capabilities flag got
cyclic_set_active_cookie and the documentation should be strict on what
is the expected behavior.

You can kill everything with terminate_all.
There is another thing which is missing imho from DMAengine: to
terminate a specific cookie, not the entire channel, which might be a
good addition as you might spawn framebuffers and then delete them and
you might want to release the corresponding cookie/descriptor as well.

What do you think?

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Peter Ujfalusi Jan. 24, 2020, 7:38 a.m. UTC | #7
On 24/01/2020 9.20, Peter Ujfalusi wrote:
> Hi Laurent,
> 
> On 23/01/2020 14.23, Laurent Pinchart wrote:
>>>>> I think capture (camera) is another potential beneficiary of this.
>>
>> Possibly, although in the camera case I'd rather have the hardware stop
>> if there's no more buffer. Requiring a buffer to always be present is
>> annoying from a userspace point of view. For display it's different, if
>> userspace doesn't submit a new frame, the same frame should keep being
>> displayed on the screen.
>>
>>>>> So you don't need to terminate the running interleaved_cyclic and start
>>>>> a new one, but prepare and issue a new one, which would
>>>>> terminate/replace the currently running cyclic interleaved DMA?
>>
>> Correct.
>>
>>>> Why not explicitly terminate the transfer and start when a new one is
>>>> issued. That can be common usage for audio and display..
>>>
>>> Yes, this is what I'm asking. The cyclic transfer is running and in
>>> order to start the new transfer, the previous should stop. But in cyclic
>>> case it is not going to happen unless it is terminated.
>>>
>>> When one would want to have different interleaved transfer the display
>>> (or capture )IP needs to be reconfigured as well. The the would need to
>>> be terminated anyways to avoid interpreting data in a wrong way.
>>
>> The use case here is not to switch to a new configuration, but to switch
>> to a new buffer. If the transfer had to be terminated manually first,
>> the DMA engine would potentially miss a frame, which is not acceptable.
>> We need an atomic way to switch to the next transfer.
> 
> You have a special hardware in hand, most DMAs can not just replace a
> cyclic transfer in-flight and it also kind of violates the DMAengine
> principles.

Is there any specific reason why you need DMAengine driver for a display
DMA? Usually the drm drivers handle their DMA internally.

> If cyclic transfer is started then it is expected to run forever until
> it is terminated. Preparing and issuing a new transfer will not get
> executed when there is already a cyclic transfer in flight as your only
> option is to terminate_all, which will kill the running cyclic _and_
> will discard the issued and pending transfers.
> 
> So the use case is page flip when you have multiple framebuffers and you
> switch them to show the updated one, right?
> 
> There are things missing in DMAengine in API level for sure to do this,
> imho.
> The issue is that cyclic transfers will never complete, they run until
> terminated, but you want to replace the currently executing one with a
> another cyclic transfer without actually terminating the other.
> 
> It is like pause the 1st cyclic and continue with the 2nd one. Then at
> some point you pause the 2nd one and restart the 1st one.
> It is also crucial that the pause /switch happens when the executing one
> finished the interleaved round and not in the middle somewhere, right?
> 
> If you:
> desc_1 = dmaengine_prep_interleaved_cyclic(chan, );
> cookie_1 = dmaengine_submit(desc_1);
> desc_2 = dmaengine_prep_interleaved_cyclic(chan, );
> cookie_2 = dmaengine_submit(desc_1);
> 
> /* cookie_1/desc_1 is started */
> dma_async_issue_pending(chan);
> 
> /* When need to switch to cookie_2 */
> dmaengine_cyclic_set_active_cookie(chan, cookie_2);
> /*
>  * cookie_1 execution is suspended after it finished the running
>  * dma_interleaved_template or buffer in normal cyclic and cookie_2
>  * is replacing it.
>  */
> 
> /* Switch back to cookie_1 */
> dmaengine_cyclic_set_active_cookie(chan, cookie_1);
> /*
>  * cookie_2 execution is suspended after it finished the running
>  * dma_interleaved_template or buffer in normal cyclic and cookie_1
>  * is replacing it.
>  */
> 
> There should be a (yet another) capabilities flag got
> cyclic_set_active_cookie and the documentation should be strict on what
> is the expected behavior.
> 
> You can kill everything with terminate_all.
> There is another thing which is missing imho from DMAengine: to
> terminate a specific cookie, not the entire channel, which might be a
> good addition as you might spawn framebuffers and then delete them and
> you might want to release the corresponding cookie/descriptor as well.

This is a bit trickier as DMAengine's cookie is s32 and internally
treated as a running number and cookie status is checked against s32
numbers with < >, I think this will not like when someone kills a cookie
in the middle.

> 
> What do you think?
> 
> - Péter
> 
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
> Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
> 

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart Jan. 24, 2020, 8:50 a.m. UTC | #8
On Fri, Jan 24, 2020 at 11:40:47AM +0530, Vinod Koul wrote:
> On 23-01-20, 14:23, Laurent Pinchart wrote:
> > > >>> @@ -701,6 +702,10 @@ struct dma_filter {
> > > >>>   *	The function takes a buffer of size buf_len. The callback function will
> > > >>>   *	be called after period_len bytes have been transferred.
> > > >>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> > > >>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> > > >>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> > > >>> + *	repeated until a new transfer is issued. This transfer type is meant
> > > >>> + *	for display.
> > > >>
> > > >> I think capture (camera) is another potential beneficiary of this.
> > 
> > Possibly, although in the camera case I'd rather have the hardware stop
> > if there's no more buffer. Requiring a buffer to always be present is
> > annoying from a userspace point of view. For display it's different, if
> > userspace doesn't submit a new frame, the same frame should keep being
> > displayed on the screen.
> > 
> > > >> So you don't need to terminate the running interleaved_cyclic and start
> > > >> a new one, but prepare and issue a new one, which would
> > > >> terminate/replace the currently running cyclic interleaved DMA?
> > 
> > Correct.
> > 
> > > > Why not explicitly terminate the transfer and start when a new one is
> > > > issued. That can be common usage for audio and display..
> > > 
> > > Yes, this is what I'm asking. The cyclic transfer is running and in
> > > order to start the new transfer, the previous should stop. But in cyclic
> > > case it is not going to happen unless it is terminated.
> > > 
> > > When one would want to have different interleaved transfer the display
> > > (or capture )IP needs to be reconfigured as well. The the would need to
> > > be terminated anyways to avoid interpreting data in a wrong way.
> > 
> > The use case here is not to switch to a new configuration, but to switch
> > to a new buffer. If the transfer had to be terminated manually first,
> > the DMA engine would potentially miss a frame, which is not acceptable.
> > We need an atomic way to switch to the next transfer.
> 
> So in this case you have, let's say a cyclic descriptor with N buffers
> and they are cyclically capturing data and providing to client/user..

For the display case it's cyclic over a single buffer that is repeatedly
displayed over and over again until a new one replaces it, when
userspace wants to change the content on the screen. Userspace only has
to provide a new buffer when content changes, otherwise the display has
to keep displaying the same one.

For cameras I don't think cyclic makes too much sense, except when the
DMA engine can't work in single-shot mode and always requires a buffer
to write into. That shouldn't be the norm.

> So why would you like to submit again...? Once whole capture has
> completed you would terminate, right...
> 
> Sorry not able to wrap my head around why new submission is required and
> if that is the case why previous one cant be terminated :)
Laurent Pinchart Jan. 24, 2020, 8:56 a.m. UTC | #9
Hi Peter,

On Fri, Jan 24, 2020 at 09:20:15AM +0200, Peter Ujfalusi wrote:
> On 23/01/2020 14.23, Laurent Pinchart wrote:
> >>>> I think capture (camera) is another potential beneficiary of this.
> > 
> > Possibly, although in the camera case I'd rather have the hardware stop
> > if there's no more buffer. Requiring a buffer to always be present is
> > annoying from a userspace point of view. For display it's different, if
> > userspace doesn't submit a new frame, the same frame should keep being
> > displayed on the screen.
> > 
> >>>> So you don't need to terminate the running interleaved_cyclic and start
> >>>> a new one, but prepare and issue a new one, which would
> >>>> terminate/replace the currently running cyclic interleaved DMA?
> > 
> > Correct.
> > 
> >>> Why not explicitly terminate the transfer and start when a new one is
> >>> issued. That can be common usage for audio and display..
> >>
> >> Yes, this is what I'm asking. The cyclic transfer is running and in
> >> order to start the new transfer, the previous should stop. But in cyclic
> >> case it is not going to happen unless it is terminated.
> >>
> >> When one would want to have different interleaved transfer the display
> >> (or capture )IP needs to be reconfigured as well. The the would need to
> >> be terminated anyways to avoid interpreting data in a wrong way.
> > 
> > The use case here is not to switch to a new configuration, but to switch
> > to a new buffer. If the transfer had to be terminated manually first,
> > the DMA engine would potentially miss a frame, which is not acceptable.
> > We need an atomic way to switch to the next transfer.
> 
> You have a special hardware in hand, most DMAs can not just replace a
> cyclic transfer in-flight and it also kind of violates the DMAengine
> principles.

That's why cyclic support is optional :-)

> If cyclic transfer is started then it is expected to run forever until
> it is terminated. Preparing and issuing a new transfer will not get
> executed when there is already a cyclic transfer in flight as your only
> option is to terminate_all, which will kill the running cyclic _and_
> will discard the issued and pending transfers.

For the existing cyclic API, I could agree with that, although there's
very little documentation in the dmaengine subsystem to be used as an
authoritative source of information :-(

> So the use case is page flip when you have multiple framebuffers and you
> switch them to show the updated one, right?

Correct.

> There are things missing in DMAengine in API level for sure to do this,
> imho.
> The issue is that cyclic transfers will never complete, they run until
> terminated, but you want to replace the currently executing one with a
> another cyclic transfer without actually terminating the other.

Correct.

> It is like pause the 1st cyclic and continue with the 2nd one. Then at
> some point you pause the 2nd one and restart the 1st one.

No, after the 2nd one comes the 3rd one. It's not a double-buffering
case, it's really about replacing the buffer with another one,
regardless of where it comes from. Userspace may double-buffer, or
triple, or more.

> It is also crucial that the pause /switch happens when the executing one
> finished the interleaved round and not in the middle somewhere, right?

Yes. But that's not specific to this use case, with all non-cyclic
transfers submitting a new transfer request doesn't stop the ongoing
transfer (if any) immediately, it just queues the new transfer for
processing.

> If you:
> desc_1 = dmaengine_prep_interleaved_cyclic(chan, );
> cookie_1 = dmaengine_submit(desc_1);
> desc_2 = dmaengine_prep_interleaved_cyclic(chan, );
> cookie_2 = dmaengine_submit(desc_1);
> 
> /* cookie_1/desc_1 is started */
> dma_async_issue_pending(chan);
> 
> /* When need to switch to cookie_2 */
> dmaengine_cyclic_set_active_cookie(chan, cookie_2);
> /*
>  * cookie_1 execution is suspended after it finished the running
>  * dma_interleaved_template or buffer in normal cyclic and cookie_2
>  * is replacing it.
>  */
> 
> /* Switch back to cookie_1 */
> dmaengine_cyclic_set_active_cookie(chan, cookie_1);
> /*
>  * cookie_2 execution is suspended after it finished the running
>  * dma_interleaved_template or buffer in normal cyclic and cookie_1
>  * is replacing it.
>  */

As explained above, I don't want to switch back to a previous transfer,
I always want a new one. I don't see why we would need this kind of API
when we can just define that any queued interleaved transfer, whether
cyclic or not, is just queued and replaces the ongoing transfer at the
next frame boundary. Drivers don't have to implement the new API if the
hardware doesn't possess this capability.

> There should be a (yet another) capabilities flag got
> cyclic_set_active_cookie and the documentation should be strict on what
> is the expected behavior.
> 
> You can kill everything with terminate_all.
> There is another thing which is missing imho from DMAengine: to
> terminate a specific cookie, not the entire channel, which might be a
> good addition as you might spawn framebuffers and then delete them and
> you might want to release the corresponding cookie/descriptor as well.
> 
> What do you think?

I think it's overcomplicated for this use case :-)
Laurent Pinchart Jan. 24, 2020, 8:58 a.m. UTC | #10
Hi Peter,

On Fri, Jan 24, 2020 at 09:38:50AM +0200, Peter Ujfalusi wrote:
> On 24/01/2020 9.20, Peter Ujfalusi wrote:
> > On 23/01/2020 14.23, Laurent Pinchart wrote:
> >>>>> I think capture (camera) is another potential beneficiary of this.
> >>
> >> Possibly, although in the camera case I'd rather have the hardware stop
> >> if there's no more buffer. Requiring a buffer to always be present is
> >> annoying from a userspace point of view. For display it's different, if
> >> userspace doesn't submit a new frame, the same frame should keep being
> >> displayed on the screen.
> >>
> >>>>> So you don't need to terminate the running interleaved_cyclic and start
> >>>>> a new one, but prepare and issue a new one, which would
> >>>>> terminate/replace the currently running cyclic interleaved DMA?
> >>
> >> Correct.
> >>
> >>>> Why not explicitly terminate the transfer and start when a new one is
> >>>> issued. That can be common usage for audio and display..
> >>>
> >>> Yes, this is what I'm asking. The cyclic transfer is running and in
> >>> order to start the new transfer, the previous should stop. But in cyclic
> >>> case it is not going to happen unless it is terminated.
> >>>
> >>> When one would want to have different interleaved transfer the display
> >>> (or capture )IP needs to be reconfigured as well. The the would need to
> >>> be terminated anyways to avoid interpreting data in a wrong way.
> >>
> >> The use case here is not to switch to a new configuration, but to switch
> >> to a new buffer. If the transfer had to be terminated manually first,
> >> the DMA engine would potentially miss a frame, which is not acceptable.
> >> We need an atomic way to switch to the next transfer.
> > 
> > You have a special hardware in hand, most DMAs can not just replace a
> > cyclic transfer in-flight and it also kind of violates the DMAengine
> > principles.
> 
> Is there any specific reason why you need DMAengine driver for a display
> DMA? Usually the drm drivers handle their DMA internally.

Because it's a separate IP core that can be reused in different FPGAs
for different purposes. It happens that in my case it's a hard IP
connected to a display controller, but it could be used for non-cyclic
use cases in a different chip.

> > If cyclic transfer is started then it is expected to run forever until
> > it is terminated. Preparing and issuing a new transfer will not get
> > executed when there is already a cyclic transfer in flight as your only
> > option is to terminate_all, which will kill the running cyclic _and_
> > will discard the issued and pending transfers.
> > 
> > So the use case is page flip when you have multiple framebuffers and you
> > switch them to show the updated one, right?
> > 
> > There are things missing in DMAengine in API level for sure to do this,
> > imho.
> > The issue is that cyclic transfers will never complete, they run until
> > terminated, but you want to replace the currently executing one with a
> > another cyclic transfer without actually terminating the other.
> > 
> > It is like pause the 1st cyclic and continue with the 2nd one. Then at
> > some point you pause the 2nd one and restart the 1st one.
> > It is also crucial that the pause /switch happens when the executing one
> > finished the interleaved round and not in the middle somewhere, right?
> > 
> > If you:
> > desc_1 = dmaengine_prep_interleaved_cyclic(chan, );
> > cookie_1 = dmaengine_submit(desc_1);
> > desc_2 = dmaengine_prep_interleaved_cyclic(chan, );
> > cookie_2 = dmaengine_submit(desc_1);
> > 
> > /* cookie_1/desc_1 is started */
> > dma_async_issue_pending(chan);
> > 
> > /* When need to switch to cookie_2 */
> > dmaengine_cyclic_set_active_cookie(chan, cookie_2);
> > /*
> >  * cookie_1 execution is suspended after it finished the running
> >  * dma_interleaved_template or buffer in normal cyclic and cookie_2
> >  * is replacing it.
> >  */
> > 
> > /* Switch back to cookie_1 */
> > dmaengine_cyclic_set_active_cookie(chan, cookie_1);
> > /*
> >  * cookie_2 execution is suspended after it finished the running
> >  * dma_interleaved_template or buffer in normal cyclic and cookie_1
> >  * is replacing it.
> >  */
> > 
> > There should be a (yet another) capabilities flag got
> > cyclic_set_active_cookie and the documentation should be strict on what
> > is the expected behavior.
> > 
> > You can kill everything with terminate_all.
> > There is another thing which is missing imho from DMAengine: to
> > terminate a specific cookie, not the entire channel, which might be a
> > good addition as you might spawn framebuffers and then delete them and
> > you might want to release the corresponding cookie/descriptor as well.
> 
> This is a bit trickier as DMAengine's cookie is s32 and internally
> treated as a running number and cookie status is checked against s32
> numbers with < >, I think this will not like when someone kills a cookie
> in the middle.

I would require a major redesign, yes. Not looking forward to that,
especially as I think we don't need it.

> > What do you think?
Laurent Pinchart Feb. 10, 2020, 2:06 p.m. UTC | #11
Hi Vinod,

On Fri, Jan 24, 2020 at 10:50:51AM +0200, Laurent Pinchart wrote:
> On Fri, Jan 24, 2020 at 11:40:47AM +0530, Vinod Koul wrote:
> > On 23-01-20, 14:23, Laurent Pinchart wrote:
> >>>>>> @@ -701,6 +702,10 @@ struct dma_filter {
> >>>>>>   *	The function takes a buffer of size buf_len. The callback function will
> >>>>>>   *	be called after period_len bytes have been transferred.
> >>>>>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> >>>>>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> >>>>>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> >>>>>> + *	repeated until a new transfer is issued. This transfer type is meant
> >>>>>> + *	for display.
> >>>>>
> >>>>> I think capture (camera) is another potential beneficiary of this.
> >> 
> >> Possibly, although in the camera case I'd rather have the hardware stop
> >> if there's no more buffer. Requiring a buffer to always be present is
> >> annoying from a userspace point of view. For display it's different, if
> >> userspace doesn't submit a new frame, the same frame should keep being
> >> displayed on the screen.
> >> 
> >>>>> So you don't need to terminate the running interleaved_cyclic and start
> >>>>> a new one, but prepare and issue a new one, which would
> >>>>> terminate/replace the currently running cyclic interleaved DMA?
> >> 
> >> Correct.
> >> 
> >>>> Why not explicitly terminate the transfer and start when a new one is
> >>>> issued. That can be common usage for audio and display..
> >>> 
> >>> Yes, this is what I'm asking. The cyclic transfer is running and in
> >>> order to start the new transfer, the previous should stop. But in cyclic
> >>> case it is not going to happen unless it is terminated.
> >>> 
> >>> When one would want to have different interleaved transfer the display
> >>> (or capture )IP needs to be reconfigured as well. The the would need to
> >>> be terminated anyways to avoid interpreting data in a wrong way.
> >> 
> >> The use case here is not to switch to a new configuration, but to switch
> >> to a new buffer. If the transfer had to be terminated manually first,
> >> the DMA engine would potentially miss a frame, which is not acceptable.
> >> We need an atomic way to switch to the next transfer.
> > 
> > So in this case you have, let's say a cyclic descriptor with N buffers
> > and they are cyclically capturing data and providing to client/user..
> 
> For the display case it's cyclic over a single buffer that is repeatedly
> displayed over and over again until a new one replaces it, when
> userspace wants to change the content on the screen. Userspace only has
> to provide a new buffer when content changes, otherwise the display has
> to keep displaying the same one.

Is the use case clear enough, or do you need more information ? Are you
fine with the API for this kind of use case ?

> For cameras I don't think cyclic makes too much sense, except when the
> DMA engine can't work in single-shot mode and always requires a buffer
> to write into. That shouldn't be the norm.
> 
> > So why would you like to submit again...? Once whole capture has
> > completed you would terminate, right...
> > 
> > Sorry not able to wrap my head around why new submission is required and
> > if that is the case why previous one cant be terminated :)
Vinod Koul Feb. 13, 2020, 1:29 p.m. UTC | #12
Hi Laurent,

On 10-02-20, 16:06, Laurent Pinchart wrote:

> > >> The use case here is not to switch to a new configuration, but to switch
> > >> to a new buffer. If the transfer had to be terminated manually first,
> > >> the DMA engine would potentially miss a frame, which is not acceptable.
> > >> We need an atomic way to switch to the next transfer.
> > > 
> > > So in this case you have, let's say a cyclic descriptor with N buffers
> > > and they are cyclically capturing data and providing to client/user..
> > 
> > For the display case it's cyclic over a single buffer that is repeatedly
> > displayed over and over again until a new one replaces it, when
> > userspace wants to change the content on the screen. Userspace only has
> > to provide a new buffer when content changes, otherwise the display has
> > to keep displaying the same one.
> 
> Is the use case clear enough, or do you need more information ? Are you
> fine with the API for this kind of use case ?

So we *know* when a new buffer is being used?

IOW would it be possible for display (rather a dmaengine facing display wrapper) to detect that we are reusing an
old buffer and keep the cyclic and once detected prepare a new
descriptor, submit a new one and then terminate old one which should
trigger next transaction to be submitted

Would that make sense here?
Laurent Pinchart Feb. 13, 2020, 1:48 p.m. UTC | #13
Hi Vinod,

On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> On 10-02-20, 16:06, Laurent Pinchart wrote:
> 
> > > >> The use case here is not to switch to a new configuration, but to switch
> > > >> to a new buffer. If the transfer had to be terminated manually first,
> > > >> the DMA engine would potentially miss a frame, which is not acceptable.
> > > >> We need an atomic way to switch to the next transfer.
> > > > 
> > > > So in this case you have, let's say a cyclic descriptor with N buffers
> > > > and they are cyclically capturing data and providing to client/user..
> > > 
> > > For the display case it's cyclic over a single buffer that is repeatedly
> > > displayed over and over again until a new one replaces it, when
> > > userspace wants to change the content on the screen. Userspace only has
> > > to provide a new buffer when content changes, otherwise the display has
> > > to keep displaying the same one.
> > 
> > Is the use case clear enough, or do you need more information ? Are you
> > fine with the API for this kind of use case ?
> 
> So we *know* when a new buffer is being used?

The user of the DMA engine (the DRM DPSUB driver in this case) knows
when a new buffer needs to be used, as it receives it from userspace. In
response, it prepares a new interleaved cyclic transaction and queues
it. At the next IRQ, the DMA engine driver switches to the new
transaction (the implementation is slightly more complex to handle race
conditions, but that's the idea).

> IOW would it be possible for display (rather a dmaengine facing
> display wrapper) to detect that we are reusing an old buffer and keep
> the cyclic and once detected prepare a new descriptor, submit a new
> one and then terminate old one which should trigger next transaction
> to be submitted

I'm not sure to follow you. Do you mean that the display driver should
submit a non-cyclic transaction for every frame, reusing the same buffer
for every transaction, until a new buffer is available ? The issue with
this is that if the CPU load gets high, we may miss a frame, and the
display will break. The DPDMA hardware implements cyclic support for
this reason, and we want to use that feature to comply with the real
time requirements.

If you meant something else, could you please elaborate ?

> Would that make sense here?
Vinod Koul Feb. 13, 2020, 2:07 p.m. UTC | #14
On 13-02-20, 15:48, Laurent Pinchart wrote:
> Hi Vinod,
> 
> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> > On 10-02-20, 16:06, Laurent Pinchart wrote:
> > 
> > > > >> The use case here is not to switch to a new configuration, but to switch
> > > > >> to a new buffer. If the transfer had to be terminated manually first,
> > > > >> the DMA engine would potentially miss a frame, which is not acceptable.
> > > > >> We need an atomic way to switch to the next transfer.
> > > > > 
> > > > > So in this case you have, let's say a cyclic descriptor with N buffers
> > > > > and they are cyclically capturing data and providing to client/user..
> > > > 
> > > > For the display case it's cyclic over a single buffer that is repeatedly
> > > > displayed over and over again until a new one replaces it, when
> > > > userspace wants to change the content on the screen. Userspace only has
> > > > to provide a new buffer when content changes, otherwise the display has
> > > > to keep displaying the same one.
> > > 
> > > Is the use case clear enough, or do you need more information ? Are you
> > > fine with the API for this kind of use case ?
> > 
> > So we *know* when a new buffer is being used?
> 
> The user of the DMA engine (the DRM DPSUB driver in this case) knows
> when a new buffer needs to be used, as it receives it from userspace. In
> response, it prepares a new interleaved cyclic transaction and queues
> it. At the next IRQ, the DMA engine driver switches to the new
> transaction (the implementation is slightly more complex to handle race
> conditions, but that's the idea).
> 
> > IOW would it be possible for display (rather a dmaengine facing
> > display wrapper) to detect that we are reusing an old buffer and keep
> > the cyclic and once detected prepare a new descriptor, submit a new
> > one and then terminate old one which should trigger next transaction
> > to be submitted
> 
> I'm not sure to follow you. Do you mean that the display driver should
> submit a non-cyclic transaction for every frame, reusing the same buffer
> for every transaction, until a new buffer is available ? The issue with
> this is that if the CPU load gets high, we may miss a frame, and the
> display will break. The DPDMA hardware implements cyclic support for
> this reason, and we want to use that feature to comply with the real
> time requirements.

Sorry to cause confusion :) I mean cyclic

So, DRM DPSUB get first buffer
A.1 Prepare cyclic interleave txn
A.2 Submit the txn (it doesn't start here)
A.3 Invoke issue_pending (that starts the txn)

DRM DPSUB gets next buffer:
B.1 Prepare cyclic interleave txn
B.2 Submit the txn
B.3 Call terminate for current cyclic txn (we need an updated terminate
which terminates the current txn, right now we have terminate_all which
is a sledge hammer approach)
B.4 Next txn would start once current one is started

Does this help and make sense in your case

Thanks
Peter Ujfalusi Feb. 13, 2020, 2:15 p.m. UTC | #15
Hi Vinod, Laurent,

On 13/02/2020 16.07, Vinod Koul wrote:
> On 13-02-20, 15:48, Laurent Pinchart wrote:
>> Hi Vinod,
>>
>> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
>>> On 10-02-20, 16:06, Laurent Pinchart wrote:
>>>
>>>>>>> The use case here is not to switch to a new configuration, but to switch
>>>>>>> to a new buffer. If the transfer had to be terminated manually first,
>>>>>>> the DMA engine would potentially miss a frame, which is not acceptable.
>>>>>>> We need an atomic way to switch to the next transfer.
>>>>>>
>>>>>> So in this case you have, let's say a cyclic descriptor with N buffers
>>>>>> and they are cyclically capturing data and providing to client/user..
>>>>>
>>>>> For the display case it's cyclic over a single buffer that is repeatedly
>>>>> displayed over and over again until a new one replaces it, when
>>>>> userspace wants to change the content on the screen. Userspace only has
>>>>> to provide a new buffer when content changes, otherwise the display has
>>>>> to keep displaying the same one.
>>>>
>>>> Is the use case clear enough, or do you need more information ? Are you
>>>> fine with the API for this kind of use case ?
>>>
>>> So we *know* when a new buffer is being used?
>>
>> The user of the DMA engine (the DRM DPSUB driver in this case) knows
>> when a new buffer needs to be used, as it receives it from userspace. In
>> response, it prepares a new interleaved cyclic transaction and queues
>> it. At the next IRQ, the DMA engine driver switches to the new
>> transaction (the implementation is slightly more complex to handle race
>> conditions, but that's the idea).
>>
>>> IOW would it be possible for display (rather a dmaengine facing
>>> display wrapper) to detect that we are reusing an old buffer and keep
>>> the cyclic and once detected prepare a new descriptor, submit a new
>>> one and then terminate old one which should trigger next transaction
>>> to be submitted
>>
>> I'm not sure to follow you. Do you mean that the display driver should
>> submit a non-cyclic transaction for every frame, reusing the same buffer
>> for every transaction, until a new buffer is available ? The issue with
>> this is that if the CPU load gets high, we may miss a frame, and the
>> display will break. The DPDMA hardware implements cyclic support for
>> this reason, and we want to use that feature to comply with the real
>> time requirements.
> 
> Sorry to cause confusion :) I mean cyclic
> 
> So, DRM DPSUB get first buffer
> A.1 Prepare cyclic interleave txn
> A.2 Submit the txn (it doesn't start here)
> A.3 Invoke issue_pending (that starts the txn)
> 
> DRM DPSUB gets next buffer:
> B.1 Prepare cyclic interleave txn
> B.2 Submit the txn
> B.3 Call terminate for current cyclic txn (we need an updated terminate
> which terminates the current txn, right now we have terminate_all which
> is a sledge hammer approach)
> B.4 Next txn would start once current one is started
> 
> Does this help and make sense in your case

That would be a clean way to handle it. We were missing this API for a
long time to be able to cancel the ongoing transfer (whether it is
cyclic or slave_sg, or memcpy) and move to the next one if there is one
pending.

+1 from me if it counts ;)

> 
> Thanks
> 

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart Feb. 13, 2020, 4:52 p.m. UTC | #16
Hi Vinod and Peter,

On Thu, Feb 13, 2020 at 04:15:38PM +0200, Peter Ujfalusi wrote:
> On 13/02/2020 16.07, Vinod Koul wrote:
> > On 13-02-20, 15:48, Laurent Pinchart wrote:
> >> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> >>> On 10-02-20, 16:06, Laurent Pinchart wrote:
> >>>
> >>>>>>> The use case here is not to switch to a new configuration, but to switch
> >>>>>>> to a new buffer. If the transfer had to be terminated manually first,
> >>>>>>> the DMA engine would potentially miss a frame, which is not acceptable.
> >>>>>>> We need an atomic way to switch to the next transfer.
> >>>>>>
> >>>>>> So in this case you have, let's say a cyclic descriptor with N buffers
> >>>>>> and they are cyclically capturing data and providing to client/user..
> >>>>>
> >>>>> For the display case it's cyclic over a single buffer that is repeatedly
> >>>>> displayed over and over again until a new one replaces it, when
> >>>>> userspace wants to change the content on the screen. Userspace only has
> >>>>> to provide a new buffer when content changes, otherwise the display has
> >>>>> to keep displaying the same one.
> >>>>
> >>>> Is the use case clear enough, or do you need more information ? Are you
> >>>> fine with the API for this kind of use case ?
> >>>
> >>> So we *know* when a new buffer is being used?
> >>
> >> The user of the DMA engine (the DRM DPSUB driver in this case) knows
> >> when a new buffer needs to be used, as it receives it from userspace. In
> >> response, it prepares a new interleaved cyclic transaction and queues
> >> it. At the next IRQ, the DMA engine driver switches to the new
> >> transaction (the implementation is slightly more complex to handle race
> >> conditions, but that's the idea).
> >>
> >>> IOW would it be possible for display (rather a dmaengine facing
> >>> display wrapper) to detect that we are reusing an old buffer and keep
> >>> the cyclic and once detected prepare a new descriptor, submit a new
> >>> one and then terminate old one which should trigger next transaction
> >>> to be submitted
> >>
> >> I'm not sure to follow you. Do you mean that the display driver should
> >> submit a non-cyclic transaction for every frame, reusing the same buffer
> >> for every transaction, until a new buffer is available ? The issue with
> >> this is that if the CPU load gets high, we may miss a frame, and the
> >> display will break. The DPDMA hardware implements cyclic support for
> >> this reason, and we want to use that feature to comply with the real
> >> time requirements.
> > 
> > Sorry to cause confusion :) I mean cyclic
> > 
> > So, DRM DPSUB get first buffer
> > A.1 Prepare cyclic interleave txn
> > A.2 Submit the txn (it doesn't start here)
> > A.3 Invoke issue_pending (that starts the txn)

I assume that, at this point, the transfer is started, and repeated
forever until step B below, right ?

> > DRM DPSUB gets next buffer:
> > B.1 Prepare cyclic interleave txn
> > B.2 Submit the txn
> > B.3 Call terminate for current cyclic txn (we need an updated terminate
> > which terminates the current txn, right now we have terminate_all which
> > is a sledge hammer approach)
> > B.4 Next txn would start once current one is started

Do you mean "once current one is completed" ?

> > Does this help and make sense in your case

It does, but I really wonder why we need a new terminate operation that
would terminate a single transfer. If we call issue_pending at step B.3,
when the new txn submitted, we can terminate the current transfer at the
point. It changes the semantics of issue_pending, but only for cyclic
transfers (this whole discussions it only about cyclic transfers). As a
cyclic transfer will be repeated forever until terminated, there's no
use case for issuing a new transfer without terminating the one in
progress. I thus don't think we need a new terminate operation: the only
thing that makes sense to do when submitting a new cyclic transfer is to
terminate the current one and switch to the new one, and we already have
all the APIs we need to enable this behaviour.

> That would be a clean way to handle it. We were missing this API for a
> long time to be able to cancel the ongoing transfer (whether it is
> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> pending.

Note that this new terminate API wouldn't terminate the ongoing transfer
immediately, it would complete first, until the end of the cycle for
cyclic transfers, and until the end of the whole transfer otherwise.
This new operation would thus essentially be a no-op for non-cyclic
transfers. I don't see how it would help :-) Do you have any particular
use case in mind ?

> +1 from me if it counts ;)
Vinod Koul Feb. 14, 2020, 4:23 a.m. UTC | #17
On 13-02-20, 18:52, Laurent Pinchart wrote:
> Hi Vinod and Peter,
> 
> On Thu, Feb 13, 2020 at 04:15:38PM +0200, Peter Ujfalusi wrote:
> > On 13/02/2020 16.07, Vinod Koul wrote:
> > > On 13-02-20, 15:48, Laurent Pinchart wrote:
> > >> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> > >>> On 10-02-20, 16:06, Laurent Pinchart wrote:
> > >>>
> > >>>>>>> The use case here is not to switch to a new configuration, but to switch
> > >>>>>>> to a new buffer. If the transfer had to be terminated manually first,
> > >>>>>>> the DMA engine would potentially miss a frame, which is not acceptable.
> > >>>>>>> We need an atomic way to switch to the next transfer.
> > >>>>>>
> > >>>>>> So in this case you have, let's say a cyclic descriptor with N buffers
> > >>>>>> and they are cyclically capturing data and providing to client/user..
> > >>>>>
> > >>>>> For the display case it's cyclic over a single buffer that is repeatedly
> > >>>>> displayed over and over again until a new one replaces it, when
> > >>>>> userspace wants to change the content on the screen. Userspace only has
> > >>>>> to provide a new buffer when content changes, otherwise the display has
> > >>>>> to keep displaying the same one.
> > >>>>
> > >>>> Is the use case clear enough, or do you need more information ? Are you
> > >>>> fine with the API for this kind of use case ?
> > >>>
> > >>> So we *know* when a new buffer is being used?
> > >>
> > >> The user of the DMA engine (the DRM DPSUB driver in this case) knows
> > >> when a new buffer needs to be used, as it receives it from userspace. In
> > >> response, it prepares a new interleaved cyclic transaction and queues
> > >> it. At the next IRQ, the DMA engine driver switches to the new
> > >> transaction (the implementation is slightly more complex to handle race
> > >> conditions, but that's the idea).
> > >>
> > >>> IOW would it be possible for display (rather a dmaengine facing
> > >>> display wrapper) to detect that we are reusing an old buffer and keep
> > >>> the cyclic and once detected prepare a new descriptor, submit a new
> > >>> one and then terminate old one which should trigger next transaction
> > >>> to be submitted
> > >>
> > >> I'm not sure to follow you. Do you mean that the display driver should
> > >> submit a non-cyclic transaction for every frame, reusing the same buffer
> > >> for every transaction, until a new buffer is available ? The issue with
> > >> this is that if the CPU load gets high, we may miss a frame, and the
> > >> display will break. The DPDMA hardware implements cyclic support for
> > >> this reason, and we want to use that feature to comply with the real
> > >> time requirements.
> > > 
> > > Sorry to cause confusion :) I mean cyclic
> > > 
> > > So, DRM DPSUB get first buffer
> > > A.1 Prepare cyclic interleave txn
> > > A.2 Submit the txn (it doesn't start here)
> > > A.3 Invoke issue_pending (that starts the txn)
> 
> I assume that, at this point, the transfer is started, and repeated
> forever until step B below, right ?

Right, since the transaction is cyclic in nature, the transaction will continue
until stopped or switched :)

> > > DRM DPSUB gets next buffer:
> > > B.1 Prepare cyclic interleave txn
> > > B.2 Submit the txn
> > > B.3 Call terminate for current cyclic txn (we need an updated terminate
> > > which terminates the current txn, right now we have terminate_all which
> > > is a sledge hammer approach)
> > > B.4 Next txn would start once current one is started
> 
> Do you mean "once current one is completed" ?

Yup, sorry for the typo!

> > > Does this help and make sense in your case
> 
> It does, but I really wonder why we need a new terminate operation that
> would terminate a single transfer. If we call issue_pending at step B.3,
> when the new txn submitted, we can terminate the current transfer at the
> point. It changes the semantics of issue_pending, but only for cyclic
> transfers (this whole discussions it only about cyclic transfers). As a
> cyclic transfer will be repeated forever until terminated, there's no
> use case for issuing a new transfer without terminating the one in
> progress. I thus don't think we need a new terminate operation: the only
> thing that makes sense to do when submitting a new cyclic transfer is to
> terminate the current one and switch to the new one, and we already have
> all the APIs we need to enable this behaviour.

The issue_pending() is a NOP when engine is already running.

The design of APIs is that we submit a txn to pending_list and then the
pending_list is started when issue_pending() is called.
Or if the engine is already running, it will take next txn from
pending_list() when current txn completes.

The only consideration here in this case is that the cyclic txn never
completes. Do we really treat a new txn submission as an 'indication' of
completeness? That is indeed a point to ponder upon.

Also, we need to keep in mind that the dmaengine wont stop a cyclic
txn. It would be running and start next transfer (in this case do
from start) while it also gives you an interrupt. Here we would be
required to stop it and then start a new one...

Or perhaps remove the cyclic setting from the txn when a new one
arrives and that behaviour IMO is controller dependent, not sure if
all controllers support it..

> > That would be a clean way to handle it. We were missing this API for a
> > long time to be able to cancel the ongoing transfer (whether it is
> > cyclic or slave_sg, or memcpy) and move to the next one if there is one
> > pending.
> 
> Note that this new terminate API wouldn't terminate the ongoing transfer
> immediately, it would complete first, until the end of the cycle for
> cyclic transfers, and until the end of the whole transfer otherwise.
> This new operation would thus essentially be a no-op for non-cyclic
> transfers. I don't see how it would help :-) Do you have any particular
> use case in mind ?

Yeah that is something more to think about. Do we really abort here or
wait for the txn to complete. I think Peter needs the former and your
falls in the latter category

Thanks
Laurent Pinchart Feb. 14, 2020, 4:22 p.m. UTC | #18
Hi Vinod,

On Fri, Feb 14, 2020 at 09:53:49AM +0530, Vinod Koul wrote:
> On 13-02-20, 18:52, Laurent Pinchart wrote:
> > On Thu, Feb 13, 2020 at 04:15:38PM +0200, Peter Ujfalusi wrote:
> > > On 13/02/2020 16.07, Vinod Koul wrote:
> > > > On 13-02-20, 15:48, Laurent Pinchart wrote:
> > > >> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> > > >>> On 10-02-20, 16:06, Laurent Pinchart wrote:
> > > >>>
> > > >>>>>>> The use case here is not to switch to a new configuration, but to switch
> > > >>>>>>> to a new buffer. If the transfer had to be terminated manually first,
> > > >>>>>>> the DMA engine would potentially miss a frame, which is not acceptable.
> > > >>>>>>> We need an atomic way to switch to the next transfer.
> > > >>>>>>
> > > >>>>>> So in this case you have, let's say a cyclic descriptor with N buffers
> > > >>>>>> and they are cyclically capturing data and providing to client/user..
> > > >>>>>
> > > >>>>> For the display case it's cyclic over a single buffer that is repeatedly
> > > >>>>> displayed over and over again until a new one replaces it, when
> > > >>>>> userspace wants to change the content on the screen. Userspace only has
> > > >>>>> to provide a new buffer when content changes, otherwise the display has
> > > >>>>> to keep displaying the same one.
> > > >>>>
> > > >>>> Is the use case clear enough, or do you need more information ? Are you
> > > >>>> fine with the API for this kind of use case ?
> > > >>>
> > > >>> So we *know* when a new buffer is being used?
> > > >>
> > > >> The user of the DMA engine (the DRM DPSUB driver in this case) knows
> > > >> when a new buffer needs to be used, as it receives it from userspace. In
> > > >> response, it prepares a new interleaved cyclic transaction and queues
> > > >> it. At the next IRQ, the DMA engine driver switches to the new
> > > >> transaction (the implementation is slightly more complex to handle race
> > > >> conditions, but that's the idea).
> > > >>
> > > >>> IOW would it be possible for display (rather a dmaengine facing
> > > >>> display wrapper) to detect that we are reusing an old buffer and keep
> > > >>> the cyclic and once detected prepare a new descriptor, submit a new
> > > >>> one and then terminate old one which should trigger next transaction
> > > >>> to be submitted
> > > >>
> > > >> I'm not sure to follow you. Do you mean that the display driver should
> > > >> submit a non-cyclic transaction for every frame, reusing the same buffer
> > > >> for every transaction, until a new buffer is available ? The issue with
> > > >> this is that if the CPU load gets high, we may miss a frame, and the
> > > >> display will break. The DPDMA hardware implements cyclic support for
> > > >> this reason, and we want to use that feature to comply with the real
> > > >> time requirements.
> > > > 
> > > > Sorry to cause confusion :) I mean cyclic
> > > > 
> > > > So, DRM DPSUB get first buffer
> > > > A.1 Prepare cyclic interleave txn
> > > > A.2 Submit the txn (it doesn't start here)
> > > > A.3 Invoke issue_pending (that starts the txn)
> > 
> > I assume that, at this point, the transfer is started, and repeated
> > forever until step B below, right ?
> 
> Right, since the transaction is cyclic in nature, the transaction will continue
> until stopped or switched :)
> 
> > > > DRM DPSUB gets next buffer:
> > > > B.1 Prepare cyclic interleave txn
> > > > B.2 Submit the txn
> > > > B.3 Call terminate for current cyclic txn (we need an updated terminate
> > > > which terminates the current txn, right now we have terminate_all which
> > > > is a sledge hammer approach)
> > > > B.4 Next txn would start once current one is started
> > 
> > Do you mean "once current one is completed" ?
> 
> Yup, sorry for the typo!

No worries, I just wanted to make sure it wasn't a misunderstanding on
my side.

> > > > Does this help and make sense in your case
> > 
> > It does, but I really wonder why we need a new terminate operation that
> > would terminate a single transfer. If we call issue_pending at step B.3,
> > when the new txn submitted, we can terminate the current transfer at the
> > point. It changes the semantics of issue_pending, but only for cyclic
> > transfers (this whole discussions it only about cyclic transfers). As a
> > cyclic transfer will be repeated forever until terminated, there's no
> > use case for issuing a new transfer without terminating the one in
> > progress. I thus don't think we need a new terminate operation: the only
> > thing that makes sense to do when submitting a new cyclic transfer is to
> > terminate the current one and switch to the new one, and we already have
> > all the APIs we need to enable this behaviour.
> 
> The issue_pending() is a NOP when engine is already running.

That's not totally right. issue_pending() still moves submitted but not
issued transactions from the submitted queue to the issued queue. The
DMA engine only considers the issued queue, so issue_pending()
essentially tells the DMA engine to consider the submitted transaction
for processing after the already issued transactions complete (in the
non-cyclic case).

> The design of APIs is that we submit a txn to pending_list and then the
> pending_list is started when issue_pending() is called.
> Or if the engine is already running, it will take next txn from
> pending_list() when current txn completes.
> 
> The only consideration here in this case is that the cyclic txn never
> completes. Do we really treat a new txn submission as an 'indication' of
> completeness? That is indeed a point to ponder upon.

The reason why I think we should is two-fold:

1. I believe it's semantically aligned with the existing behaviour of
issue_pending(). As explained above, the operation tells the DMA engine
to consider submitted transactions for processing when the current (and
other issued) transactions complete. If we extend the definition of
complete to cover cyclic transactions, I think it's a good match.

2. There's really nothing else we could do with cyclic transactions.
They never complete today and have to be terminated manually with
terminate_all(). Using issue_pending() to move to a next cyclic
transaction doesn't change the existing behaviour by replacing a useful
(and used) feature, as issue_pending() is currently a no-op for cyclic
transactions. The newly issued transaction is never considered, and
calling terminate_all() will cancel the issued transactions. By
extending the behaviour of issue_pending(), we're making a new use case
possible, without restricting any other feature, and without "stealing"
issue_pending() and preventing it from implementing another useful
behaviour.

In a nutshell, an important reason why I like using issue_pending() for
this purpose is because it makes cyclic and non-cyclic transactions
behave more similarly, which I think is good from an API consistency
point of view.

> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> txn. It would be running and start next transfer (in this case do
> from start) while it also gives you an interrupt. Here we would be
> required to stop it and then start a new one...

We wouldn't be required to stop it in the middle, the expected behaviour
is for the DMA engine to complete the cyclic transaction until the end
of the cycle and then replace it by the new one. That's exactly what
happens for non-cyclic transactions when you call issue_pending(), which
makes me like this solution.

> Or perhaps remove the cyclic setting from the txn when a new one
> arrives and that behaviour IMO is controller dependent, not sure if
> all controllers support it..

At the very least I would assume controllers to be able to stop a cyclic
transaction forcefully, otherwise terminate_all() could never be
implemented. This may not lead to a gracefully switch from one cyclic
transaction to another one if the hardware doesn't allow doing so. In
that case I think tx_submit() could return an error, or we could turn
issue_pending() into an int operation to signal the error. Note that
there's no need to mass-patch drivers here, if a DMA engine client
issues a second cyclic transaction while one is in progress, the second
transaction won't be considered today. Signalling an error is in my
opinion a useful feature, but not doing so in DMA engine drivers can't
be a regression. We could also add a flag to tell whether this mode of
operation is supported.

> > > That would be a clean way to handle it. We were missing this API for a
> > > long time to be able to cancel the ongoing transfer (whether it is
> > > cyclic or slave_sg, or memcpy) and move to the next one if there is one
> > > pending.
> > 
> > Note that this new terminate API wouldn't terminate the ongoing transfer
> > immediately, it would complete first, until the end of the cycle for
> > cyclic transfers, and until the end of the whole transfer otherwise.
> > This new operation would thus essentially be a no-op for non-cyclic
> > transfers. I don't see how it would help :-) Do you have any particular
> > use case in mind ?
> 
> Yeah that is something more to think about. Do we really abort here or
> wait for the txn to complete. I think Peter needs the former and your
> falls in the latter category

I definitely need the latter, otherwise the display will flicker (or
completely misoperate) every time a new frame is displayed, which isn't
a good idea :-) I'm not sure about Peter's use cases, but it seems to me
that aborting a transaction immediately is racy in most cases, unless
the DMA engine supports byte-level residue reporting. One non-intrusive
option would be to add a flag to signal that a newly issued transaction
should interrupt the current transaction immediately.
Peter Ujfalusi Feb. 17, 2020, 10 a.m. UTC | #19
Hi Laurent, Vinod,

On 14/02/2020 18.22, Laurent Pinchart wrote:
>>> It does, but I really wonder why we need a new terminate operation that
>>> would terminate a single transfer. If we call issue_pending at step B.3,
>>> when the new txn submitted, we can terminate the current transfer at the
>>> point. It changes the semantics of issue_pending, but only for cyclic
>>> transfers (this whole discussions it only about cyclic transfers). As a
>>> cyclic transfer will be repeated forever until terminated, there's no
>>> use case for issuing a new transfer without terminating the one in
>>> progress. I thus don't think we need a new terminate operation: the only
>>> thing that makes sense to do when submitting a new cyclic transfer is to
>>> terminate the current one and switch to the new one, and we already have
>>> all the APIs we need to enable this behaviour.
>>
>> The issue_pending() is a NOP when engine is already running.
> 
> That's not totally right. issue_pending() still moves submitted but not
> issued transactions from the submitted queue to the issued queue. The
> DMA engine only considers the issued queue, so issue_pending()
> essentially tells the DMA engine to consider the submitted transaction
> for processing after the already issued transactions complete (in the
> non-cyclic case).

Vinod's point is for the cyclic case at the current state. It is NOP
essentially as we don't have way to not kill the whole channel.

Just a sidenote: it is not even that clean cut for slave transfers
either as the slave_config must _not_ change between the issued
transfers. Iow, you can not switch between 16bit and 32bit word lengths
with some DMA. EDMA, sDMA can do that, but UDMA can not for example...

>> The design of APIs is that we submit a txn to pending_list and then the
>> pending_list is started when issue_pending() is called.
>> Or if the engine is already running, it will take next txn from
>> pending_list() when current txn completes.
>>
>> The only consideration here in this case is that the cyclic txn never
>> completes. Do we really treat a new txn submission as an 'indication' of
>> completeness? That is indeed a point to ponder upon.
> 
> The reason why I think we should is two-fold:
> 
> 1. I believe it's semantically aligned with the existing behaviour of
> issue_pending(). As explained above, the operation tells the DMA engine
> to consider submitted transactions for processing when the current (and
> other issued) transactions complete. If we extend the definition of
> complete to cover cyclic transactions, I think it's a good match.

We will end up with different behavior between cyclic and non cyclic
transfers and the new behavior should be somehow supported by existing
drivers.
Yes, issue_pending is moving the submitted tx to the issued queue to be
executed on HW when the current transfer finished.
We only needed this for non cyclic uses so far. Some DMA hw can replace
the current transfer with a new one (re-trigger to fetch the new
configuration, like your's), but some can not (none of the system DMAs
on TI platforms can).
If we say that this is the behavior the DMA drivers must follow then we
will have non compliant DMA drivers. You can not move simply to other
DMA or can not create generic DMA code shared by drivers.

> 2. There's really nothing else we could do with cyclic transactions.
> They never complete today and have to be terminated manually with
> terminate_all(). Using issue_pending() to move to a next cyclic
> transaction doesn't change the existing behaviour by replacing a useful
> (and used) feature, as issue_pending() is currently a no-op for cyclic
> transactions. The newly issued transaction is never considered, and
> calling terminate_all() will cancel the issued transactions. By
> extending the behaviour of issue_pending(), we're making a new use case
> possible, without restricting any other feature, and without "stealing"
> issue_pending() and preventing it from implementing another useful
> behaviour.

But at the same time we make existing drivers non compliant...

Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
issued cookie would be cleaner.

cookie1 = dmaengine_issue_pending();
// will start the transfer
cookie2 = dmaengine_issue_pending();
// cookie1 still runs, cookie2 is waiting to be executed
dmaengine_abort_tx(chan);
// will kill cookie1 and executes cookie2

dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
can say selectively which issued tx you want to remove, if it is the
running one, then stop it and move to the next one.
In place of the cookie parameter a 0 could imply that I don't know the
cookie, but kill the running one.

We would preserve what issue_pending does atm and would give us a
generic flow of how other drivers should handle such cases.

Note that this is not only useful for cyclic cases. Any driver which
currently uses brute-force termination can be upgraded.
Prime example is UART RX. We issue an RX buffer to receive data, but it
is not guarantied that the remote will send data which would fill the
buffer and we hit a timeout waiting. We could issue the next buffer and
kill the stale transfer to reclaim the received data.

I think this can be even implemented for DMAs which can not do the same
thing as your DMA can.

> In a nutshell, an important reason why I like using issue_pending() for
> this purpose is because it makes cyclic and non-cyclic transactions
> behave more similarly, which I think is good from an API consistency
> point of view.
> 
>> Also, we need to keep in mind that the dmaengine wont stop a cyclic
>> txn. It would be running and start next transfer (in this case do
>> from start) while it also gives you an interrupt. Here we would be
>> required to stop it and then start a new one...
> 
> We wouldn't be required to stop it in the middle, the expected behaviour
> is for the DMA engine to complete the cyclic transaction until the end
> of the cycle and then replace it by the new one. That's exactly what
> happens for non-cyclic transactions when you call issue_pending(), which
> makes me like this solution.

Right, so we have two different use cases. Replace the current transfers
with the next issued one and abort the current transfer now and arm the
next issued one.
dmaengine_abort_tx(chan, cookie, forced) ?
forced == false: replace it at cyclic boundary
forced == true: right away (as HW allows), do not wait for cyclic round

>> Or perhaps remove the cyclic setting from the txn when a new one
>> arrives and that behaviour IMO is controller dependent, not sure if
>> all controllers support it..
> 
> At the very least I would assume controllers to be able to stop a cyclic
> transaction forcefully, otherwise terminate_all() could never be
> implemented. This may not lead to a gracefully switch from one cyclic
> transaction to another one if the hardware doesn't allow doing so. In
> that case I think tx_submit() could return an error, or we could turn
> issue_pending() into an int operation to signal the error. Note that
> there's no need to mass-patch drivers here, if a DMA engine client
> issues a second cyclic transaction while one is in progress, the second
> transaction won't be considered today. Signalling an error is in my
> opinion a useful feature, but not doing so in DMA engine drivers can't
> be a regression. We could also add a flag to tell whether this mode of
> operation is supported.

My problems is that it is changing the behavior of issue_pending() for
cyclic. If we document this than all existing DMA drivers are broken
(not complaint with the API documentation) as they don't do this.


>>>> That would be a clean way to handle it. We were missing this API for a
>>>> long time to be able to cancel the ongoing transfer (whether it is
>>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
>>>> pending.
>>>
>>> Note that this new terminate API wouldn't terminate the ongoing transfer
>>> immediately, it would complete first, until the end of the cycle for
>>> cyclic transfers, and until the end of the whole transfer otherwise.
>>> This new operation would thus essentially be a no-op for non-cyclic
>>> transfers. I don't see how it would help :-) Do you have any particular
>>> use case in mind ?
>>
>> Yeah that is something more to think about. Do we really abort here or
>> wait for the txn to complete. I think Peter needs the former and your
>> falls in the latter category
> 
> I definitely need the latter, otherwise the display will flicker (or
> completely misoperate) every time a new frame is displayed, which isn't
> a good idea :-)

Sure, and it is a great feature.

> I'm not sure about Peter's use cases, but it seems to me
> that aborting a transaction immediately is racy in most cases, unless
> the DMA engine supports byte-level residue reporting.

Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
new one right away.
UDMA on the other hand is not that forgiving... I would need to kill the
channel, wait for the termination to complete, reconfigure the channel
and execute the new transfer.

But with a separate callback API at least there will be an entry point
when this can be initiated and handled.
Fwiw, I think it should be simple to add this functionality to them, the
code is kind of handling it in other parts, but implementing it in the
issue_pending() is not really a clean solution.

In a channel you can run slave_sg transfers followed by cyclic if you
wish. A slave channel is what it is, slave channel which can be capable
to execute slave_sg and/or cyclic (and/or interleaved).
If issue_pending() is to take care then we need to check if the current
transfer is cyclic or not and decide based on that.

With a separate callback we in the DMA driver just need to do what the
client is asking for and no need to think.

> One non-intrusive
> option would be to add a flag to signal that a newly issued transaction
> should interrupt the current transaction immediately.

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Vinod Koul Feb. 19, 2020, 9:25 a.m. UTC | #20
On 17-02-20, 12:00, Peter Ujfalusi wrote:
> Hi Laurent, Vinod,
> 
> On 14/02/2020 18.22, Laurent Pinchart wrote:
> >>> It does, but I really wonder why we need a new terminate operation that
> >>> would terminate a single transfer. If we call issue_pending at step B.3,
> >>> when the new txn submitted, we can terminate the current transfer at the
> >>> point. It changes the semantics of issue_pending, but only for cyclic
> >>> transfers (this whole discussions it only about cyclic transfers). As a
> >>> cyclic transfer will be repeated forever until terminated, there's no
> >>> use case for issuing a new transfer without terminating the one in
> >>> progress. I thus don't think we need a new terminate operation: the only
> >>> thing that makes sense to do when submitting a new cyclic transfer is to
> >>> terminate the current one and switch to the new one, and we already have
> >>> all the APIs we need to enable this behaviour.
> >>
> >> The issue_pending() is a NOP when engine is already running.
> > 
> > That's not totally right. issue_pending() still moves submitted but not
> > issued transactions from the submitted queue to the issued queue. The
> > DMA engine only considers the issued queue, so issue_pending()
> > essentially tells the DMA engine to consider the submitted transaction
> > for processing after the already issued transactions complete (in the
> > non-cyclic case).
> 
> Vinod's point is for the cyclic case at the current state. It is NOP
> essentially as we don't have way to not kill the whole channel.

Or IOW there is no descriptor movement to hardware..

> Just a sidenote: it is not even that clean cut for slave transfers
> either as the slave_config must _not_ change between the issued
> transfers. Iow, you can not switch between 16bit and 32bit word lengths
> with some DMA. EDMA, sDMA can do that, but UDMA can not for example...
> 
> >> The design of APIs is that we submit a txn to pending_list and then the
> >> pending_list is started when issue_pending() is called.
> >> Or if the engine is already running, it will take next txn from
> >> pending_list() when current txn completes.
> >>
> >> The only consideration here in this case is that the cyclic txn never
> >> completes. Do we really treat a new txn submission as an 'indication' of
> >> completeness? That is indeed a point to ponder upon.
> > 
> > The reason why I think we should is two-fold:
> > 
> > 1. I believe it's semantically aligned with the existing behaviour of
> > issue_pending(). As explained above, the operation tells the DMA engine
> > to consider submitted transactions for processing when the current (and
> > other issued) transactions complete. If we extend the definition of
> > complete to cover cyclic transactions, I think it's a good match.
> 
> We will end up with different behavior between cyclic and non cyclic
> transfers and the new behavior should be somehow supported by existing
> drivers.
> Yes, issue_pending is moving the submitted tx to the issued queue to be
> executed on HW when the current transfer finished.
> We only needed this for non cyclic uses so far. Some DMA hw can replace
> the current transfer with a new one (re-trigger to fetch the new
> configuration, like your's), but some can not (none of the system DMAs
> on TI platforms can).
> If we say that this is the behavior the DMA drivers must follow then we
> will have non compliant DMA drivers. You can not move simply to other
> DMA or can not create generic DMA code shared by drivers.

That is very important point for API. We want no implicit behaviour, so
if we want an behaviour let us do that explicitly.

> > 2. There's really nothing else we could do with cyclic transactions.
> > They never complete today and have to be terminated manually with
> > terminate_all(). Using issue_pending() to move to a next cyclic
> > transaction doesn't change the existing behaviour by replacing a useful
> > (and used) feature, as issue_pending() is currently a no-op for cyclic
> > transactions. The newly issued transaction is never considered, and
> > calling terminate_all() will cancel the issued transactions. By
> > extending the behaviour of issue_pending(), we're making a new use case
> > possible, without restricting any other feature, and without "stealing"
> > issue_pending() and preventing it from implementing another useful
> > behaviour.
> 
> But at the same time we make existing drivers non compliant...
> 
> Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> issued cookie would be cleaner.
> 
> cookie1 = dmaengine_issue_pending();
> // will start the transfer
> cookie2 = dmaengine_issue_pending();
> // cookie1 still runs, cookie2 is waiting to be executed
> dmaengine_abort_tx(chan);
> // will kill cookie1 and executes cookie2

Right and we need a kill mode which kills the cookie1 at the end of
transfer (conditional to hw supporting that)

I think it should be generic API and usable in both the cyclic and
non-cyclic case

> 
> dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> can say selectively which issued tx you want to remove, if it is the
> running one, then stop it and move to the next one.
> In place of the cookie parameter a 0 could imply that I don't know the
> cookie, but kill the running one.
> 
> We would preserve what issue_pending does atm and would give us a
> generic flow of how other drivers should handle such cases.
> 
> Note that this is not only useful for cyclic cases. Any driver which
> currently uses brute-force termination can be upgraded.
> Prime example is UART RX. We issue an RX buffer to receive data, but it
> is not guarantied that the remote will send data which would fill the
> buffer and we hit a timeout waiting. We could issue the next buffer and
> kill the stale transfer to reclaim the received data.
> 
> I think this can be even implemented for DMAs which can not do the same
> thing as your DMA can.
> 
> > In a nutshell, an important reason why I like using issue_pending() for
> > this purpose is because it makes cyclic and non-cyclic transactions
> > behave more similarly, which I think is good from an API consistency
> > point of view.
> > 
> >> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> >> txn. It would be running and start next transfer (in this case do
> >> from start) while it also gives you an interrupt. Here we would be
> >> required to stop it and then start a new one...
> > 
> > We wouldn't be required to stop it in the middle, the expected behaviour
> > is for the DMA engine to complete the cyclic transaction until the end
> > of the cycle and then replace it by the new one. That's exactly what
> > happens for non-cyclic transactions when you call issue_pending(), which
> > makes me like this solution.
> 
> Right, so we have two different use cases. Replace the current transfers
> with the next issued one and abort the current transfer now and arm the
> next issued one.
> dmaengine_abort_tx(chan, cookie, forced) ?
> forced == false: replace it at cyclic boundary
> forced == true: right away (as HW allows), do not wait for cyclic round
> 
> >> Or perhaps remove the cyclic setting from the txn when a new one
> >> arrives and that behaviour IMO is controller dependent, not sure if
> >> all controllers support it..
> > 
> > At the very least I would assume controllers to be able to stop a cyclic
> > transaction forcefully, otherwise terminate_all() could never be
> > implemented. This may not lead to a gracefully switch from one cyclic
> > transaction to another one if the hardware doesn't allow doing so. In
> > that case I think tx_submit() could return an error, or we could turn
> > issue_pending() into an int operation to signal the error. Note that
> > there's no need to mass-patch drivers here, if a DMA engine client
> > issues a second cyclic transaction while one is in progress, the second
> > transaction won't be considered today. Signalling an error is in my
> > opinion a useful feature, but not doing so in DMA engine drivers can't
> > be a regression. We could also add a flag to tell whether this mode of
> > operation is supported.
> 
> My problems is that it is changing the behavior of issue_pending() for
> cyclic. If we document this than all existing DMA drivers are broken
> (not complaint with the API documentation) as they don't do this.
> 
> 
> >>>> That would be a clean way to handle it. We were missing this API for a
> >>>> long time to be able to cancel the ongoing transfer (whether it is
> >>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> >>>> pending.
> >>>
> >>> Note that this new terminate API wouldn't terminate the ongoing transfer
> >>> immediately, it would complete first, until the end of the cycle for
> >>> cyclic transfers, and until the end of the whole transfer otherwise.
> >>> This new operation would thus essentially be a no-op for non-cyclic
> >>> transfers. I don't see how it would help :-) Do you have any particular
> >>> use case in mind ?
> >>
> >> Yeah that is something more to think about. Do we really abort here or
> >> wait for the txn to complete. I think Peter needs the former and your
> >> falls in the latter category
> > 
> > I definitely need the latter, otherwise the display will flicker (or
> > completely misoperate) every time a new frame is displayed, which isn't
> > a good idea :-)
> 
> Sure, and it is a great feature.
> 
> > I'm not sure about Peter's use cases, but it seems to me
> > that aborting a transaction immediately is racy in most cases, unless
> > the DMA engine supports byte-level residue reporting.
> 
> Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> new one right away.
> UDMA on the other hand is not that forgiving... I would need to kill the
> channel, wait for the termination to complete, reconfigure the channel
> and execute the new transfer.
> 
> But with a separate callback API at least there will be an entry point
> when this can be initiated and handled.
> Fwiw, I think it should be simple to add this functionality to them, the
> code is kind of handling it in other parts, but implementing it in the
> issue_pending() is not really a clean solution.
> 
> In a channel you can run slave_sg transfers followed by cyclic if you
> wish. A slave channel is what it is, slave channel which can be capable
> to execute slave_sg and/or cyclic (and/or interleaved).
> If issue_pending() is to take care then we need to check if the current
> transfer is cyclic or not and decide based on that.
> 
> With a separate callback we in the DMA driver just need to do what the
> client is asking for and no need to think.
> 
> > One non-intrusive
> > option would be to add a flag to signal that a newly issued transaction
> > should interrupt the current transaction immediately.
> 
> - Péter
> 
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
> Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart Feb. 26, 2020, 4:24 p.m. UTC | #21
Hi Peter,

On Mon, Feb 17, 2020 at 12:00:02PM +0200, Peter Ujfalusi wrote:
> On 14/02/2020 18.22, Laurent Pinchart wrote:
> >>> It does, but I really wonder why we need a new terminate operation that
> >>> would terminate a single transfer. If we call issue_pending at step B.3,
> >>> when the new txn submitted, we can terminate the current transfer at the
> >>> point. It changes the semantics of issue_pending, but only for cyclic
> >>> transfers (this whole discussions it only about cyclic transfers). As a
> >>> cyclic transfer will be repeated forever until terminated, there's no
> >>> use case for issuing a new transfer without terminating the one in
> >>> progress. I thus don't think we need a new terminate operation: the only
> >>> thing that makes sense to do when submitting a new cyclic transfer is to
> >>> terminate the current one and switch to the new one, and we already have
> >>> all the APIs we need to enable this behaviour.
> >>
> >> The issue_pending() is a NOP when engine is already running.
> > 
> > That's not totally right. issue_pending() still moves submitted but not
> > issued transactions from the submitted queue to the issued queue. The
> > DMA engine only considers the issued queue, so issue_pending()
> > essentially tells the DMA engine to consider the submitted transaction
> > for processing after the already issued transactions complete (in the
> > non-cyclic case).
> 
> Vinod's point is for the cyclic case at the current state. It is NOP
> essentially as we don't have way to not kill the whole channel.

Considering the current implementation of issue_pending(), for cyclic
transfers, that's correct.

My point was that, semantically, and as is implemented today for
non-cyclic transfers, issue_pending() is meant to tell the DMA engine to
consider the submitted transactions for processing after the already
issued transactions complete. For cyclic transactions, .issue_pending
has no defined semantics, and is implemented as a NOP. My proposal is to
extend the existing semantics of issue_pending() as defined for the
non-cyclic transactions to also cover the cyclic transactions. This
won't cause any breakage (the issue_pending() operation being unused for
cyclic transactions, it won't cause any change to existing code), and
will make the API more consistent as the same semantics (moving to the
next submitted transaction when the current one completes) will be
implemented using the same operation.

> Just a sidenote: it is not even that clean cut for slave transfers
> either as the slave_config must _not_ change between the issued
> transfers. Iow, you can not switch between 16bit and 32bit word lengths
> with some DMA. EDMA, sDMA can do that, but UDMA can not for example...

I agree this can be an issue, but I'm not sure how it's related :-) I
believe we need to consider this feature, and specify the API better,
but that's fairly unrelated, isn't it ?

> >> The design of APIs is that we submit a txn to pending_list and then the
> >> pending_list is started when issue_pending() is called.
> >> Or if the engine is already running, it will take next txn from
> >> pending_list() when current txn completes.
> >>
> >> The only consideration here in this case is that the cyclic txn never
> >> completes. Do we really treat a new txn submission as an 'indication' of
> >> completeness? That is indeed a point to ponder upon.
> > 
> > The reason why I think we should is two-fold:
> > 
> > 1. I believe it's semantically aligned with the existing behaviour of
> > issue_pending(). As explained above, the operation tells the DMA engine
> > to consider submitted transactions for processing when the current (and
> > other issued) transactions complete. If we extend the definition of
> > complete to cover cyclic transactions, I think it's a good match.
> 
> We will end up with different behavior between cyclic and non cyclic
> transfers and the new behavior should be somehow supported by existing
> drivers.
> Yes, issue_pending is moving the submitted tx to the issued queue to be
> executed on HW when the current transfer finished.
> We only needed this for non cyclic uses so far. Some DMA hw can replace
> the current transfer with a new one (re-trigger to fetch the new
> configuration, like your's), but some can not (none of the system DMAs
> on TI platforms can).
> If we say that this is the behavior the DMA drivers must follow then we
> will have non compliant DMA drivers. You can not move simply to other
> DMA or can not create generic DMA code shared by drivers.

I think that's a matter of reporting the capabilities of the DMA engine,
and I believe a flag is enough for this. My proposal really gives a
purpose to an API that is unused today (.issue_pending() for cyclic
transfers), and that purpose is semantically coherent with the purpose
of the same function for non-cyclic transfers. I thus believe it brings
the cyclic and non-cyclic cases closer, making their behaviour more
similar, not different.

> > 2. There's really nothing else we could do with cyclic transactions.
> > They never complete today and have to be terminated manually with
> > terminate_all(). Using issue_pending() to move to a next cyclic
> > transaction doesn't change the existing behaviour by replacing a useful
> > (and used) feature, as issue_pending() is currently a no-op for cyclic
> > transactions. The newly issued transaction is never considered, and
> > calling terminate_all() will cancel the issued transactions. By
> > extending the behaviour of issue_pending(), we're making a new use case
> > possible, without restricting any other feature, and without "stealing"
> > issue_pending() and preventing it from implementing another useful
> > behaviour.
> 
> But at the same time we make existing drivers non compliant...

With a flag to report this new feature, that's not a problem.

> Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> issued cookie would be cleaner.
> 
> cookie1 = dmaengine_issue_pending();
> // will start the transfer
> cookie2 = dmaengine_issue_pending();
> // cookie1 still runs, cookie2 is waiting to be executed
> dmaengine_abort_tx(chan);
> // will kill cookie1 and executes cookie2
> 
> dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> can say selectively which issued tx you want to remove, if it is the
> running one, then stop it and move to the next one.
> In place of the cookie parameter a 0 could imply that I don't know the
> cookie, but kill the running one.
> 
> We would preserve what issue_pending does atm and would give us a
> generic flow of how other drivers should handle such cases.
> 
> Note that this is not only useful for cyclic cases. Any driver which
> currently uses brute-force termination can be upgraded.
> Prime example is UART RX. We issue an RX buffer to receive data, but it
> is not guarantied that the remote will send data which would fill the
> buffer and we hit a timeout waiting. We could issue the next buffer and
> kill the stale transfer to reclaim the received data.
> 
> I think this can be even implemented for DMAs which can not do the same
> thing as your DMA can.

But that's a different use case. What I'm after is *not*
killing/aborting a currently running transfer, it's moving to the next
submitted transfer at the next available sync point. I don't want to
abort the transfer in progress immediately, that would kill the display.

I understand that the above can be useful, but I really don't see why
I'd need to implement support for a more complex use case that I have no
need for, and could hardly even test properly, when what I'm after is
fixing what I view as a bug in the existing implementation: we have an
operation, issue_pending(), with a defined purpose, and it happens that
for one of the transfer types that operation doesn't work. I really see
no reason to implement a brand new API in this case.

Note that using issue_pending() as I propose doesn't preclude anyone
(you, or someone else) to implement your above proposal, but please
don't make me do your work :-) This is becoming a case of yak shaving
where I'm asked to fix shortcomings of the DMA engine API when they're
unrelated to my use case.

> > In a nutshell, an important reason why I like using issue_pending() for
> > this purpose is because it makes cyclic and non-cyclic transactions
> > behave more similarly, which I think is good from an API consistency
> > point of view.
> > 
> >> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> >> txn. It would be running and start next transfer (in this case do
> >> from start) while it also gives you an interrupt. Here we would be
> >> required to stop it and then start a new one...
> > 
> > We wouldn't be required to stop it in the middle, the expected behaviour
> > is for the DMA engine to complete the cyclic transaction until the end
> > of the cycle and then replace it by the new one. That's exactly what
> > happens for non-cyclic transactions when you call issue_pending(), which
> > makes me like this solution.
> 
> Right, so we have two different use cases. Replace the current transfers
> with the next issued one and abort the current transfer now and arm the
> next issued one.
> dmaengine_abort_tx(chan, cookie, forced) ?
> forced == false: replace it at cyclic boundary
> forced == true: right away (as HW allows), do not wait for cyclic round

See the above. You're making this more complicated than it should be,
designing an API that contains a small part that could help solving my
problem, and asking me to implement the 90% for free. Not fair :-)

> >> Or perhaps remove the cyclic setting from the txn when a new one
> >> arrives and that behaviour IMO is controller dependent, not sure if
> >> all controllers support it..
> > 
> > At the very least I would assume controllers to be able to stop a cyclic
> > transaction forcefully, otherwise terminate_all() could never be
> > implemented. This may not lead to a gracefully switch from one cyclic
> > transaction to another one if the hardware doesn't allow doing so. In
> > that case I think tx_submit() could return an error, or we could turn
> > issue_pending() into an int operation to signal the error. Note that
> > there's no need to mass-patch drivers here, if a DMA engine client
> > issues a second cyclic transaction while one is in progress, the second
> > transaction won't be considered today. Signalling an error is in my
> > opinion a useful feature, but not doing so in DMA engine drivers can't
> > be a regression. We could also add a flag to tell whether this mode of
> > operation is supported.
> 
> My problems is that it is changing the behavior of issue_pending() for
> cyclic. If we document this than all existing DMA drivers are broken
> (not complaint with the API documentation) as they don't do this.

Again, see above. I argue that it's not a behavioural change as such, as
the current behaviour is unused, because it's implemented as a NOP and
useless. With a simple flag to report if a DMA engine supports replacing
cyclic transfers, we would have a more consistent API as issue_pending()
will operate the same way for *all* types of transfers.

> >>>> That would be a clean way to handle it. We were missing this API for a
> >>>> long time to be able to cancel the ongoing transfer (whether it is
> >>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> >>>> pending.
> >>>
> >>> Note that this new terminate API wouldn't terminate the ongoing transfer
> >>> immediately, it would complete first, until the end of the cycle for
> >>> cyclic transfers, and until the end of the whole transfer otherwise.
> >>> This new operation would thus essentially be a no-op for non-cyclic
> >>> transfers. I don't see how it would help :-) Do you have any particular
> >>> use case in mind ?
> >>
> >> Yeah that is something more to think about. Do we really abort here or
> >> wait for the txn to complete. I think Peter needs the former and your
> >> falls in the latter category
> > 
> > I definitely need the latter, otherwise the display will flicker (or
> > completely misoperate) every time a new frame is displayed, which isn't
> > a good idea :-)
> 
> Sure, and it is a great feature.
> 
> > I'm not sure about Peter's use cases, but it seems to me
> > that aborting a transaction immediately is racy in most cases, unless
> > the DMA engine supports byte-level residue reporting.
> 
> Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> new one right away.
> UDMA on the other hand is not that forgiving... I would need to kill the
> channel, wait for the termination to complete, reconfigure the channel
> and execute the new transfer.
> 
> But with a separate callback API at least there will be an entry point
> when this can be initiated and handled.
> Fwiw, I think it should be simple to add this functionality to them, the
> code is kind of handling it in other parts, but implementing it in the
> issue_pending() is not really a clean solution.
> 
> In a channel you can run slave_sg transfers followed by cyclic if you
> wish. A slave channel is what it is, slave channel which can be capable
> to execute slave_sg and/or cyclic (and/or interleaved).
> If issue_pending() is to take care then we need to check if the current
> transfer is cyclic or not and decide based on that.
> 
> With a separate callback we in the DMA driver just need to do what the
> client is asking for and no need to think.

Let's put it that way: are you volunteering to implement your proposal
(with proper API documentation) in a reasonable time frame, so that I
can try it for my use case ? Otherwise I see no reason to push against
my proposal.

> > One non-intrusive
> > option would be to add a flag to signal that a newly issued transaction
> > should interrupt the current transaction immediately.
Laurent Pinchart Feb. 26, 2020, 4:30 p.m. UTC | #22
Hi Vinod,

On Wed, Feb 19, 2020 at 02:55:14PM +0530, Vinod Koul wrote:
> On 17-02-20, 12:00, Peter Ujfalusi wrote:
> > On 14/02/2020 18.22, Laurent Pinchart wrote:
> >>>> It does, but I really wonder why we need a new terminate operation that
> >>>> would terminate a single transfer. If we call issue_pending at step B.3,
> >>>> when the new txn submitted, we can terminate the current transfer at the
> >>>> point. It changes the semantics of issue_pending, but only for cyclic
> >>>> transfers (this whole discussions it only about cyclic transfers). As a
> >>>> cyclic transfer will be repeated forever until terminated, there's no
> >>>> use case for issuing a new transfer without terminating the one in
> >>>> progress. I thus don't think we need a new terminate operation: the only
> >>>> thing that makes sense to do when submitting a new cyclic transfer is to
> >>>> terminate the current one and switch to the new one, and we already have
> >>>> all the APIs we need to enable this behaviour.
> >>>
> >>> The issue_pending() is a NOP when engine is already running.
> >> 
> >> That's not totally right. issue_pending() still moves submitted but not
> >> issued transactions from the submitted queue to the issued queue. The
> >> DMA engine only considers the issued queue, so issue_pending()
> >> essentially tells the DMA engine to consider the submitted transaction
> >> for processing after the already issued transactions complete (in the
> >> non-cyclic case).
> > 
> > Vinod's point is for the cyclic case at the current state. It is NOP
> > essentially as we don't have way to not kill the whole channel.
> 
> Or IOW there is no descriptor movement to hardware..
> 
> > Just a sidenote: it is not even that clean cut for slave transfers
> > either as the slave_config must _not_ change between the issued
> > transfers. Iow, you can not switch between 16bit and 32bit word lengths
> > with some DMA. EDMA, sDMA can do that, but UDMA can not for example...
> > 
> >>> The design of APIs is that we submit a txn to pending_list and then the
> >>> pending_list is started when issue_pending() is called.
> >>> Or if the engine is already running, it will take next txn from
> >>> pending_list() when current txn completes.
> >>>
> >>> The only consideration here in this case is that the cyclic txn never
> >>> completes. Do we really treat a new txn submission as an 'indication' of
> >>> completeness? That is indeed a point to ponder upon.
> >> 
> >> The reason why I think we should is two-fold:
> >> 
> >> 1. I believe it's semantically aligned with the existing behaviour of
> >> issue_pending(). As explained above, the operation tells the DMA engine
> >> to consider submitted transactions for processing when the current (and
> >> other issued) transactions complete. If we extend the definition of
> >> complete to cover cyclic transactions, I think it's a good match.
> > 
> > We will end up with different behavior between cyclic and non cyclic
> > transfers and the new behavior should be somehow supported by existing
> > drivers.
> > Yes, issue_pending is moving the submitted tx to the issued queue to be
> > executed on HW when the current transfer finished.
> > We only needed this for non cyclic uses so far. Some DMA hw can replace
> > the current transfer with a new one (re-trigger to fetch the new
> > configuration, like your's), but some can not (none of the system DMAs
> > on TI platforms can).
> > If we say that this is the behavior the DMA drivers must follow then we
> > will have non compliant DMA drivers. You can not move simply to other
> > DMA or can not create generic DMA code shared by drivers.
> 
> That is very important point for API. We want no implicit behaviour, so
> if we want an behaviour let us do that explicitly.

As I've just explained in my reply to Peter, there's nothing implicit in
my proposal :-) It's however missing a flag to report if the DMA engine
driver supports this feature, put apart from that, it makes the API
*more* consistent by making issue_pending() cover *all* transfer types
with the *same* semantics.

> >> 2. There's really nothing else we could do with cyclic transactions.
> >> They never complete today and have to be terminated manually with
> >> terminate_all(). Using issue_pending() to move to a next cyclic
> >> transaction doesn't change the existing behaviour by replacing a useful
> >> (and used) feature, as issue_pending() is currently a no-op for cyclic
> >> transactions. The newly issued transaction is never considered, and
> >> calling terminate_all() will cancel the issued transactions. By
> >> extending the behaviour of issue_pending(), we're making a new use case
> >> possible, without restricting any other feature, and without "stealing"
> >> issue_pending() and preventing it from implementing another useful
> >> behaviour.
> > 
> > But at the same time we make existing drivers non compliant...
> > 
> > Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> > issued cookie would be cleaner.
> > 
> > cookie1 = dmaengine_issue_pending();
> > // will start the transfer
> > cookie2 = dmaengine_issue_pending();
> > // cookie1 still runs, cookie2 is waiting to be executed
> > dmaengine_abort_tx(chan);
> > // will kill cookie1 and executes cookie2
> 
> Right and we need a kill mode which kills the cookie1 at the end of
> transfer (conditional to hw supporting that)
> 
> I think it should be generic API and usable in both the cyclic and
> non-cyclic case

I have no issue with an API that can abort ongoing transfers without
killing the whole queue of pending transfers, but that's not what I'm
after, it's not my use case. Again, as explained in my reply to Peter,
I'm not looking for a way to abort a transfer immediately, but to move
to the next transfer at the end of the current one. It's very different,
and the DMA engine API already supports this for all transfers but
cyclic transfers. I'd go as far as saying that my proposal is fixing a
bug in the current implementation :-)

> > dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> > can say selectively which issued tx you want to remove, if it is the
> > running one, then stop it and move to the next one.
> > In place of the cookie parameter a 0 could imply that I don't know the
> > cookie, but kill the running one.
> > 
> > We would preserve what issue_pending does atm and would give us a
> > generic flow of how other drivers should handle such cases.
> > 
> > Note that this is not only useful for cyclic cases. Any driver which
> > currently uses brute-force termination can be upgraded.
> > Prime example is UART RX. We issue an RX buffer to receive data, but it
> > is not guarantied that the remote will send data which would fill the
> > buffer and we hit a timeout waiting. We could issue the next buffer and
> > kill the stale transfer to reclaim the received data.
> > 
> > I think this can be even implemented for DMAs which can not do the same
> > thing as your DMA can.
> > 
> >> In a nutshell, an important reason why I like using issue_pending() for
> >> this purpose is because it makes cyclic and non-cyclic transactions
> >> behave more similarly, which I think is good from an API consistency
> >> point of view.
> >> 
> >>> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> >>> txn. It would be running and start next transfer (in this case do
> >>> from start) while it also gives you an interrupt. Here we would be
> >>> required to stop it and then start a new one...
> >> 
> >> We wouldn't be required to stop it in the middle, the expected behaviour
> >> is for the DMA engine to complete the cyclic transaction until the end
> >> of the cycle and then replace it by the new one. That's exactly what
> >> happens for non-cyclic transactions when you call issue_pending(), which
> >> makes me like this solution.
> > 
> > Right, so we have two different use cases. Replace the current transfers
> > with the next issued one and abort the current transfer now and arm the
> > next issued one.
> > dmaengine_abort_tx(chan, cookie, forced) ?
> > forced == false: replace it at cyclic boundary
> > forced == true: right away (as HW allows), do not wait for cyclic round
> > 
> >>> Or perhaps remove the cyclic setting from the txn when a new one
> >>> arrives and that behaviour IMO is controller dependent, not sure if
> >>> all controllers support it..
> >> 
> >> At the very least I would assume controllers to be able to stop a cyclic
> >> transaction forcefully, otherwise terminate_all() could never be
> >> implemented. This may not lead to a gracefully switch from one cyclic
> >> transaction to another one if the hardware doesn't allow doing so. In
> >> that case I think tx_submit() could return an error, or we could turn
> >> issue_pending() into an int operation to signal the error. Note that
> >> there's no need to mass-patch drivers here, if a DMA engine client
> >> issues a second cyclic transaction while one is in progress, the second
> >> transaction won't be considered today. Signalling an error is in my
> >> opinion a useful feature, but not doing so in DMA engine drivers can't
> >> be a regression. We could also add a flag to tell whether this mode of
> >> operation is supported.
> > 
> > My problems is that it is changing the behavior of issue_pending() for
> > cyclic. If we document this than all existing DMA drivers are broken
> > (not complaint with the API documentation) as they don't do this.
> > 
> > 
> >>>>> That would be a clean way to handle it. We were missing this API for a
> >>>>> long time to be able to cancel the ongoing transfer (whether it is
> >>>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> >>>>> pending.
> >>>>
> >>>> Note that this new terminate API wouldn't terminate the ongoing transfer
> >>>> immediately, it would complete first, until the end of the cycle for
> >>>> cyclic transfers, and until the end of the whole transfer otherwise.
> >>>> This new operation would thus essentially be a no-op for non-cyclic
> >>>> transfers. I don't see how it would help :-) Do you have any particular
> >>>> use case in mind ?
> >>>
> >>> Yeah that is something more to think about. Do we really abort here or
> >>> wait for the txn to complete. I think Peter needs the former and your
> >>> falls in the latter category
> >> 
> >> I definitely need the latter, otherwise the display will flicker (or
> >> completely misoperate) every time a new frame is displayed, which isn't
> >> a good idea :-)
> > 
> > Sure, and it is a great feature.
> > 
> >> I'm not sure about Peter's use cases, but it seems to me
> >> that aborting a transaction immediately is racy in most cases, unless
> >> the DMA engine supports byte-level residue reporting.
> > 
> > Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> > new one right away.
> > UDMA on the other hand is not that forgiving... I would need to kill the
> > channel, wait for the termination to complete, reconfigure the channel
> > and execute the new transfer.
> > 
> > But with a separate callback API at least there will be an entry point
> > when this can be initiated and handled.
> > Fwiw, I think it should be simple to add this functionality to them, the
> > code is kind of handling it in other parts, but implementing it in the
> > issue_pending() is not really a clean solution.
> > 
> > In a channel you can run slave_sg transfers followed by cyclic if you
> > wish. A slave channel is what it is, slave channel which can be capable
> > to execute slave_sg and/or cyclic (and/or interleaved).
> > If issue_pending() is to take care then we need to check if the current
> > transfer is cyclic or not and decide based on that.
> > 
> > With a separate callback we in the DMA driver just need to do what the
> > client is asking for and no need to think.
> > 
> >> One non-intrusive
> >> option would be to add a flag to signal that a newly issued transaction
> >> should interrupt the current transaction immediately.

Patch
diff mbox series

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 03ac4b96117c..4ffb98a47f31 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -981,7 +981,13 @@  int dma_async_device_register(struct dma_device *device)
 			"DMA_INTERLEAVE");
 		return -EIO;
 	}
-
+	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
+	    !device->device_prep_interleaved_cyclic) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_INTERLEAVE_CYCLIC");
+		return -EIO;
+	}
 
 	if (!device->device_tx_status) {
 		dev_err(device->dev, "Device tx_status is not defined\n");
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8fcdee1c0cf9..e9af3bf835cb 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -61,6 +61,7 @@  enum dma_transaction_type {
 	DMA_SLAVE,
 	DMA_CYCLIC,
 	DMA_INTERLEAVE,
+	DMA_INTERLEAVE_CYCLIC,
 /* last transaction type for creation of the capabilities mask */
 	DMA_TX_TYPE_END,
 };
@@ -701,6 +702,10 @@  struct dma_filter {
  *	The function takes a buffer of size buf_len. The callback function will
  *	be called after period_len bytes have been transferred.
  * @device_prep_interleaved_dma: Transfer expression in a generic way.
+ * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
+ *	This is similar to @device_prep_interleaved_dma, but the transfer is
+ *	repeated until a new transfer is issued. This transfer type is meant
+ *	for display.
  * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
  * @device_config: Pushes a new configuration to a channel, return 0 or an error
  *	code
@@ -785,6 +790,9 @@  struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
 		struct dma_chan *chan, struct dma_interleaved_template *xt,
 		unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_interleaved_cyclic)(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)(
 		struct dma_chan *chan, dma_addr_t dst, u64 data,
 		unsigned long flags);
@@ -880,6 +888,16 @@  static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
 	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_cyclic(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags)
+{
+	if (!chan || !chan->device || !chan->device->device_prep_interleaved_cyclic)
+		return NULL;
+
+	return chan->device->device_prep_interleaved_cyclic(chan, xt, flags);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset(
 		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
 		unsigned long flags)