diff mbox series

[v3,2/6] dmaengine: Add interleaved cyclic transaction type

Message ID 20200123022939.9739-3-laurent.pinchart@ideasonboard.com (mailing list archive)
State Changes Requested
Headers show
Series dma: Add Xilinx ZynqMP DPDMA driver | expand

Commit Message

Laurent Pinchart Jan. 23, 2020, 2:29 a.m. UTC
The new interleaved cyclic transaction type combines interleaved and
cyclic transactions. It is designed for DMA engines that back display
controllers, where the same 2D frame needs to be output to the display
until a new frame is available.

Suggested-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
---
 drivers/dma/dmaengine.c   |  8 +++++++-
 include/linux/dmaengine.h | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

Comments

Peter Ujfalusi Jan. 23, 2020, 8:03 a.m. UTC | #1
Hi Laurent,

On 23/01/2020 4.29, Laurent Pinchart wrote:
> The new interleaved cyclic transaction type combines interleaved and
> cycle transactions. It is designed for DMA engines that back display
> controllers, where the same 2D frame needs to be output to the display
> until a new frame is available.
> 
> Suggested-by: Vinod Koul <vkoul@kernel.org>
> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> ---
>  drivers/dma/dmaengine.c   |  8 +++++++-
>  include/linux/dmaengine.h | 18 ++++++++++++++++++
>  2 files changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> index 03ac4b96117c..4ffb98a47f31 100644
> --- a/drivers/dma/dmaengine.c
> +++ b/drivers/dma/dmaengine.c
> @@ -981,7 +981,13 @@ int dma_async_device_register(struct dma_device *device)
>  			"DMA_INTERLEAVE");
>  		return -EIO;
>  	}
> -
> +	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
> +	    !device->device_prep_interleaved_cyclic) {
> +		dev_err(device->dev,
> +			"Device claims capability %s, but op is not defined\n",
> +			"DMA_INTERLEAVE_CYCLIC");
> +		return -EIO;
> +	}
>  
>  	if (!device->device_tx_status) {
>  		dev_err(device->dev, "Device tx_status is not defined\n");
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index 8fcdee1c0cf9..e9af3bf835cb 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -61,6 +61,7 @@ enum dma_transaction_type {
>  	DMA_SLAVE,
>  	DMA_CYCLIC,
>  	DMA_INTERLEAVE,
> +	DMA_INTERLEAVE_CYCLIC,
>  /* last transaction type for creation of the capabilities mask */
>  	DMA_TX_TYPE_END,
>  };
> @@ -701,6 +702,10 @@ struct dma_filter {
>   *	The function takes a buffer of size buf_len. The callback function will
>   *	be called after period_len bytes have been transferred.
>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> + *	repeated until a new transfer is issued. This transfer type is meant
> + *	for display.

I think capture (camera) is another potential beneficiary of this.

So you don't need to terminate the running interleaved_cyclic and start
a new one, but prepare and issue a new one, which would
terminate/replace the currently running cyclic interleaved DMA?

Can you also update the documentation at
Documentation/driver-api/dmaengine/client.rst

One more thing might be good to clarify for the interleaved_cyclic:
What is expected when DMA_PREP_INTERRUPT is set in the flags? The
client's callback is called for each completion of
dma_interleaved_template, right?

- Péter

>   * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
>   * @device_config: Pushes a new configuration to a channel, return 0 or an error
>   *	code
> @@ -785,6 +790,9 @@ struct dma_device {
>  	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
>  		struct dma_chan *chan, struct dma_interleaved_template *xt,
>  		unsigned long flags);
> +	struct dma_async_tx_descriptor *(*device_prep_interleaved_cyclic)(
> +		struct dma_chan *chan, struct dma_interleaved_template *xt,
> +		unsigned long flags);
>  	struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)(
>  		struct dma_chan *chan, dma_addr_t dst, u64 data,
>  		unsigned long flags);
> @@ -880,6 +888,16 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
>  	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
>  }
>  
> +static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_cyclic(
> +		struct dma_chan *chan, struct dma_interleaved_template *xt,
> +		unsigned long flags)
> +{
> +	if (!chan || !chan->device || !chan->device->device_prep_interleaved_cyclic)
> +		return NULL;
> +
> +	return chan->device->device_prep_interleaved_cyclic(chan, xt, flags);
> +}
> +
>  static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset(
>  		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
>  		unsigned long flags)
> 

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Vinod Koul Jan. 23, 2020, 8:43 a.m. UTC | #2
On 23-01-20, 10:03, Peter Ujfalusi wrote:
> Hi Laurent,
> 
> On 23/01/2020 4.29, Laurent Pinchart wrote:
> > The new interleaved cyclic transaction type combines interleaved and
> > cycle transactions. It is designed for DMA engines that back display
> > controllers, where the same 2D frame needs to be output to the display
> > until a new frame is available.
> > 
> > Suggested-by: Vinod Koul <vkoul@kernel.org>
> > Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> > ---
> >  drivers/dma/dmaengine.c   |  8 +++++++-
> >  include/linux/dmaengine.h | 18 ++++++++++++++++++
> >  2 files changed, 25 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> > index 03ac4b96117c..4ffb98a47f31 100644
> > --- a/drivers/dma/dmaengine.c
> > +++ b/drivers/dma/dmaengine.c
> > @@ -981,7 +981,13 @@ int dma_async_device_register(struct dma_device *device)
> >  			"DMA_INTERLEAVE");
> >  		return -EIO;
> >  	}
> > -
> > +	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
> > +	    !device->device_prep_interleaved_cyclic) {
> > +		dev_err(device->dev,
> > +			"Device claims capability %s, but op is not defined\n",
> > +			"DMA_INTERLEAVE_CYCLIC");
> > +		return -EIO;
> > +	}
> >  
> >  	if (!device->device_tx_status) {
> >  		dev_err(device->dev, "Device tx_status is not defined\n");
> > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> > index 8fcdee1c0cf9..e9af3bf835cb 100644
> > --- a/include/linux/dmaengine.h
> > +++ b/include/linux/dmaengine.h
> > @@ -61,6 +61,7 @@ enum dma_transaction_type {
> >  	DMA_SLAVE,
> >  	DMA_CYCLIC,
> >  	DMA_INTERLEAVE,
> > +	DMA_INTERLEAVE_CYCLIC,
> >  /* last transaction type for creation of the capabilities mask */
> >  	DMA_TX_TYPE_END,
> >  };
> > @@ -701,6 +702,10 @@ struct dma_filter {
> >   *	The function takes a buffer of size buf_len. The callback function will
> >   *	be called after period_len bytes have been transferred.
> >   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> > + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> > + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> > + *	repeated until a new transfer is issued. This transfer type is meant
> > + *	for display.
> 
> I think capture (camera) is another potential beneficiary of this.
> 
> So you don't need to terminate the running interleaved_cyclic and start
> a new one, but prepare and issue a new one, which would
> terminate/replace the currently running cyclic interleaved DMA?

Why not explicitly terminate the transfer and start when a new one is
issued? That can be common usage for audio and display.
Peter Ujfalusi Jan. 23, 2020, 8:51 a.m. UTC | #3
Vinod,

On 23/01/2020 10.43, Vinod Koul wrote:
> On 23-01-20, 10:03, Peter Ujfalusi wrote:
>> Hi Laurent,
>>
>> On 23/01/2020 4.29, Laurent Pinchart wrote:
>>> The new interleaved cyclic transaction type combines interleaved and
>>> cycle transactions. It is designed for DMA engines that back display
>>> controllers, where the same 2D frame needs to be output to the display
>>> until a new frame is available.
>>>
>>> Suggested-by: Vinod Koul <vkoul@kernel.org>
>>> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
>>> ---
>>>  drivers/dma/dmaengine.c   |  8 +++++++-
>>>  include/linux/dmaengine.h | 18 ++++++++++++++++++
>>>  2 files changed, 25 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
>>> index 03ac4b96117c..4ffb98a47f31 100644
>>> --- a/drivers/dma/dmaengine.c
>>> +++ b/drivers/dma/dmaengine.c
>>> @@ -981,7 +981,13 @@ int dma_async_device_register(struct dma_device *device)
>>>  			"DMA_INTERLEAVE");
>>>  		return -EIO;
>>>  	}
>>> -
>>> +	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
>>> +	    !device->device_prep_interleaved_cyclic) {
>>> +		dev_err(device->dev,
>>> +			"Device claims capability %s, but op is not defined\n",
>>> +			"DMA_INTERLEAVE_CYCLIC");
>>> +		return -EIO;
>>> +	}
>>>  
>>>  	if (!device->device_tx_status) {
>>>  		dev_err(device->dev, "Device tx_status is not defined\n");
>>> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
>>> index 8fcdee1c0cf9..e9af3bf835cb 100644
>>> --- a/include/linux/dmaengine.h
>>> +++ b/include/linux/dmaengine.h
>>> @@ -61,6 +61,7 @@ enum dma_transaction_type {
>>>  	DMA_SLAVE,
>>>  	DMA_CYCLIC,
>>>  	DMA_INTERLEAVE,
>>> +	DMA_INTERLEAVE_CYCLIC,
>>>  /* last transaction type for creation of the capabilities mask */
>>>  	DMA_TX_TYPE_END,
>>>  };
>>> @@ -701,6 +702,10 @@ struct dma_filter {
>>>   *	The function takes a buffer of size buf_len. The callback function will
>>>   *	be called after period_len bytes have been transferred.
>>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
>>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
>>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
>>> + *	repeated until a new transfer is issued. This transfer type is meant
>>> + *	for display.
>>
>> I think capture (camera) is another potential beneficiary of this.
>>
>> So you don't need to terminate the running interleaved_cyclic and start
>> a new one, but prepare and issue a new one, which would
>> terminate/replace the currently running cyclic interleaved DMA?
> 
> Why not explicitly terminate the transfer and start when a new one is
> issued. That can be common usage for audio and display..

Yes, this is what I'm asking. The cyclic transfer is running and in
order to start the new transfer, the previous should stop. But in cyclic
case it is not going to happen unless it is terminated.

When one would want to have a different interleaved transfer, the display
(or capture) IP needs to be reconfigured as well. The transfer would need to
be terminated anyway to avoid interpreting data in a wrong way.

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart Jan. 23, 2020, 12:23 p.m. UTC | #4
Hello,

On Thu, Jan 23, 2020 at 10:51:42AM +0200, Peter Ujfalusi wrote:
> On 23/01/2020 10.43, Vinod Koul wrote:
> > On 23-01-20, 10:03, Peter Ujfalusi wrote:
> >> On 23/01/2020 4.29, Laurent Pinchart wrote:
> >>> The new interleaved cyclic transaction type combines interleaved and
> >>> cycle transactions. It is designed for DMA engines that back display
> >>> controllers, where the same 2D frame needs to be output to the display
> >>> until a new frame is available.
> >>>
> >>> Suggested-by: Vinod Koul <vkoul@kernel.org>
> >>> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> >>> ---
> >>>  drivers/dma/dmaengine.c   |  8 +++++++-
> >>>  include/linux/dmaengine.h | 18 ++++++++++++++++++
> >>>  2 files changed, 25 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> >>> index 03ac4b96117c..4ffb98a47f31 100644
> >>> --- a/drivers/dma/dmaengine.c
> >>> +++ b/drivers/dma/dmaengine.c
> >>> @@ -981,7 +981,13 @@ int dma_async_device_register(struct dma_device *device)
> >>>  			"DMA_INTERLEAVE");
> >>>  		return -EIO;
> >>>  	}
> >>> -
> >>> +	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
> >>> +	    !device->device_prep_interleaved_cyclic) {
> >>> +		dev_err(device->dev,
> >>> +			"Device claims capability %s, but op is not defined\n",
> >>> +			"DMA_INTERLEAVE_CYCLIC");
> >>> +		return -EIO;
> >>> +	}
> >>>  
> >>>  	if (!device->device_tx_status) {
> >>>  		dev_err(device->dev, "Device tx_status is not defined\n");
> >>> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> >>> index 8fcdee1c0cf9..e9af3bf835cb 100644
> >>> --- a/include/linux/dmaengine.h
> >>> +++ b/include/linux/dmaengine.h
> >>> @@ -61,6 +61,7 @@ enum dma_transaction_type {
> >>>  	DMA_SLAVE,
> >>>  	DMA_CYCLIC,
> >>>  	DMA_INTERLEAVE,
> >>> +	DMA_INTERLEAVE_CYCLIC,
> >>>  /* last transaction type for creation of the capabilities mask */
> >>>  	DMA_TX_TYPE_END,
> >>>  };
> >>> @@ -701,6 +702,10 @@ struct dma_filter {
> >>>   *	The function takes a buffer of size buf_len. The callback function will
> >>>   *	be called after period_len bytes have been transferred.
> >>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> >>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> >>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> >>> + *	repeated until a new transfer is issued. This transfer type is meant
> >>> + *	for display.
> >>
> >> I think capture (camera) is another potential beneficiary of this.

Possibly, although in the camera case I'd rather have the hardware stop
if there's no more buffer. Requiring a buffer to always be present is
annoying from a userspace point of view. For display it's different, if
userspace doesn't submit a new frame, the same frame should keep being
displayed on the screen.

> >> So you don't need to terminate the running interleaved_cyclic and start
> >> a new one, but prepare and issue a new one, which would
> >> terminate/replace the currently running cyclic interleaved DMA?

Correct.

> > Why not explicitly terminate the transfer and start when a new one is
> > issued. That can be common usage for audio and display..
> 
> Yes, this is what I'm asking. The cyclic transfer is running and in
> order to start the new transfer, the previous should stop. But in cyclic
> case it is not going to happen unless it is terminated.
> 
> When one would want to have different interleaved transfer the display
> (or capture )IP needs to be reconfigured as well. The the would need to
> be terminated anyways to avoid interpreting data in a wrong way.

The use case here is not to switch to a new configuration, but to switch
to a new buffer. If the transfer had to be terminated manually first,
the DMA engine would potentially miss a frame, which is not acceptable.
We need an atomic way to switch to the next transfer.
Vinod Koul Jan. 24, 2020, 6:10 a.m. UTC | #5
Hi Laurent,

On 23-01-20, 14:23, Laurent Pinchart wrote:
> > >>> @@ -701,6 +702,10 @@ struct dma_filter {
> > >>>   *	The function takes a buffer of size buf_len. The callback function will
> > >>>   *	be called after period_len bytes have been transferred.
> > >>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> > >>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> > >>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> > >>> + *	repeated until a new transfer is issued. This transfer type is meant
> > >>> + *	for display.
> > >>
> > >> I think capture (camera) is another potential beneficiary of this.
> 
> Possibly, although in the camera case I'd rather have the hardware stop
> if there's no more buffer. Requiring a buffer to always be present is
> annoying from a userspace point of view. For display it's different, if
> userspace doesn't submit a new frame, the same frame should keep being
> displayed on the screen.
> 
> > >> So you don't need to terminate the running interleaved_cyclic and start
> > >> a new one, but prepare and issue a new one, which would
> > >> terminate/replace the currently running cyclic interleaved DMA?
> 
> Correct.
> 
> > > Why not explicitly terminate the transfer and start when a new one is
> > > issued. That can be common usage for audio and display..
> > 
> > Yes, this is what I'm asking. The cyclic transfer is running and in
> > order to start the new transfer, the previous should stop. But in cyclic
> > case it is not going to happen unless it is terminated.
> > 
> > When one would want to have different interleaved transfer the display
> > (or capture )IP needs to be reconfigured as well. The the would need to
> > be terminated anyways to avoid interpreting data in a wrong way.
> 
> The use case here is not to switch to a new configuration, but to switch
> to a new buffer. If the transfer had to be terminated manually first,
> the DMA engine would potentially miss a frame, which is not acceptable.
> We need an atomic way to switch to the next transfer.

So in this case you have, let's say a cyclic descriptor with N buffers
and they are cyclically capturing data and providing to client/user..

So why would you like to submit again...? Once whole capture has
completed you would terminate, right...

Sorry, I'm not able to wrap my head around why a new submission is required
and, if that is the case, why the previous one can't be terminated :)
Peter Ujfalusi Jan. 24, 2020, 7:20 a.m. UTC | #6
Hi Laurent,

On 23/01/2020 14.23, Laurent Pinchart wrote:
>>>> I think capture (camera) is another potential beneficiary of this.
> 
> Possibly, although in the camera case I'd rather have the hardware stop
> if there's no more buffer. Requiring a buffer to always be present is
> annoying from a userspace point of view. For display it's different, if
> userspace doesn't submit a new frame, the same frame should keep being
> displayed on the screen.
> 
>>>> So you don't need to terminate the running interleaved_cyclic and start
>>>> a new one, but prepare and issue a new one, which would
>>>> terminate/replace the currently running cyclic interleaved DMA?
> 
> Correct.
> 
>>> Why not explicitly terminate the transfer and start when a new one is
>>> issued. That can be common usage for audio and display..
>>
>> Yes, this is what I'm asking. The cyclic transfer is running and in
>> order to start the new transfer, the previous should stop. But in cyclic
>> case it is not going to happen unless it is terminated.
>>
>> When one would want to have different interleaved transfer the display
>> (or capture )IP needs to be reconfigured as well. The the would need to
>> be terminated anyways to avoid interpreting data in a wrong way.
> 
> The use case here is not to switch to a new configuration, but to switch
> to a new buffer. If the transfer had to be terminated manually first,
> the DMA engine would potentially miss a frame, which is not acceptable.
> We need an atomic way to switch to the next transfer.

You have special hardware in hand; most DMAs can not just replace a
cyclic transfer in-flight and it also kind of violates the DMAengine
principles.
If cyclic transfer is started then it is expected to run forever until
it is terminated. Preparing and issuing a new transfer will not get
executed when there is already a cyclic transfer in flight as your only
option is to terminate_all, which will kill the running cyclic _and_
will discard the issued and pending transfers.

So the use case is page flip when you have multiple framebuffers and you
switch them to show the updated one, right?

There are things missing in DMAengine in API level for sure to do this,
imho.
The issue is that cyclic transfers will never complete, they run until
terminated, but you want to replace the currently executing one with
another cyclic transfer without actually terminating the other.

It is like pause the 1st cyclic and continue with the 2nd one. Then at
some point you pause the 2nd one and restart the 1st one.
It is also crucial that the pause /switch happens when the executing one
finished the interleaved round and not in the middle somewhere, right?

If you:
desc_1 = dmaengine_prep_interleaved_cyclic(chan, );
cookie_1 = dmaengine_submit(desc_1);
desc_2 = dmaengine_prep_interleaved_cyclic(chan, );
cookie_2 = dmaengine_submit(desc_2);

/* cookie_1/desc_1 is started */
dma_async_issue_pending(chan);

/* When need to switch to cookie_2 */
dmaengine_cyclic_set_active_cookie(chan, cookie_2);
/*
 * cookie_1 execution is suspended after it finished the running
 * dma_interleaved_template or buffer in normal cyclic and cookie_2
 * is replacing it.
 */

/* Switch back to cookie_1 */
dmaengine_cyclic_set_active_cookie(chan, cookie_1);
/*
 * cookie_2 execution is suspended after it finished the running
 * dma_interleaved_template or buffer in normal cyclic and cookie_1
 * is replacing it.
 */

There should be a (yet another) capabilities flag for
cyclic_set_active_cookie and the documentation should be strict on what
is the expected behavior.

You can kill everything with terminate_all.
There is another thing which is missing imho from DMAengine: to
terminate a specific cookie, not the entire channel, which might be a
good addition as you might spawn framebuffers and then delete them and
you might want to release the corresponding cookie/descriptor as well.

What do you think?

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Peter Ujfalusi Jan. 24, 2020, 7:38 a.m. UTC | #7
On 24/01/2020 9.20, Peter Ujfalusi wrote:
> Hi Laurent,
> 
> On 23/01/2020 14.23, Laurent Pinchart wrote:
>>>>> I think capture (camera) is another potential beneficiary of this.
>>
>> Possibly, although in the camera case I'd rather have the hardware stop
>> if there's no more buffer. Requiring a buffer to always be present is
>> annoying from a userspace point of view. For display it's different, if
>> userspace doesn't submit a new frame, the same frame should keep being
>> displayed on the screen.
>>
>>>>> So you don't need to terminate the running interleaved_cyclic and start
>>>>> a new one, but prepare and issue a new one, which would
>>>>> terminate/replace the currently running cyclic interleaved DMA?
>>
>> Correct.
>>
>>>> Why not explicitly terminate the transfer and start when a new one is
>>>> issued. That can be common usage for audio and display..
>>>
>>> Yes, this is what I'm asking. The cyclic transfer is running and in
>>> order to start the new transfer, the previous should stop. But in cyclic
>>> case it is not going to happen unless it is terminated.
>>>
>>> When one would want to have different interleaved transfer the display
>>> (or capture )IP needs to be reconfigured as well. The the would need to
>>> be terminated anyways to avoid interpreting data in a wrong way.
>>
>> The use case here is not to switch to a new configuration, but to switch
>> to a new buffer. If the transfer had to be terminated manually first,
>> the DMA engine would potentially miss a frame, which is not acceptable.
>> We need an atomic way to switch to the next transfer.
> 
> You have a special hardware in hand, most DMAs can not just replace a
> cyclic transfer in-flight and it also kind of violates the DMAengine
> principles.

Is there any specific reason why you need DMAengine driver for a display
DMA? Usually the drm drivers handle their DMA internally.

> If cyclic transfer is started then it is expected to run forever until
> it is terminated. Preparing and issuing a new transfer will not get
> executed when there is already a cyclic transfer in flight as your only
> option is to terminate_all, which will kill the running cyclic _and_
> will discard the issued and pending transfers.
> 
> So the use case is page flip when you have multiple framebuffers and you
> switch them to show the updated one, right?
> 
> There are things missing in DMAengine in API level for sure to do this,
> imho.
> The issue is that cyclic transfers will never complete, they run until
> terminated, but you want to replace the currently executing one with a
> another cyclic transfer without actually terminating the other.
> 
> It is like pause the 1st cyclic and continue with the 2nd one. Then at
> some point you pause the 2nd one and restart the 1st one.
> It is also crucial that the pause /switch happens when the executing one
> finished the interleaved round and not in the middle somewhere, right?
> 
> If you:
> desc_1 = dmaengine_prep_interleaved_cyclic(chan, );
> cookie_1 = dmaengine_submit(desc_1);
> desc_2 = dmaengine_prep_interleaved_cyclic(chan, );
> cookie_2 = dmaengine_submit(desc_1);
> 
> /* cookie_1/desc_1 is started */
> dma_async_issue_pending(chan);
> 
> /* When need to switch to cookie_2 */
> dmaengine_cyclic_set_active_cookie(chan, cookie_2);
> /*
>  * cookie_1 execution is suspended after it finished the running
>  * dma_interleaved_template or buffer in normal cyclic and cookie_2
>  * is replacing it.
>  */
> 
> /* Switch back to cookie_1 */
> dmaengine_cyclic_set_active_cookie(chan, cookie_1);
> /*
>  * cookie_2 execution is suspended after it finished the running
>  * dma_interleaved_template or buffer in normal cyclic and cookie_1
>  * is replacing it.
>  */
> 
> There should be a (yet another) capabilities flag got
> cyclic_set_active_cookie and the documentation should be strict on what
> is the expected behavior.
> 
> You can kill everything with terminate_all.
> There is another thing which is missing imho from DMAengine: to
> terminate a specific cookie, not the entire channel, which might be a
> good addition as you might spawn framebuffers and then delete them and
> you might want to release the corresponding cookie/descriptor as well.

This is a bit trickier as DMAengine's cookie is s32 and internally
treated as a running number, and cookie status is checked against s32
numbers with < >. I think this will not cope well when someone kills a
cookie in the middle.

> 
> What do you think?
> 
> - Péter
> 
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
> Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
> 

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart Jan. 24, 2020, 8:50 a.m. UTC | #8
On Fri, Jan 24, 2020 at 11:40:47AM +0530, Vinod Koul wrote:
> On 23-01-20, 14:23, Laurent Pinchart wrote:
> > > >>> @@ -701,6 +702,10 @@ struct dma_filter {
> > > >>>   *	The function takes a buffer of size buf_len. The callback function will
> > > >>>   *	be called after period_len bytes have been transferred.
> > > >>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> > > >>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> > > >>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> > > >>> + *	repeated until a new transfer is issued. This transfer type is meant
> > > >>> + *	for display.
> > > >>
> > > >> I think capture (camera) is another potential beneficiary of this.
> > 
> > Possibly, although in the camera case I'd rather have the hardware stop
> > if there's no more buffer. Requiring a buffer to always be present is
> > annoying from a userspace point of view. For display it's different, if
> > userspace doesn't submit a new frame, the same frame should keep being
> > displayed on the screen.
> > 
> > > >> So you don't need to terminate the running interleaved_cyclic and start
> > > >> a new one, but prepare and issue a new one, which would
> > > >> terminate/replace the currently running cyclic interleaved DMA?
> > 
> > Correct.
> > 
> > > > Why not explicitly terminate the transfer and start when a new one is
> > > > issued. That can be common usage for audio and display..
> > > 
> > > Yes, this is what I'm asking. The cyclic transfer is running and in
> > > order to start the new transfer, the previous should stop. But in cyclic
> > > case it is not going to happen unless it is terminated.
> > > 
> > > When one would want to have different interleaved transfer the display
> > > (or capture )IP needs to be reconfigured as well. The the would need to
> > > be terminated anyways to avoid interpreting data in a wrong way.
> > 
> > The use case here is not to switch to a new configuration, but to switch
> > to a new buffer. If the transfer had to be terminated manually first,
> > the DMA engine would potentially miss a frame, which is not acceptable.
> > We need an atomic way to switch to the next transfer.
> 
> So in this case you have, let's say a cyclic descriptor with N buffers
> and they are cyclically capturing data and providing to client/user..

For the display case it's cyclic over a single buffer that is repeatedly
displayed over and over again until a new one replaces it, when
userspace wants to change the content on the screen. Userspace only has
to provide a new buffer when content changes, otherwise the display has
to keep displaying the same one.

For cameras I don't think cyclic makes too much sense, except when the
DMA engine can't work in single-shot mode and always requires a buffer
to write into. That shouldn't be the norm.

> So why would you like to submit again...? Once whole capture has
> completed you would terminate, right...
> 
> Sorry not able to wrap my head around why new submission is required and
> if that is the case why previous one cant be terminated :)
Laurent Pinchart Jan. 24, 2020, 8:56 a.m. UTC | #9
Hi Peter,

On Fri, Jan 24, 2020 at 09:20:15AM +0200, Peter Ujfalusi wrote:
> On 23/01/2020 14.23, Laurent Pinchart wrote:
> >>>> I think capture (camera) is another potential beneficiary of this.
> > 
> > Possibly, although in the camera case I'd rather have the hardware stop
> > if there's no more buffer. Requiring a buffer to always be present is
> > annoying from a userspace point of view. For display it's different, if
> > userspace doesn't submit a new frame, the same frame should keep being
> > displayed on the screen.
> > 
> >>>> So you don't need to terminate the running interleaved_cyclic and start
> >>>> a new one, but prepare and issue a new one, which would
> >>>> terminate/replace the currently running cyclic interleaved DMA?
> > 
> > Correct.
> > 
> >>> Why not explicitly terminate the transfer and start when a new one is
> >>> issued. That can be common usage for audio and display..
> >>
> >> Yes, this is what I'm asking. The cyclic transfer is running and in
> >> order to start the new transfer, the previous should stop. But in cyclic
> >> case it is not going to happen unless it is terminated.
> >>
> >> When one would want to have different interleaved transfer the display
> >> (or capture )IP needs to be reconfigured as well. The the would need to
> >> be terminated anyways to avoid interpreting data in a wrong way.
> > 
> > The use case here is not to switch to a new configuration, but to switch
> > to a new buffer. If the transfer had to be terminated manually first,
> > the DMA engine would potentially miss a frame, which is not acceptable.
> > We need an atomic way to switch to the next transfer.
> 
> You have a special hardware in hand, most DMAs can not just replace a
> cyclic transfer in-flight and it also kind of violates the DMAengine
> principles.

That's why cyclic support is optional :-)

> If cyclic transfer is started then it is expected to run forever until
> it is terminated. Preparing and issuing a new transfer will not get
> executed when there is already a cyclic transfer in flight as your only
> option is to terminate_all, which will kill the running cyclic _and_
> will discard the issued and pending transfers.

For the existing cyclic API, I could agree with that, although there's
very little documentation in the dmaengine subsystem to be used as an
authoritative source of information :-(

> So the use case is page flip when you have multiple framebuffers and you
> switch them to show the updated one, right?

Correct.

> There are things missing in DMAengine in API level for sure to do this,
> imho.
> The issue is that cyclic transfers will never complete, they run until
> terminated, but you want to replace the currently executing one with
> another cyclic transfer without actually terminating the other.

Correct.

> It is like pause the 1st cyclic and continue with the 2nd one. Then at
> some point you pause the 2nd one and restart the 1st one.

No, after the 2nd one comes the 3rd one. It's not a double-buffering
case, it's really about replacing the buffer with another one,
regardless of where it comes from. Userspace may double-buffer, or
triple, or more.

> It is also crucial that the pause /switch happens when the executing one
> finished the interleaved round and not in the middle somewhere, right?

Yes. But that's not specific to this use case, with all non-cyclic
transfers submitting a new transfer request doesn't stop the ongoing
transfer (if any) immediately, it just queues the new transfer for
processing.

> If you:
> desc_1 = dmaengine_prep_interleaved_cyclic(chan, );
> cookie_1 = dmaengine_submit(desc_1);
> desc_2 = dmaengine_prep_interleaved_cyclic(chan, );
> cookie_2 = dmaengine_submit(desc_2);
> 
> /* cookie_1/desc_1 is started */
> dma_async_issue_pending(chan);
> 
> /* When need to switch to cookie_2 */
> dmaengine_cyclic_set_active_cookie(chan, cookie_2);
> /*
>  * cookie_1 execution is suspended after it finished the running
>  * dma_interleaved_template or buffer in normal cyclic and cookie_2
>  * is replacing it.
>  */
> 
> /* Switch back to cookie_1 */
> dmaengine_cyclic_set_active_cookie(chan, cookie_1);
> /*
>  * cookie_2 execution is suspended after it finished the running
>  * dma_interleaved_template or buffer in normal cyclic and cookie_1
>  * is replacing it.
>  */

As explained above, I don't want to switch back to a previous transfer,
I always want a new one. I don't see why we would need this kind of API
when we can just define that any queued interleaved transfer, whether
cyclic or not, is just queued and replaces the ongoing transfer at the
next frame boundary. Drivers don't have to implement the new API if the
hardware doesn't possess this capability.

> There should be a (yet another) capabilities flag for
> cyclic_set_active_cookie and the documentation should be strict on what
> is the expected behavior.
> 
> You can kill everything with terminate_all.
> There is another thing which is missing imho from DMAengine: to
> terminate a specific cookie, not the entire channel, which might be a
> good addition as you might spawn framebuffers and then delete them and
> you might want to release the corresponding cookie/descriptor as well.
> 
> What do you think?

I think it's overcomplicated for this use case :-)
Laurent Pinchart Jan. 24, 2020, 8:58 a.m. UTC | #10
Hi Peter,

On Fri, Jan 24, 2020 at 09:38:50AM +0200, Peter Ujfalusi wrote:
> On 24/01/2020 9.20, Peter Ujfalusi wrote:
> > On 23/01/2020 14.23, Laurent Pinchart wrote:
> >>>>> I think capture (camera) is another potential beneficiary of this.
> >>
> >> Possibly, although in the camera case I'd rather have the hardware stop
> >> if there's no more buffer. Requiring a buffer to always be present is
> >> annoying from a userspace point of view. For display it's different, if
> >> userspace doesn't submit a new frame, the same frame should keep being
> >> displayed on the screen.
> >>
> >>>>> So you don't need to terminate the running interleaved_cyclic and start
> >>>>> a new one, but prepare and issue a new one, which would
> >>>>> terminate/replace the currently running cyclic interleaved DMA?
> >>
> >> Correct.
> >>
> >>>> Why not explicitly terminate the transfer and start when a new one is
> >>>> issued. That can be common usage for audio and display..
> >>>
> >>> Yes, this is what I'm asking. The cyclic transfer is running and in
> >>> order to start the new transfer, the previous should stop. But in cyclic
> >>> case it is not going to happen unless it is terminated.
> >>>
> >>> When one would want to have different interleaved transfer the display
> >>> (or capture) IP needs to be reconfigured as well. The transfer would need to
> >>> be terminated anyway to avoid interpreting data in a wrong way.
> >>
> >> The use case here is not to switch to a new configuration, but to switch
> >> to a new buffer. If the transfer had to be terminated manually first,
> >> the DMA engine would potentially miss a frame, which is not acceptable.
> >> We need an atomic way to switch to the next transfer.
> > 
> > You have a special hardware in hand, most DMAs can not just replace a
> > cyclic transfer in-flight and it also kind of violates the DMAengine
> > principles.
> 
> Is there any specific reason why you need DMAengine driver for a display
> DMA? Usually the drm drivers handle their DMA internally.

Because it's a separate IP core that can be reused in different FPGAs
for different purposes. It happens that in my case it's a hard IP
connected to a display controller, but it could be used for non-cyclic
use cases in a different chip.

> > If cyclic transfer is started then it is expected to run forever until
> > it is terminated. Preparing and issuing a new transfer will not get
> > executed when there is already a cyclic transfer in flight as your only
> > option is to terminate_all, which will kill the running cyclic _and_
> > will discard the issued and pending transfers.
> > 
> > So the use case is page flip when you have multiple framebuffers and you
> > switch them to show the updated one, right?
> > 
> > There are things missing in DMAengine in API level for sure to do this,
> > imho.
> > The issue is that cyclic transfers will never complete, they run until
> > terminated, but you want to replace the currently executing one with
> > another cyclic transfer without actually terminating the other.
> > 
> > It is like pause the 1st cyclic and continue with the 2nd one. Then at
> > some point you pause the 2nd one and restart the 1st one.
> > It is also crucial that the pause /switch happens when the executing one
> > finished the interleaved round and not in the middle somewhere, right?
> > 
> > If you:
> > desc_1 = dmaengine_prep_interleaved_cyclic(chan, );
> > cookie_1 = dmaengine_submit(desc_1);
> > desc_2 = dmaengine_prep_interleaved_cyclic(chan, );
> > cookie_2 = dmaengine_submit(desc_2);
> > 
> > /* cookie_1/desc_1 is started */
> > dma_async_issue_pending(chan);
> > 
> > /* When need to switch to cookie_2 */
> > dmaengine_cyclic_set_active_cookie(chan, cookie_2);
> > /*
> >  * cookie_1 execution is suspended after it finished the running
> >  * dma_interleaved_template or buffer in normal cyclic and cookie_2
> >  * is replacing it.
> >  */
> > 
> > /* Switch back to cookie_1 */
> > dmaengine_cyclic_set_active_cookie(chan, cookie_1);
> > /*
> >  * cookie_2 execution is suspended after it finished the running
> >  * dma_interleaved_template or buffer in normal cyclic and cookie_1
> >  * is replacing it.
> >  */
> > 
> > There should be a (yet another) capabilities flag for
> > cyclic_set_active_cookie and the documentation should be strict on what
> > is the expected behavior.
> > 
> > You can kill everything with terminate_all.
> > There is another thing which is missing imho from DMAengine: to
> > terminate a specific cookie, not the entire channel, which might be a
> > good addition as you might spawn framebuffers and then delete them and
> > you might want to release the corresponding cookie/descriptor as well.
> 
> This is a bit trickier as DMAengine's cookie is s32 and internally
> treated as a running number and cookie status is checked against s32
> numbers with < >, I think this will not like when someone kills a cookie
> in the middle.

It would require a major redesign, yes. Not looking forward to that,
especially as I think we don't need it.

> > What do you think?
Laurent Pinchart Feb. 10, 2020, 2:06 p.m. UTC | #11
Hi Vinod,

On Fri, Jan 24, 2020 at 10:50:51AM +0200, Laurent Pinchart wrote:
> On Fri, Jan 24, 2020 at 11:40:47AM +0530, Vinod Koul wrote:
> > On 23-01-20, 14:23, Laurent Pinchart wrote:
> >>>>>> @@ -701,6 +702,10 @@ struct dma_filter {
> >>>>>>   *	The function takes a buffer of size buf_len. The callback function will
> >>>>>>   *	be called after period_len bytes have been transferred.
> >>>>>>   * @device_prep_interleaved_dma: Transfer expression in a generic way.
> >>>>>> + * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
> >>>>>> + *	This is similar to @device_prep_interleaved_dma, but the transfer is
> >>>>>> + *	repeated until a new transfer is issued. This transfer type is meant
> >>>>>> + *	for display.
> >>>>>
> >>>>> I think capture (camera) is another potential beneficiary of this.
> >> 
> >> Possibly, although in the camera case I'd rather have the hardware stop
> >> if there's no more buffer. Requiring a buffer to always be present is
> >> annoying from a userspace point of view. For display it's different, if
> >> userspace doesn't submit a new frame, the same frame should keep being
> >> displayed on the screen.
> >> 
> >>>>> So you don't need to terminate the running interleaved_cyclic and start
> >>>>> a new one, but prepare and issue a new one, which would
> >>>>> terminate/replace the currently running cyclic interleaved DMA?
> >> 
> >> Correct.
> >> 
> >>>> Why not explicitly terminate the transfer and start when a new one is
> >>>> issued. That can be common usage for audio and display..
> >>> 
> >>> Yes, this is what I'm asking. The cyclic transfer is running and in
> >>> order to start the new transfer, the previous should stop. But in cyclic
> >>> case it is not going to happen unless it is terminated.
> >>> 
> >>> When one would want to have different interleaved transfer the display
> >>> (or capture) IP needs to be reconfigured as well. The transfer would need to
> >>> be terminated anyway to avoid interpreting data in a wrong way.
> >> 
> >> The use case here is not to switch to a new configuration, but to switch
> >> to a new buffer. If the transfer had to be terminated manually first,
> >> the DMA engine would potentially miss a frame, which is not acceptable.
> >> We need an atomic way to switch to the next transfer.
> > 
> > So in this case you have, let's say a cyclic descriptor with N buffers
> > and they are cyclically capturing data and providing to client/user..
> 
> For the display case it's cyclic over a single buffer that is repeatedly
> displayed over and over again until a new one replaces it, when
> userspace wants to change the content on the screen. Userspace only has
> to provide a new buffer when content changes, otherwise the display has
> to keep displaying the same one.

Is the use case clear enough, or do you need more information ? Are you
fine with the API for this kind of use case ?

> For cameras I don't think cyclic makes too much sense, except when the
> DMA engine can't work in single-shot mode and always requires a buffer
> to write into. That shouldn't be the norm.
> 
> > So why would you like to submit again...? Once whole capture has
> > completed you would terminate, right...
> > 
> > Sorry not able to wrap my head around why new submission is required and
> > if that is the case why previous one cant be terminated :)
Vinod Koul Feb. 13, 2020, 1:29 p.m. UTC | #12
Hi Laurent,

On 10-02-20, 16:06, Laurent Pinchart wrote:

> > >> The use case here is not to switch to a new configuration, but to switch
> > >> to a new buffer. If the transfer had to be terminated manually first,
> > >> the DMA engine would potentially miss a frame, which is not acceptable.
> > >> We need an atomic way to switch to the next transfer.
> > > 
> > > So in this case you have, let's say a cyclic descriptor with N buffers
> > > and they are cyclically capturing data and providing to client/user..
> > 
> > For the display case it's cyclic over a single buffer that is repeatedly
> > displayed over and over again until a new one replaces it, when
> > userspace wants to change the content on the screen. Userspace only has
> > to provide a new buffer when content changes, otherwise the display has
> > to keep displaying the same one.
> 
> Is the use case clear enough, or do you need more information ? Are you
> fine with the API for this kind of use case ?

So we *know* when a new buffer is being used?

IOW, would it be possible for the display (or rather a dmaengine-facing
display wrapper) to detect that we are reusing an old buffer and keep
the cyclic transfer, and once a new buffer is detected, prepare a new
descriptor, submit it, and then terminate the old one, which should
trigger the next transaction to be submitted?

Would that make sense here?
Laurent Pinchart Feb. 13, 2020, 1:48 p.m. UTC | #13
Hi Vinod,

On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> On 10-02-20, 16:06, Laurent Pinchart wrote:
> 
> > > >> The use case here is not to switch to a new configuration, but to switch
> > > >> to a new buffer. If the transfer had to be terminated manually first,
> > > >> the DMA engine would potentially miss a frame, which is not acceptable.
> > > >> We need an atomic way to switch to the next transfer.
> > > > 
> > > > So in this case you have, let's say a cyclic descriptor with N buffers
> > > > and they are cyclically capturing data and providing to client/user..
> > > 
> > > For the display case it's cyclic over a single buffer that is repeatedly
> > > displayed over and over again until a new one replaces it, when
> > > userspace wants to change the content on the screen. Userspace only has
> > > to provide a new buffer when content changes, otherwise the display has
> > > to keep displaying the same one.
> > 
> > Is the use case clear enough, or do you need more information ? Are you
> > fine with the API for this kind of use case ?
> 
> So we *know* when a new buffer is being used?

The user of the DMA engine (the DRM DPSUB driver in this case) knows
when a new buffer needs to be used, as it receives it from userspace. In
response, it prepares a new interleaved cyclic transaction and queues
it. At the next IRQ, the DMA engine driver switches to the new
transaction (the implementation is slightly more complex to handle race
conditions, but that's the idea).

> IOW would it be possible for display (rather a dmaengine facing
> display wrapper) to detect that we are reusing an old buffer and keep
> the cyclic and once detected prepare a new descriptor, submit a new
> one and then terminate old one which should trigger next transaction
> to be submitted

I'm not sure I follow you. Do you mean that the display driver should
submit a non-cyclic transaction for every frame, reusing the same buffer
for every transaction, until a new buffer is available ? The issue with
this is that if the CPU load gets high, we may miss a frame, and the
display will break. The DPDMA hardware implements cyclic support for
this reason, and we want to use that feature to comply with the real
time requirements.

If you meant something else, could you please elaborate ?

> Would that make sense here?
Vinod Koul Feb. 13, 2020, 2:07 p.m. UTC | #14
On 13-02-20, 15:48, Laurent Pinchart wrote:
> Hi Vinod,
> 
> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> > On 10-02-20, 16:06, Laurent Pinchart wrote:
> > 
> > > > >> The use case here is not to switch to a new configuration, but to switch
> > > > >> to a new buffer. If the transfer had to be terminated manually first,
> > > > >> the DMA engine would potentially miss a frame, which is not acceptable.
> > > > >> We need an atomic way to switch to the next transfer.
> > > > > 
> > > > > So in this case you have, let's say a cyclic descriptor with N buffers
> > > > > and they are cyclically capturing data and providing to client/user..
> > > > 
> > > > For the display case it's cyclic over a single buffer that is repeatedly
> > > > displayed over and over again until a new one replaces it, when
> > > > userspace wants to change the content on the screen. Userspace only has
> > > > to provide a new buffer when content changes, otherwise the display has
> > > > to keep displaying the same one.
> > > 
> > > Is the use case clear enough, or do you need more information ? Are you
> > > fine with the API for this kind of use case ?
> > 
> > So we *know* when a new buffer is being used?
> 
> The user of the DMA engine (the DRM DPSUB driver in this case) knows
> when a new buffer needs to be used, as it receives it from userspace. In
> response, it prepares a new interleaved cyclic transaction and queues
> it. At the next IRQ, the DMA engine driver switches to the new
> transaction (the implementation is slightly more complex to handle race
> conditions, but that's the idea).
> 
> > IOW would it be possible for display (rather a dmaengine facing
> > display wrapper) to detect that we are reusing an old buffer and keep
> > the cyclic and once detected prepare a new descriptor, submit a new
> > one and then terminate old one which should trigger next transaction
> > to be submitted
> 
> I'm not sure to follow you. Do you mean that the display driver should
> submit a non-cyclic transaction for every frame, reusing the same buffer
> for every transaction, until a new buffer is available ? The issue with
> this is that if the CPU load gets high, we may miss a frame, and the
> display will break. The DPDMA hardware implements cyclic support for
> this reason, and we want to use that feature to comply with the real
> time requirements.

Sorry to cause confusion :) I mean cyclic

So, DRM DPSUB gets the first buffer:
A.1 Prepare cyclic interleave txn
A.2 Submit the txn (it doesn't start here)
A.3 Invoke issue_pending (that starts the txn)

DRM DPSUB gets next buffer:
B.1 Prepare cyclic interleave txn
B.2 Submit the txn
B.3 Call terminate for current cyclic txn (we need an updated terminate
which terminates the current txn, right now we have terminate_all which
is a sledge hammer approach)
B.4 Next txn would start once current one is started

Does this help and make sense in your case?

Thanks
Peter Ujfalusi Feb. 13, 2020, 2:15 p.m. UTC | #15
Hi Vinod, Laurent,

On 13/02/2020 16.07, Vinod Koul wrote:
> On 13-02-20, 15:48, Laurent Pinchart wrote:
>> Hi Vinod,
>>
>> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
>>> On 10-02-20, 16:06, Laurent Pinchart wrote:
>>>
>>>>>>> The use case here is not to switch to a new configuration, but to switch
>>>>>>> to a new buffer. If the transfer had to be terminated manually first,
>>>>>>> the DMA engine would potentially miss a frame, which is not acceptable.
>>>>>>> We need an atomic way to switch to the next transfer.
>>>>>>
>>>>>> So in this case you have, let's say a cyclic descriptor with N buffers
>>>>>> and they are cyclically capturing data and providing to client/user..
>>>>>
>>>>> For the display case it's cyclic over a single buffer that is repeatedly
>>>>> displayed over and over again until a new one replaces it, when
>>>>> userspace wants to change the content on the screen. Userspace only has
>>>>> to provide a new buffer when content changes, otherwise the display has
>>>>> to keep displaying the same one.
>>>>
>>>> Is the use case clear enough, or do you need more information ? Are you
>>>> fine with the API for this kind of use case ?
>>>
>>> So we *know* when a new buffer is being used?
>>
>> The user of the DMA engine (the DRM DPSUB driver in this case) knows
>> when a new buffer needs to be used, as it receives it from userspace. In
>> response, it prepares a new interleaved cyclic transaction and queues
>> it. At the next IRQ, the DMA engine driver switches to the new
>> transaction (the implementation is slightly more complex to handle race
>> conditions, but that's the idea).
>>
>>> IOW would it be possible for display (rather a dmaengine facing
>>> display wrapper) to detect that we are reusing an old buffer and keep
>>> the cyclic and once detected prepare a new descriptor, submit a new
>>> one and then terminate old one which should trigger next transaction
>>> to be submitted
>>
>> I'm not sure to follow you. Do you mean that the display driver should
>> submit a non-cyclic transaction for every frame, reusing the same buffer
>> for every transaction, until a new buffer is available ? The issue with
>> this is that if the CPU load gets high, we may miss a frame, and the
>> display will break. The DPDMA hardware implements cyclic support for
>> this reason, and we want to use that feature to comply with the real
>> time requirements.
> 
> Sorry to cause confusion :) I mean cyclic
> 
> So, DRM DPSUB get first buffer
> A.1 Prepare cyclic interleave txn
> A.2 Submit the txn (it doesn't start here)
> A.3 Invoke issue_pending (that starts the txn)
> 
> DRM DPSUB gets next buffer:
> B.1 Prepare cyclic interleave txn
> B.2 Submit the txn
> B.3 Call terminate for current cyclic txn (we need an updated terminate
> which terminates the current txn, right now we have terminate_all which
> is a sledge hammer approach)
> B.4 Next txn would start once current one is started
> 
> Does this help and make sense in your case

That would be a clean way to handle it. We were missing this API for a
long time to be able to cancel the ongoing transfer (whether it is
cyclic or slave_sg, or memcpy) and move to the next one if there is one
pending.

+1 from me if it counts ;)

> 
> Thanks
> 

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart Feb. 13, 2020, 4:52 p.m. UTC | #16
Hi Vinod and Peter,

On Thu, Feb 13, 2020 at 04:15:38PM +0200, Peter Ujfalusi wrote:
> On 13/02/2020 16.07, Vinod Koul wrote:
> > On 13-02-20, 15:48, Laurent Pinchart wrote:
> >> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> >>> On 10-02-20, 16:06, Laurent Pinchart wrote:
> >>>
> >>>>>>> The use case here is not to switch to a new configuration, but to switch
> >>>>>>> to a new buffer. If the transfer had to be terminated manually first,
> >>>>>>> the DMA engine would potentially miss a frame, which is not acceptable.
> >>>>>>> We need an atomic way to switch to the next transfer.
> >>>>>>
> >>>>>> So in this case you have, let's say a cyclic descriptor with N buffers
> >>>>>> and they are cyclically capturing data and providing to client/user..
> >>>>>
> >>>>> For the display case it's cyclic over a single buffer that is repeatedly
> >>>>> displayed over and over again until a new one replaces it, when
> >>>>> userspace wants to change the content on the screen. Userspace only has
> >>>>> to provide a new buffer when content changes, otherwise the display has
> >>>>> to keep displaying the same one.
> >>>>
> >>>> Is the use case clear enough, or do you need more information ? Are you
> >>>> fine with the API for this kind of use case ?
> >>>
> >>> So we *know* when a new buffer is being used?
> >>
> >> The user of the DMA engine (the DRM DPSUB driver in this case) knows
> >> when a new buffer needs to be used, as it receives it from userspace. In
> >> response, it prepares a new interleaved cyclic transaction and queues
> >> it. At the next IRQ, the DMA engine driver switches to the new
> >> transaction (the implementation is slightly more complex to handle race
> >> conditions, but that's the idea).
> >>
> >>> IOW would it be possible for display (rather a dmaengine facing
> >>> display wrapper) to detect that we are reusing an old buffer and keep
> >>> the cyclic and once detected prepare a new descriptor, submit a new
> >>> one and then terminate old one which should trigger next transaction
> >>> to be submitted
> >>
> >> I'm not sure to follow you. Do you mean that the display driver should
> >> submit a non-cyclic transaction for every frame, reusing the same buffer
> >> for every transaction, until a new buffer is available ? The issue with
> >> this is that if the CPU load gets high, we may miss a frame, and the
> >> display will break. The DPDMA hardware implements cyclic support for
> >> this reason, and we want to use that feature to comply with the real
> >> time requirements.
> > 
> > Sorry to cause confusion :) I mean cyclic
> > 
> > So, DRM DPSUB get first buffer
> > A.1 Prepare cyclic interleave txn
> > A.2 Submit the txn (it doesn't start here)
> > A.3 Invoke issue_pending (that starts the txn)

I assume that, at this point, the transfer is started, and repeated
forever until step B below, right ?

> > DRM DPSUB gets next buffer:
> > B.1 Prepare cyclic interleave txn
> > B.2 Submit the txn
> > B.3 Call terminate for current cyclic txn (we need an updated terminate
> > which terminates the current txn, right now we have terminate_all which
> > is a sledge hammer approach)
> > B.4 Next txn would start once current one is started

Do you mean "once current one is completed" ?

> > Does this help and make sense in your case

It does, but I really wonder why we need a new terminate operation that
would terminate a single transfer. If we call issue_pending at step B.3,
when the new txn is submitted, we can terminate the current transfer at that
point. It changes the semantics of issue_pending, but only for cyclic
transfers (this whole discussion is only about cyclic transfers). As a
cyclic transfer will be repeated forever until terminated, there's no
use case for issuing a new transfer without terminating the one in
progress. I thus don't think we need a new terminate operation: the only
thing that makes sense to do when submitting a new cyclic transfer is to
terminate the current one and switch to the new one, and we already have
all the APIs we need to enable this behaviour.

> That would be a clean way to handle it. We were missing this API for a
> long time to be able to cancel the ongoing transfer (whether it is
> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> pending.

Note that this new terminate API wouldn't terminate the ongoing transfer
immediately, it would complete first, until the end of the cycle for
cyclic transfers, and until the end of the whole transfer otherwise.
This new operation would thus essentially be a no-op for non-cyclic
transfers. I don't see how it would help :-) Do you have any particular
use case in mind ?

> +1 from me if it counts ;)
Vinod Koul Feb. 14, 2020, 4:23 a.m. UTC | #17
On 13-02-20, 18:52, Laurent Pinchart wrote:
> Hi Vinod and Peter,
> 
> On Thu, Feb 13, 2020 at 04:15:38PM +0200, Peter Ujfalusi wrote:
> > On 13/02/2020 16.07, Vinod Koul wrote:
> > > On 13-02-20, 15:48, Laurent Pinchart wrote:
> > >> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> > >>> On 10-02-20, 16:06, Laurent Pinchart wrote:
> > >>>
> > >>>>>>> The use case here is not to switch to a new configuration, but to switch
> > >>>>>>> to a new buffer. If the transfer had to be terminated manually first,
> > >>>>>>> the DMA engine would potentially miss a frame, which is not acceptable.
> > >>>>>>> We need an atomic way to switch to the next transfer.
> > >>>>>>
> > >>>>>> So in this case you have, let's say a cyclic descriptor with N buffers
> > >>>>>> and they are cyclically capturing data and providing to client/user..
> > >>>>>
> > >>>>> For the display case it's cyclic over a single buffer that is repeatedly
> > >>>>> displayed over and over again until a new one replaces it, when
> > >>>>> userspace wants to change the content on the screen. Userspace only has
> > >>>>> to provide a new buffer when content changes, otherwise the display has
> > >>>>> to keep displaying the same one.
> > >>>>
> > >>>> Is the use case clear enough, or do you need more information ? Are you
> > >>>> fine with the API for this kind of use case ?
> > >>>
> > >>> So we *know* when a new buffer is being used?
> > >>
> > >> The user of the DMA engine (the DRM DPSUB driver in this case) knows
> > >> when a new buffer needs to be used, as it receives it from userspace. In
> > >> response, it prepares a new interleaved cyclic transaction and queues
> > >> it. At the next IRQ, the DMA engine driver switches to the new
> > >> transaction (the implementation is slightly more complex to handle race
> > >> conditions, but that's the idea).
> > >>
> > >>> IOW would it be possible for display (rather a dmaengine facing
> > >>> display wrapper) to detect that we are reusing an old buffer and keep
> > >>> the cyclic and once detected prepare a new descriptor, submit a new
> > >>> one and then terminate old one which should trigger next transaction
> > >>> to be submitted
> > >>
> > >> I'm not sure to follow you. Do you mean that the display driver should
> > >> submit a non-cyclic transaction for every frame, reusing the same buffer
> > >> for every transaction, until a new buffer is available ? The issue with
> > >> this is that if the CPU load gets high, we may miss a frame, and the
> > >> display will break. The DPDMA hardware implements cyclic support for
> > >> this reason, and we want to use that feature to comply with the real
> > >> time requirements.
> > > 
> > > Sorry to cause confusion :) I mean cyclic
> > > 
> > > So, DRM DPSUB get first buffer
> > > A.1 Prepare cyclic interleave txn
> > > A.2 Submit the txn (it doesn't start here)
> > > A.3 Invoke issue_pending (that starts the txn)
> 
> I assume that, at this point, the transfer is started, and repeated
> forever until step B below, right ?

Right, since the transaction is cyclic in nature, the transaction will continue
until stopped or switched :)

> > > DRM DPSUB gets next buffer:
> > > B.1 Prepare cyclic interleave txn
> > > B.2 Submit the txn
> > > B.3 Call terminate for current cyclic txn (we need an updated terminate
> > > which terminates the current txn, right now we have terminate_all which
> > > is a sledge hammer approach)
> > > B.4 Next txn would start once current one is started
> 
> Do you mean "once current one is completed" ?

Yup, sorry for the typo!

> > > Does this help and make sense in your case
> 
> It does, but I really wonder why we need a new terminate operation that
> would terminate a single transfer. If we call issue_pending at step B.3,
> when the new txn submitted, we can terminate the current transfer at the
> point. It changes the semantics of issue_pending, but only for cyclic
> transfers (this whole discussions it only about cyclic transfers). As a
> cyclic transfer will be repeated forever until terminated, there's no
> use case for issuing a new transfer without terminating the one in
> progress. I thus don't think we need a new terminate operation: the only
> thing that makes sense to do when submitting a new cyclic transfer is to
> terminate the current one and switch to the new one, and we already have
> all the APIs we need to enable this behaviour.

The issue_pending() is a NOP when engine is already running.

The design of APIs is that we submit a txn to pending_list and then the
pending_list is started when issue_pending() is called.
Or if the engine is already running, it will take next txn from
pending_list() when current txn completes.

The only consideration here in this case is that the cyclic txn never
completes. Do we really treat a new txn submission as an 'indication' of
completeness? That is indeed a point to ponder upon.

Also, we need to keep in mind that the dmaengine won't stop a cyclic
txn. It would keep running and start the next transfer (in this case,
restarting from the beginning) while also giving you an interrupt. Here
we would be required to stop it and then start a new one...

Or perhaps remove the cyclic setting from the txn when a new one
arrives and that behaviour IMO is controller dependent, not sure if
all controllers support it..

> > That would be a clean way to handle it. We were missing this API for a
> > long time to be able to cancel the ongoing transfer (whether it is
> > cyclic or slave_sg, or memcpy) and move to the next one if there is one
> > pending.
> 
> Note that this new terminate API wouldn't terminate the ongoing transfer
> immediately, it would complete first, until the end of the cycle for
> cyclic transfers, and until the end of the whole transfer otherwise.
> This new operation would thus essentially be a no-op for non-cyclic
> transfers. I don't see how it would help :-) Do you have any particular
> use case in mind ?

Yeah that is something more to think about. Do we really abort here or
wait for the txn to complete? I think Peter needs the former and yours
falls in the latter category.

Thanks
Laurent Pinchart Feb. 14, 2020, 4:22 p.m. UTC | #18
Hi Vinod,

On Fri, Feb 14, 2020 at 09:53:49AM +0530, Vinod Koul wrote:
> On 13-02-20, 18:52, Laurent Pinchart wrote:
> > On Thu, Feb 13, 2020 at 04:15:38PM +0200, Peter Ujfalusi wrote:
> > > On 13/02/2020 16.07, Vinod Koul wrote:
> > > > On 13-02-20, 15:48, Laurent Pinchart wrote:
> > > >> On Thu, Feb 13, 2020 at 06:59:38PM +0530, Vinod Koul wrote:
> > > >>> On 10-02-20, 16:06, Laurent Pinchart wrote:
> > > >>>
> > > >>>>>>> The use case here is not to switch to a new configuration, but to switch
> > > >>>>>>> to a new buffer. If the transfer had to be terminated manually first,
> > > >>>>>>> the DMA engine would potentially miss a frame, which is not acceptable.
> > > >>>>>>> We need an atomic way to switch to the next transfer.
> > > >>>>>>
> > > >>>>>> So in this case you have, let's say a cyclic descriptor with N buffers
> > > >>>>>> and they are cyclically capturing data and providing to client/user..
> > > >>>>>
> > > >>>>> For the display case it's cyclic over a single buffer that is repeatedly
> > > >>>>> displayed over and over again until a new one replaces it, when
> > > >>>>> userspace wants to change the content on the screen. Userspace only has
> > > >>>>> to provide a new buffer when content changes, otherwise the display has
> > > >>>>> to keep displaying the same one.
> > > >>>>
> > > >>>> Is the use case clear enough, or do you need more information ? Are you
> > > >>>> fine with the API for this kind of use case ?
> > > >>>
> > > >>> So we *know* when a new buffer is being used?
> > > >>
> > > >> The user of the DMA engine (the DRM DPSUB driver in this case) knows
> > > >> when a new buffer needs to be used, as it receives it from userspace. In
> > > >> response, it prepares a new interleaved cyclic transaction and queues
> > > >> it. At the next IRQ, the DMA engine driver switches to the new
> > > >> transaction (the implementation is slightly more complex to handle race
> > > >> conditions, but that's the idea).
> > > >>
> > > >>> IOW would it be possible for display (rather a dmaengine facing
> > > >>> display wrapper) to detect that we are reusing an old buffer and keep
> > > >>> the cyclic and once detected prepare a new descriptor, submit a new
> > > >>> one and then terminate old one which should trigger next transaction
> > > >>> to be submitted
> > > >>
> > > >> I'm not sure to follow you. Do you mean that the display driver should
> > > >> submit a non-cyclic transaction for every frame, reusing the same buffer
> > > >> for every transaction, until a new buffer is available ? The issue with
> > > >> this is that if the CPU load gets high, we may miss a frame, and the
> > > >> display will break. The DPDMA hardware implements cyclic support for
> > > >> this reason, and we want to use that feature to comply with the real
> > > >> time requirements.
> > > > 
> > > > Sorry to cause confusion :) I mean cyclic
> > > > 
> > > > So, DRM DPSUB get first buffer
> > > > A.1 Prepare cyclic interleave txn
> > > > A.2 Submit the txn (it doesn't start here)
> > > > A.3 Invoke issue_pending (that starts the txn)
> > 
> > I assume that, at this point, the transfer is started, and repeated
> > forever until step B below, right ?
> 
> Right, since the transaction is cyclic in nature, the transaction will continue
> until stopped or switched :)
> 
> > > > DRM DPSUB gets next buffer:
> > > > B.1 Prepare cyclic interleave txn
> > > > B.2 Submit the txn
> > > > B.3 Call terminate for current cyclic txn (we need an updated terminate
> > > > which terminates the current txn, right now we have terminate_all which
> > > > is a sledge hammer approach)
> > > > B.4 Next txn would start once current one is started
> > 
> > Do you mean "once current one is completed" ?
> 
> Yup, sorry for the typo!

No worries, I just wanted to make sure it wasn't a misunderstanding on
my side.

> > > > Does this help and make sense in your case
> > 
> > It does, but I really wonder why we need a new terminate operation that
> > would terminate a single transfer. If we call issue_pending at step B.3,
> > when the new txn submitted, we can terminate the current transfer at the
> > point. It changes the semantics of issue_pending, but only for cyclic
> > transfers (this whole discussion is only about cyclic transfers). As a
> > cyclic transfer will be repeated forever until terminated, there's no
> > use case for issuing a new transfer without terminating the one in
> > progress. I thus don't think we need a new terminate operation: the only
> > thing that makes sense to do when submitting a new cyclic transfer is to
> > terminate the current one and switch to the new one, and we already have
> > all the APIs we need to enable this behaviour.
> 
> The issue_pending() is a NOP when engine is already running.

That's not totally right. issue_pending() still moves submitted but not
issued transactions from the submitted queue to the issued queue. The
DMA engine only considers the issued queue, so issue_pending()
essentially tells the DMA engine to consider the submitted transaction
for processing after the already issued transactions complete (in the
non-cyclic case).

> The design of APIs is that we submit a txn to pending_list and then the
> pending_list is started when issue_pending() is called.
> Or if the engine is already running, it will take next txn from
> pending_list() when current txn completes.
> 
> The only consideration here in this case is that the cyclic txn never
> completes. Do we really treat a new txn submission as an 'indication' of
> completeness? That is indeed a point to ponder upon.

The reason why I think we should is two-fold:

1. I believe it's semantically aligned with the existing behaviour of
issue_pending(). As explained above, the operation tells the DMA engine
to consider submitted transactions for processing when the current (and
other issued) transactions complete. If we extend the definition of
complete to cover cyclic transactions, I think it's a good match.

2. There's really nothing else we could do with cyclic transactions.
They never complete today and have to be terminated manually with
terminate_all(). Using issue_pending() to move to a next cyclic
transaction doesn't change the existing behaviour by replacing a useful
(and used) feature, as issue_pending() is currently a no-op for cyclic
transactions. The newly issued transaction is never considered, and
calling terminate_all() will cancel the issued transactions. By
extending the behaviour of issue_pending(), we're making a new use case
possible, without restricting any other feature, and without "stealing"
issue_pending() and preventing it from implementing another useful
behaviour.

In a nutshell, an important reason why I like using issue_pending() for
this purpose is because it makes cyclic and non-cyclic transactions
behave more similarly, which I think is good from an API consistency
point of view.

> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> txn. It would be running and start next transfer (in this case do
> from start) while it also gives you an interrupt. Here we would be
> required to stop it and then start a new one...

We wouldn't be required to stop it in the middle, the expected behaviour
is for the DMA engine to complete the cyclic transaction until the end
of the cycle and then replace it by the new one. That's exactly what
happens for non-cyclic transactions when you call issue_pending(), which
makes me like this solution.

> Or perhaps remove the cyclic setting from the txn when a new one
> arrives and that behaviour IMO is controller dependent, not sure if
> all controllers support it..

At the very least I would assume controllers to be able to stop a cyclic
transaction forcefully, otherwise terminate_all() could never be
implemented. This may not lead to a graceful switch from one cyclic
transaction to another one if the hardware doesn't allow doing so. In
that case I think tx_submit() could return an error, or we could turn
issue_pending() into an int operation to signal the error. Note that
there's no need to mass-patch drivers here, if a DMA engine client
issues a second cyclic transaction while one is in progress, the second
transaction won't be considered today. Signalling an error is in my
opinion a useful feature, but not doing so in DMA engine drivers can't
be a regression. We could also add a flag to tell whether this mode of
operation is supported.

> > > That would be a clean way to handle it. We were missing this API for a
> > > long time to be able to cancel the ongoing transfer (whether it is
> > > cyclic or slave_sg, or memcpy) and move to the next one if there is one
> > > pending.
> > 
> > Note that this new terminate API wouldn't terminate the ongoing transfer
> > immediately, it would complete first, until the end of the cycle for
> > cyclic transfers, and until the end of the whole transfer otherwise.
> > This new operation would thus essentially be a no-op for non-cyclic
> > transfers. I don't see how it would help :-) Do you have any particular
> > use case in mind ?
> 
> Yeah that is something more to think about. Do we really abort here or
> wait for the txn to complete. I think Peter needs the former and your
> falls in the latter category

I definitely need the latter, otherwise the display will flicker (or
completely misoperate) every time a new frame is displayed, which isn't
a good idea :-) I'm not sure about Peter's use cases, but it seems to me
that aborting a transaction immediately is racy in most cases, unless
the DMA engine supports byte-level residue reporting. One non-intrusive
option would be to add a flag to signal that a newly issued transaction
should interrupt the current transaction immediately.
Peter Ujfalusi Feb. 17, 2020, 10 a.m. UTC | #19
Hi Laurent, Vinod,

On 14/02/2020 18.22, Laurent Pinchart wrote:
>>> It does, but I really wonder why we need a new terminate operation that
>>> would terminate a single transfer. If we call issue_pending at step B.3,
>>> when the new txn submitted, we can terminate the current transfer at the
>>> point. It changes the semantics of issue_pending, but only for cyclic
>>> transfers (this whole discussion is only about cyclic transfers). As a
>>> cyclic transfer will be repeated forever until terminated, there's no
>>> use case for issuing a new transfer without terminating the one in
>>> progress. I thus don't think we need a new terminate operation: the only
>>> thing that makes sense to do when submitting a new cyclic transfer is to
>>> terminate the current one and switch to the new one, and we already have
>>> all the APIs we need to enable this behaviour.
>>
>> The issue_pending() is a NOP when engine is already running.
> 
> That's not totally right. issue_pending() still moves submitted but not
> issued transactions from the submitted queue to the issued queue. The
> DMA engine only considers the issued queue, so issue_pending()
> essentially tells the DMA engine to consider the submitted transaction
> for processing after the already issued transactions complete (in the
> non-cyclic case).

Vinod's point is about the cyclic case in its current state. It is
essentially a NOP, as we don't have a way to avoid killing the whole channel.

Just a sidenote: it is not even that clean cut for slave transfers
either as the slave_config must _not_ change between the issued
transfers. Iow, you can not switch between 16bit and 32bit word lengths
with some DMA. EDMA, sDMA can do that, but UDMA can not for example...

>> The design of APIs is that we submit a txn to pending_list and then the
>> pending_list is started when issue_pending() is called.
>> Or if the engine is already running, it will take next txn from
>> pending_list() when current txn completes.
>>
>> The only consideration here in this case is that the cyclic txn never
>> completes. Do we really treat a new txn submission as an 'indication' of
>> completeness? That is indeed a point to ponder upon.
> 
> The reason why I think we should is two-fold:
> 
> 1. I believe it's semantically aligned with the existing behaviour of
> issue_pending(). As explained above, the operation tells the DMA engine
> to consider submitted transactions for processing when the current (and
> other issued) transactions complete. If we extend the definition of
> complete to cover cyclic transactions, I think it's a good match.

We will end up with different behavior between cyclic and non cyclic
transfers and the new behavior should be somehow supported by existing
drivers.
Yes, issue_pending is moving the submitted tx to the issued queue to be
executed on HW when the current transfer finished.
We only needed this for non cyclic uses so far. Some DMA hw can replace
the current transfer with a new one (re-trigger to fetch the new
configuration, like yours), but some can not (none of the system DMAs
on TI platforms can).
If we say that this is the behavior the DMA drivers must follow then we
will have non-compliant DMA drivers. You cannot simply move to another
DMA, nor can you create generic DMA code shared by drivers.

> 2. There's really nothing else we could do with cyclic transactions.
> They never complete today and have to be terminated manually with
> terminate_all(). Using issue_pending() to move to a next cyclic
> transaction doesn't change the existing behaviour by replacing a useful
> (and used) feature, as issue_pending() is currently a no-op for cyclic
> transactions. The newly issued transaction is never considered, and
> calling terminate_all() will cancel the issued transactions. By
> extending the behaviour of issue_pending(), we're making a new use case
> possible, without restricting any other feature, and without "stealing"
> issue_pending() and preventing it from implementing another useful
> behaviour.

But at the same time we make existing drivers non compliant...

Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
issued cookie would be cleaner.

cookie1 = dmaengine_issue_pending();
// will start the transfer
cookie2 = dmaengine_issue_pending();
// cookie1 still runs, cookie2 is waiting to be executed
dmaengine_abort_tx(chan);
// will kill cookie1 and executes cookie2

dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
can say selectively which issued tx you want to remove, if it is the
running one, then stop it and move to the next one.
In place of the cookie parameter a 0 could imply that I don't know the
cookie, but kill the running one.

We would preserve what issue_pending does atm and would give us a
generic flow of how other drivers should handle such cases.

Note that this is not only useful for cyclic cases. Any driver which
currently uses brute-force termination can be upgraded.
Prime example is UART RX. We issue an RX buffer to receive data, but it
is not guaranteed that the remote will send data which would fill the
buffer and we hit a timeout waiting. We could issue the next buffer and
kill the stale transfer to reclaim the received data.

I think this can be even implemented for DMAs which can not do the same
thing as your DMA can.

> In a nutshell, an important reason why I like using issue_pending() for
> this purpose is because it makes cyclic and non-cyclic transactions
> behave more similarly, which I think is good from an API consistency
> point of view.
> 
>> Also, we need to keep in mind that the dmaengine wont stop a cyclic
>> txn. It would be running and start next transfer (in this case do
>> from start) while it also gives you an interrupt. Here we would be
>> required to stop it and then start a new one...
> 
> We wouldn't be required to stop it in the middle, the expected behaviour
> is for the DMA engine to complete the cyclic transaction until the end
> of the cycle and then replace it by the new one. That's exactly what
> happens for non-cyclic transactions when you call issue_pending(), which
> makes me like this solution.

Right, so we have two different use cases. Replace the current transfers
with the next issued one and abort the current transfer now and arm the
next issued one.
dmaengine_abort_tx(chan, cookie, forced) ?
forced == false: replace it at cyclic boundary
forced == true: right away (as HW allows), do not wait for cyclic round

>> Or perhaps remove the cyclic setting from the txn when a new one
>> arrives and that behaviour IMO is controller dependent, not sure if
>> all controllers support it..
> 
> At the very least I would assume controllers to be able to stop a cyclic
> transaction forcefully, otherwise terminate_all() could never be
> implemented. This may not lead to a graceful switch from one cyclic
> transaction to another one if the hardware doesn't allow doing so. In
> that case I think tx_submit() could return an error, or we could turn
> issue_pending() into an int operation to signal the error. Note that
> there's no need to mass-patch drivers here, if a DMA engine client
> issues a second cyclic transaction while one is in progress, the second
> transaction won't be considered today. Signalling an error is in my
> opinion a useful feature, but not doing so in DMA engine drivers can't
> be a regression. We could also add a flag to tell whether this mode of
> operation is supported.

My problem is that it changes the behavior of issue_pending() for
cyclic. If we document this then all existing DMA drivers are broken
(not compliant with the API documentation) as they don't do this.


>>>> That would be a clean way to handle it. We were missing this API for a
>>>> long time to be able to cancel the ongoing transfer (whether it is
>>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
>>>> pending.
>>>
>>> Note that this new terminate API wouldn't terminate the ongoing transfer
>>> immediately, it would complete first, until the end of the cycle for
>>> cyclic transfers, and until the end of the whole transfer otherwise.
>>> This new operation would thus essentially be a no-op for non-cyclic
>>> transfers. I don't see how it would help :-) Do you have any particular
>>> use case in mind ?
>>
>> Yeah that is something more to think about. Do we really abort here or
>> wait for the txn to complete. I think Peter needs the former and your
>> falls in the latter category
> 
> I definitely need the latter, otherwise the display will flicker (or
> completely misoperate) every time a new frame is displayed, which isn't
> a good idea :-)

Sure, and it is a great feature.

> I'm not sure about Peter's use cases, but it seems to me
> that aborting a transaction immediately is racy in most cases, unless
> the DMA engine supports byte-level residue reporting.

Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
new one right away.
UDMA on the other hand is not that forgiving... I would need to kill the
channel, wait for the termination to complete, reconfigure the channel
and execute the new transfer.

But with a separate callback API at least there will be an entry point
when this can be initiated and handled.
Fwiw, I think it should be simple to add this functionality to them, the
code is kind of handling it in other parts, but implementing it in the
issue_pending() is not really a clean solution.

In a channel you can run slave_sg transfers followed by cyclic if you
wish. A slave channel is what it is, slave channel which can be capable
to execute slave_sg and/or cyclic (and/or interleaved).
If issue_pending() is to take care then we need to check if the current
transfer is cyclic or not and decide based on that.

With a separate callback we in the DMA driver just need to do what the
client is asking for and no need to think.

> One non-intrusive
> option would be to add a flag to signal that a newly issued transaction
> should interrupt the current transaction immediately.

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Vinod Koul Feb. 19, 2020, 9:25 a.m. UTC | #20
On 17-02-20, 12:00, Peter Ujfalusi wrote:
> Hi Laurent, Vinod,
> 
> On 14/02/2020 18.22, Laurent Pinchart wrote:
> >>> It does, but I really wonder why we need a new terminate operation that
> >>> would terminate a single transfer. If we call issue_pending at step B.3,
> >>> when the new txn submitted, we can terminate the current transfer at the
> >>> point. It changes the semantics of issue_pending, but only for cyclic
> >>> transfers (this whole discussion is only about cyclic transfers). As a
> >>> cyclic transfer will be repeated forever until terminated, there's no
> >>> use case for issuing a new transfer without terminating the one in
> >>> progress. I thus don't think we need a new terminate operation: the only
> >>> thing that makes sense to do when submitting a new cyclic transfer is to
> >>> terminate the current one and switch to the new one, and we already have
> >>> all the APIs we need to enable this behaviour.
> >>
> >> The issue_pending() is a NOP when engine is already running.
> > 
> > That's not totally right. issue_pending() still moves submitted but not
> > issued transactions from the submitted queue to the issued queue. The
> > DMA engine only considers the issued queue, so issue_pending()
> > essentially tells the DMA engine to consider the submitted transaction
> > for processing after the already issued transactions complete (in the
> > non-cyclic case).
> 
> Vinod's point is about the cyclic case in its current state. It is
> essentially a NOP, as we don't have a way to avoid killing the whole channel.

Or IOW there is no descriptor movement to hardware..

> Just a sidenote: it is not even that clean cut for slave transfers
> either as the slave_config must _not_ change between the issued
> transfers. Iow, you can not switch between 16bit and 32bit word lengths
> with some DMA. EDMA, sDMA can do that, but UDMA can not for example...
> 
> >> The design of APIs is that we submit a txn to pending_list and then the
> >> pending_list is started when issue_pending() is called.
> >> Or if the engine is already running, it will take next txn from
> >> pending_list() when current txn completes.
> >>
> >> The only consideration here in this case is that the cyclic txn never
> >> completes. Do we really treat a new txn submission as an 'indication' of
> >> completeness? That is indeed a point to ponder upon.
> > 
> > The reason why I think we should is two-fold:
> > 
> > 1. I believe it's semantically aligned with the existing behaviour of
> > issue_pending(). As explained above, the operation tells the DMA engine
> > to consider submitted transactions for processing when the current (and
> > other issued) transactions complete. If we extend the definition of
> > complete to cover cyclic transactions, I think it's a good match.
> 
> We will end up with different behavior between cyclic and non cyclic
> transfers and the new behavior should be somehow supported by existing
> drivers.
> Yes, issue_pending is moving the submitted tx to the issued queue to be
> executed on HW when the current transfer finished.
> We only needed this for non cyclic uses so far. Some DMA hw can replace
> the current transfer with a new one (re-trigger to fetch the new
> configuration, like yours), but some can not (none of the system DMAs
> on TI platforms can).
> If we say that this is the behavior the DMA drivers must follow then we
> will have non-compliant DMA drivers. You cannot simply move to another
> DMA, nor can you create generic DMA code shared by drivers.

That is a very important point for the API. We want no implicit behaviour, so
if we want a behaviour, let us do that explicitly.

> > 2. There's really nothing else we could do with cyclic transactions.
> > They never complete today and have to be terminated manually with
> > terminate_all(). Using issue_pending() to move to a next cyclic
> > transaction doesn't change the existing behaviour by replacing a useful
> > (and used) feature, as issue_pending() is currently a no-op for cyclic
> > transactions. The newly issued transaction is never considered, and
> > calling terminate_all() will cancel the issued transactions. By
> > extending the behaviour of issue_pending(), we're making a new use case
> > possible, without restricting any other feature, and without "stealing"
> > issue_pending() and preventing it from implementing another useful
> > behaviour.
> 
> But at the same time we make existing drivers non compliant...
> 
> Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> issued cookie would be cleaner.
> 
> cookie1 = dmaengine_issue_pending();
> // will start the transfer
> cookie2 = dmaengine_issue_pending();
> // cookie1 still runs, cookie2 is waiting to be executed
> dmaengine_abort_tx(chan);
> // will kill cookie1 and executes cookie2

Right and we need a kill mode which kills the cookie1 at the end of
transfer (conditional to hw supporting that)

I think it should be generic API and usable in both the cyclic and
non-cyclic case

> 
> dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> can say selectively which issued tx you want to remove, if it is the
> running one, then stop it and move to the next one.
> In place of the cookie parameter a 0 could imply that I don't know the
> cookie, but kill the running one.
> 
> We would preserve what issue_pending does atm and would give us a
> generic flow of how other drivers should handle such cases.
> 
> Note that this is not only useful for cyclic cases. Any driver which
> currently uses brute-force termination can be upgraded.
> Prime example is UART RX. We issue an RX buffer to receive data, but it
> is not guaranteed that the remote will send data which would fill the
> buffer and we hit a timeout waiting. We could issue the next buffer and
> kill the stale transfer to reclaim the received data.
> 
> I think this can be even implemented for DMAs which can not do the same
> thing as your DMA can.
> 
> > In a nutshell, an important reason why I like using issue_pending() for
> > this purpose is because it makes cyclic and non-cyclic transactions
> > behave more similarly, which I think is good from an API consistency
> > point of view.
> > 
> >> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> >> txn. It would be running and start next transfer (in this case do
> >> from start) while it also gives you an interrupt. Here we would be
> >> required to stop it and then start a new one...
> > 
> > We wouldn't be required to stop it in the middle, the expected behaviour
> > is for the DMA engine to complete the cyclic transaction until the end
> > of the cycle and then replace it by the new one. That's exactly what
> > happens for non-cyclic transactions when you call issue_pending(), which
> > makes me like this solution.
> 
> Right, so we have two different use cases. Replace the current transfers
> with the next issued one and abort the current transfer now and arm the
> next issued one.
> dmaengine_abort_tx(chan, cookie, forced) ?
> forced == false: replace it at cyclic boundary
> forced == true: right away (as HW allows), do not wait for cyclic round
> 
> >> Or perhaps remove the cyclic setting from the txn when a new one
> >> arrives and that behaviour IMO is controller dependent, not sure if
> >> all controllers support it..
> > 
> > At the very least I would assume controllers to be able to stop a cyclic
> > transaction forcefully, otherwise terminate_all() could never be
> > implemented. This may not lead to a graceful switch from one cyclic
> > transaction to another one if the hardware doesn't allow doing so. In
> > that case I think tx_submit() could return an error, or we could turn
> > issue_pending() into an int operation to signal the error. Note that
> > there's no need to mass-patch drivers here, if a DMA engine client
> > issues a second cyclic transaction while one is in progress, the second
> > transaction won't be considered today. Signalling an error is in my
> > opinion a useful feature, but not doing so in DMA engine drivers can't
> > be a regression. We could also add a flag to tell whether this mode of
> > operation is supported.
> 
> My problem is that it changes the behavior of issue_pending() for
> cyclic. If we document this then all existing DMA drivers are broken
> (not compliant with the API documentation) as they don't do this.
> 
> 
> >>>> That would be a clean way to handle it. We were missing this API for a
> >>>> long time to be able to cancel the ongoing transfer (whether it is
> >>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> >>>> pending.
> >>>
> >>> Note that this new terminate API wouldn't terminate the ongoing transfer
> >>> immediately, it would complete first, until the end of the cycle for
> >>> cyclic transfers, and until the end of the whole transfer otherwise.
> >>> This new operation would thus essentially be a no-op for non-cyclic
> >>> transfers. I don't see how it would help :-) Do you have any particular
> >>> use case in mind ?
> >>
> >> Yeah that is something more to think about. Do we really abort here or
> >> wait for the txn to complete. I think Peter needs the former and your
> >> falls in the latter category
> > 
> > I definitely need the latter, otherwise the display will flicker (or
> > completely misoperate) every time a new frame is displayed, which isn't
> > a good idea :-)
> 
> Sure, and it is a great feature.
> 
> > I'm not sure about Peter's use cases, but it seems to me
> > that aborting a transaction immediately is racy in most cases, unless
> > the DMA engine supports byte-level residue reporting.
> 
> Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> new one right away.
> UDMA on the other hand is not that forgiving... I would need to kill the
> channel, wait for the termination to complete, reconfigure the channel
> and execute the new transfer.
> 
> But with a separate callback API at least there will be an entry point
> when this can be initiated and handled.
> Fwiw, I think it should be simple to add this functionality to them, the
> code is kind of handling it in other parts, but implementing it in the
> issue_pending() is not really a clean solution.
> 
> In a channel you can run slave_sg transfers followed by cyclic if you
> wish. A slave channel is what it is, slave channel which can be capable
> to execute slave_sg and/or cyclic (and/or interleaved).
> If issue_pending() is to take care then we need to check if the current
> transfer is cyclic or not and decide based on that.
> 
> With a separate callback we in the DMA driver just need to do what the
> client is asking for and no need to think.
> 
> > One non-intrusive
> > option would be to add a flag to signal that a newly issued transaction
> > should interrupt the current transaction immediately.
> 
> - Péter
> 
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
> Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart Feb. 26, 2020, 4:24 p.m. UTC | #21
Hi Peter,

On Mon, Feb 17, 2020 at 12:00:02PM +0200, Peter Ujfalusi wrote:
> On 14/02/2020 18.22, Laurent Pinchart wrote:
> >>> It does, but I really wonder why we need a new terminate operation that
> >>> would terminate a single transfer. If we call issue_pending at step B.3,
> >>> when the new txn submitted, we can terminate the current transfer at the
> >>> point. It changes the semantics of issue_pending, but only for cyclic
> >>> transfers (this whole discussion is only about cyclic transfers). As a
> >>> cyclic transfer will be repeated forever until terminated, there's no
> >>> use case for issuing a new transfer without terminating the one in
> >>> progress. I thus don't think we need a new terminate operation: the only
> >>> thing that makes sense to do when submitting a new cyclic transfer is to
> >>> terminate the current one and switch to the new one, and we already have
> >>> all the APIs we need to enable this behaviour.
> >>
> >> The issue_pending() is a NOP when engine is already running.
> > 
> > That's not totally right. issue_pending() still moves submitted but not
> > issued transactions from the submitted queue to the issued queue. The
> > DMA engine only considers the issued queue, so issue_pending()
> > essentially tells the DMA engine to consider the submitted transaction
> > for processing after the already issued transactions complete (in the
> > non-cyclic case).
> 
> Vinod's point is for the cyclic case at the current state. It is NOP
> essentially as we don't have way to not kill the whole channel.

Considering the current implementation of issue_pending(), for cyclic
transfers, that's correct.

My point was that, semantically, and as is implemented today for
non-cyclic transfers, issue_pending() is meant to tell the DMA engine to
consider the submitted transactions for processing after the already
issued transactions complete. For cyclic transactions, .issue_pending
has no defined semantics, and is implemented as a NOP. My proposal is to
extend the existing semantics of issue_pending() as defined for the
non-cyclic transactions to also cover the cyclic transactions. This
won't cause any breakage (the issue_pending() operation being unused for
cyclic transactions, it won't cause any change to existing code), and
will make the API more consistent as the same semantics (moving to the
next submitted transaction when the current one completes) will be
implemented using the same operation.

> Just a sidenote: it is not even that clean cut for slave transfers
> either as the slave_config must _not_ change between the issued
> transfers. Iow, you can not switch between 16bit and 32bit word lengths
> with some DMA. EDMA, sDMA can do that, but UDMA can not for example...

I agree this can be an issue, but I'm not sure how it's related :-) I
believe we need to consider this feature, and specify the API better,
but that's fairly unrelated, isn't it ?

> >> The design of APIs is that we submit a txn to pending_list and then the
> >> pending_list is started when issue_pending() is called.
> >> Or if the engine is already running, it will take next txn from
> >> pending_list() when current txn completes.
> >>
> >> The only consideration here in this case is that the cyclic txn never
> >> completes. Do we really treat a new txn submission as an 'indication' of
> >> completeness? That is indeed a point to ponder upon.
> > 
> > The reason why I think we should is two-fold:
> > 
> > 1. I believe it's semantically aligned with the existing behaviour of
> > issue_pending(). As explained above, the operation tells the DMA engine
> > to consider submitted transactions for processing when the current (and
> > other issued) transactions complete. If we extend the definition of
> > complete to cover cyclic transactions, I think it's a good match.
> 
> We will end up with different behavior between cyclic and non cyclic
> transfers and the new behavior should be somehow supported by existing
> drivers.
> Yes, issue_pending is moving the submitted tx to the issued queue to be
> executed on HW when the current transfer finished.
> We only needed this for non cyclic uses so far. Some DMA hw can replace
> the current transfer with a new one (re-trigger to fetch the new
> configuration, like yours), but some can not (none of the system DMAs
> on TI platforms can).
> If we say that this is the behavior the DMA drivers must follow then we
> will have non compliant DMA drivers. You can not move simply to other
> DMA or can not create generic DMA code shared by drivers.

I think that's a matter of reporting the capabilities of the DMA engine,
and I believe a flag is enough for this. My proposal really gives a
purpose to an API that is unused today (.issue_pending() for cyclic
transfers), and that purpose is semantically coherent with the purpose
of the same function for non-cyclic transfers. I thus believe it brings
the cyclic and non-cyclic cases closer, making their behaviour more
similar, not different.

> > 2. There's really nothing else we could do with cyclic transactions.
> > They never complete today and have to be terminated manually with
> > terminate_all(). Using issue_pending() to move to a next cyclic
> > transaction doesn't change the existing behaviour by replacing a useful
> > (and used) feature, as issue_pending() is currently a no-op for cyclic
> > transactions. The newly issued transaction is never considered, and
> > calling terminate_all() will cancel the issued transactions. By
> > extending the behaviour of issue_pending(), we're making a new use case
> > possible, without restricting any other feature, and without "stealing"
> > issue_pending() and preventing it from implementing another useful
> > behaviour.
> 
> But at the same time we make existing drivers non compliant...

With a flag to report this new feature, that's not a problem.

> Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> issued cookie would be cleaner.
> 
> cookie1 = dmaengine_issue_pending();
> // will start the transfer
> cookie2 = dmaengine_issue_pending();
> // cookie1 still runs, cookie2 is waiting to be executed
> dmaengine_abort_tx(chan);
> // will kill cookie1 and executes cookie2
> 
> dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> can say selectively which issued tx you want to remove, if it is the
> running one, then stop it and move to the next one.
> In place of the cookie parameter a 0 could imply that I don't know the
> cookie, but kill the running one.
> 
> We would preserve what issue_pending does atm and would give us a
> generic flow of how other drivers should handle such cases.
> 
> Note that this is not only useful for cyclic cases. Any driver which
> currently uses brute-force termination can be upgraded.
> Prime example is UART RX. We issue an RX buffer to receive data, but it
> is not guaranteed that the remote will send data which would fill the
> buffer and we hit a timeout waiting. We could issue the next buffer and
> kill the stale transfer to reclaim the received data.
> 
> I think this can be even implemented for DMAs which can not do the same
> thing as your DMA can.

But that's a different use case. What I'm after is *not*
killing/aborting a currently running transfer, it's moving to the next
submitted transfer at the next available sync point. I don't want to
abort the transfer in progress immediately, that would kill the display.

I understand that the above can be useful, but I really don't see why
I'd need to implement support for a more complex use case that I have no
need for, and could hardly even test properly, when what I'm after is
fixing what I view as a bug in the existing implementation: we have an
operation, issue_pending(), with a defined purpose, and it happens that
for one of the transfer types that operation doesn't work. I really see
no reason to implement a brand new API in this case.

Note that using issue_pending() as I propose doesn't preclude anyone
(you, or someone else) from implementing your above proposal, but please
don't make me do your work :-) This is becoming a case of yak shaving
where I'm asked to fix shortcomings of the DMA engine API when they're
unrelated to my use case.

> > In a nutshell, an important reason why I like using issue_pending() for
> > this purpose is because it makes cyclic and non-cyclic transactions
> > behave more similarly, which I think is good from an API consistency
> > point of view.
> > 
> >> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> >> txn. It would be running and start next transfer (in this case do
> >> from start) while it also gives you an interrupt. Here we would be
> >> required to stop it and then start a new one...
> > 
> > We wouldn't be required to stop it in the middle, the expected behaviour
> > is for the DMA engine to complete the cyclic transaction until the end
> > of the cycle and then replace it by the new one. That's exactly what
> > happens for non-cyclic transactions when you call issue_pending(), which
> > makes me like this solution.
> 
> Right, so we have two different use cases. Replace the current transfers
> with the next issued one and abort the current transfer now and arm the
> next issued one.
> dmaengine_abort_tx(chan, cookie, forced) ?
> forced == false: replace it at cyclic boundary
> forced == true: right away (as HW allows), do not wait for cyclic round

See the above. You're making this more complicated than it should be,
designing an API that contains a small part that could help solving my
problem, and asking me to implement the 90% for free. Not fair :-)

> >> Or perhaps remove the cyclic setting from the txn when a new one
> >> arrives and that behaviour IMO is controller dependent, not sure if
> >> all controllers support it..
> > 
> > At the very least I would assume controllers to be able to stop a cyclic
> > transaction forcefully, otherwise terminate_all() could never be
> > implemented. This may not lead to a graceful switch from one cyclic
> > transaction to another one if the hardware doesn't allow doing so. In
> > that case I think tx_submit() could return an error, or we could turn
> > issue_pending() into an int operation to signal the error. Note that
> > there's no need to mass-patch drivers here, if a DMA engine client
> > issues a second cyclic transaction while one is in progress, the second
> > transaction won't be considered today. Signalling an error is in my
> > opinion a useful feature, but not doing so in DMA engine drivers can't
> > be a regression. We could also add a flag to tell whether this mode of
> > operation is supported.
> 
> My problem is that it is changing the behavior of issue_pending() for
> cyclic. If we document this then all existing DMA drivers are broken
> (not compliant with the API documentation) as they don't do this.

Again, see above. I argue that it's not a behavioural change as such, as
the current behaviour is unused, because it's implemented as a NOP and
useless. With a simple flag to report if a DMA engine supports replacing
cyclic transfers, we would have a more consistent API as issue_pending()
will operate the same way for *all* types of transfers.

> >>>> That would be a clean way to handle it. We were missing this API for a
> >>>> long time to be able to cancel the ongoing transfer (whether it is
> >>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> >>>> pending.
> >>>
> >>> Note that this new terminate API wouldn't terminate the ongoing transfer
> >>> immediately, it would complete first, until the end of the cycle for
> >>> cyclic transfers, and until the end of the whole transfer otherwise.
> >>> This new operation would thus essentially be a no-op for non-cyclic
> >>> transfers. I don't see how it would help :-) Do you have any particular
> >>> use case in mind ?
> >>
> >> Yeah that is something more to think about. Do we really abort here or
> >> wait for the txn to complete. I think Peter needs the former and yours
> >> falls in the latter category
> > 
> > I definitely need the latter, otherwise the display will flicker (or
> > completely misoperate) every time a new frame is displayed, which isn't
> > a good idea :-)
> 
> Sure, and it is a great feature.
> 
> > I'm not sure about Peter's use cases, but it seems to me
> > that aborting a transaction immediately is racy in most cases, unless
> > the DMA engine supports byte-level residue reporting.
> 
> Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> new one right away.
> UDMA on the other hand is not that forgiving... I would need to kill the
> channel, wait for the termination to complete, reconfigure the channel
> and execute the new transfer.
> 
> But with a separate callback API at least there will be an entry point
> when this can be initiated and handled.
> Fwiw, I think it should be simple to add this functionality to them, the
> code is kind of handling it in other parts, but implementing it in the
> issue_pending() is not really a clean solution.
> 
> In a channel you can run slave_sg transfers followed by cyclic if you
> wish. A slave channel is what it is, slave channel which can be capable
> to execute slave_sg and/or cyclic (and/or interleaved).
> If issue_pending() is to take care then we need to check if the current
> transfer is cyclic or not and decide based on that.
> 
> With a separate callback we in the DMA driver just need to do what the
> client is asking for and no need to think.

Let's put it that way: are you volunteering to implement your proposal
(with proper API documentation) in a reasonable time frame, so that I
can try it for my use case ? Otherwise I see no reason to push against
my proposal.

> > One non-intrusive
> > option would be to add a flag to signal that a newly issued transaction
> > should interrupt the current transaction immediately.
Laurent Pinchart Feb. 26, 2020, 4:30 p.m. UTC | #22
Hi Vinod,

On Wed, Feb 19, 2020 at 02:55:14PM +0530, Vinod Koul wrote:
> On 17-02-20, 12:00, Peter Ujfalusi wrote:
> > On 14/02/2020 18.22, Laurent Pinchart wrote:
> >>>> It does, but I really wonder why we need a new terminate operation that
> >>>> would terminate a single transfer. If we call issue_pending at step B.3,
> >>>> when the new txn submitted, we can terminate the current transfer at the
> >>>> point. It changes the semantics of issue_pending, but only for cyclic
> >>>> transfers (this whole discussion is only about cyclic transfers). As a
> >>>> cyclic transfer will be repeated forever until terminated, there's no
> >>>> use case for issuing a new transfer without terminating the one in
> >>>> progress. I thus don't think we need a new terminate operation: the only
> >>>> thing that makes sense to do when submitting a new cyclic transfer is to
> >>>> terminate the current one and switch to the new one, and we already have
> >>>> all the APIs we need to enable this behaviour.
> >>>
> >>> The issue_pending() is a NOP when engine is already running.
> >> 
> >> That's not totally right. issue_pending() still moves submitted but not
> >> issued transactions from the submitted queue to the issued queue. The
> >> DMA engine only considers the issued queue, so issue_pending()
> >> essentially tells the DMA engine to consider the submitted transaction
> >> for processing after the already issued transactions complete (in the
> >> non-cyclic case).
> > 
> > Vinod's point is for the cyclic case at the current state. It is NOP
> > essentially as we don't have way to not kill the whole channel.
> 
> Or IOW there is no descriptor movement to hardware..
> 
> > Just a sidenote: it is not even that clean cut for slave transfers
> > either as the slave_config must _not_ change between the issued
> > transfers. Iow, you can not switch between 16bit and 32bit word lengths
> > with some DMA. EDMA, sDMA can do that, but UDMA can not for example...
> > 
> >>> The design of APIs is that we submit a txn to pending_list and then the
> >>> pending_list is started when issue_pending() is called.
> >>> Or if the engine is already running, it will take next txn from
> >>> pending_list() when current txn completes.
> >>>
> >>> The only consideration here in this case is that the cyclic txn never
> >>> completes. Do we really treat a new txn submission as an 'indication' of
> >>> completeness? That is indeed a point to ponder upon.
> >> 
> >> The reason why I think we should is two-fold:
> >> 
> >> 1. I believe it's semantically aligned with the existing behaviour of
> >> issue_pending(). As explained above, the operation tells the DMA engine
> >> to consider submitted transactions for processing when the current (and
> >> other issued) transactions complete. If we extend the definition of
> >> complete to cover cyclic transactions, I think it's a good match.
> > 
> > We will end up with different behavior between cyclic and non cyclic
> > transfers and the new behavior should be somehow supported by existing
> > drivers.
> > Yes, issue_pending is moving the submitted tx to the issued queue to be
> > executed on HW when the current transfer finished.
> > We only needed this for non cyclic uses so far. Some DMA hw can replace
> > the current transfer with a new one (re-trigger to fetch the new
> > configuration, like yours), but some can not (none of the system DMAs
> > on TI platforms can).
> > If we say that this is the behavior the DMA drivers must follow then we
> > will have non compliant DMA drivers. You can not move simply to other
> > DMA or can not create generic DMA code shared by drivers.
> 
> That is very important point for API. We want no implicit behaviour, so
> if we want an behaviour let us do that explicitly.

As I've just explained in my reply to Peter, there's nothing implicit in
my proposal :-) It's however missing a flag to report if the DMA engine
driver supports this feature, but apart from that, it makes the API
*more* consistent by making issue_pending() cover *all* transfer types
with the *same* semantics.

> >> 2. There's really nothing else we could do with cyclic transactions.
> >> They never complete today and have to be terminated manually with
> >> terminate_all(). Using issue_pending() to move to a next cyclic
> >> transaction doesn't change the existing behaviour by replacing a useful
> >> (and used) feature, as issue_pending() is currently a no-op for cyclic
> >> transactions. The newly issued transaction is never considered, and
> >> calling terminate_all() will cancel the issued transactions. By
> >> extending the behaviour of issue_pending(), we're making a new use case
> >> possible, without restricting any other feature, and without "stealing"
> >> issue_pending() and preventing it from implementing another useful
> >> behaviour.
> > 
> > But at the same time we make existing drivers non compliant...
> > 
> > Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> > issued cookie would be cleaner.
> > 
> > cookie1 = dmaengine_issue_pending();
> > // will start the transfer
> > cookie2 = dmaengine_issue_pending();
> > // cookie1 still runs, cookie2 is waiting to be executed
> > dmaengine_abort_tx(chan);
> > // will kill cookie1 and executes cookie2
> 
> Right and we need a kill mode which kills the cookie1 at the end of
> transfer (conditional to hw supporting that)
> 
> I think it should be generic API and usable in both the cyclic and
> non-cyclic case

I have no issue with an API that can abort ongoing transfers without
killing the whole queue of pending transfers, but that's not what I'm
after, it's not my use case. Again, as explained in my reply to Peter,
I'm not looking for a way to abort a transfer immediately, but to move
to the next transfer at the end of the current one. It's very different,
and the DMA engine API already supports this for all transfers but
cyclic transfers. I'd go as far as saying that my proposal is fixing a
bug in the current implementation :-)

> > dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> > can say selectively which issued tx you want to remove, if it is the
> > running one, then stop it and move to the next one.
> > In place of the cookie parameter a 0 could imply that I don't know the
> > cookie, but kill the running one.
> > 
> > We would preserve what issue_pending does atm and would give us a
> > generic flow of how other drivers should handle such cases.
> > 
> > Note that this is not only useful for cyclic cases. Any driver which
> > currently uses brute-force termination can be upgraded.
> > Prime example is UART RX. We issue an RX buffer to receive data, but it
> > is not guaranteed that the remote will send data which would fill the
> > buffer and we hit a timeout waiting. We could issue the next buffer and
> > kill the stale transfer to reclaim the received data.
> > 
> > I think this can be even implemented for DMAs which can not do the same
> > thing as your DMA can.
> > 
> >> In a nutshell, an important reason why I like using issue_pending() for
> >> this purpose is because it makes cyclic and non-cyclic transactions
> >> behave more similarly, which I think is good from an API consistency
> >> point of view.
> >> 
> >>> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> >>> txn. It would be running and start next transfer (in this case do
> >>> from start) while it also gives you an interrupt. Here we would be
> >>> required to stop it and then start a new one...
> >> 
> >> We wouldn't be required to stop it in the middle, the expected behaviour
> >> is for the DMA engine to complete the cyclic transaction until the end
> >> of the cycle and then replace it by the new one. That's exactly what
> >> happens for non-cyclic transactions when you call issue_pending(), which
> >> makes me like this solution.
> > 
> > Right, so we have two different use cases. Replace the current transfers
> > with the next issued one and abort the current transfer now and arm the
> > next issued one.
> > dmaengine_abort_tx(chan, cookie, forced) ?
> > forced == false: replace it at cyclic boundary
> > forced == true: right away (as HW allows), do not wait for cyclic round
> > 
> >>> Or perhaps remove the cyclic setting from the txn when a new one
> >>> arrives and that behaviour IMO is controller dependent, not sure if
> >>> all controllers support it..
> >> 
> >> At the very least I would assume controllers to be able to stop a cyclic
> >> transaction forcefully, otherwise terminate_all() could never be
> >> implemented. This may not lead to a graceful switch from one cyclic
> >> transaction to another one if the hardware doesn't allow doing so. In
> >> that case I think tx_submit() could return an error, or we could turn
> >> issue_pending() into an int operation to signal the error. Note that
> >> there's no need to mass-patch drivers here, if a DMA engine client
> >> issues a second cyclic transaction while one is in progress, the second
> >> transaction won't be considered today. Signalling an error is in my
> >> opinion a useful feature, but not doing so in DMA engine drivers can't
> >> be a regression. We could also add a flag to tell whether this mode of
> >> operation is supported.
> > 
> > My problem is that it is changing the behavior of issue_pending() for
> > cyclic. If we document this then all existing DMA drivers are broken
> > (not compliant with the API documentation) as they don't do this.
> > 
> > 
> >>>>> That would be a clean way to handle it. We were missing this API for a
> >>>>> long time to be able to cancel the ongoing transfer (whether it is
> >>>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> >>>>> pending.
> >>>>
> >>>> Note that this new terminate API wouldn't terminate the ongoing transfer
> >>>> immediately, it would complete first, until the end of the cycle for
> >>>> cyclic transfers, and until the end of the whole transfer otherwise.
> >>>> This new operation would thus essentially be a no-op for non-cyclic
> >>>> transfers. I don't see how it would help :-) Do you have any particular
> >>>> use case in mind ?
> >>>
> >>> Yeah that is something more to think about. Do we really abort here or
> >>> wait for the txn to complete. I think Peter needs the former and yours
> >>> falls in the latter category
> >> 
> >> I definitely need the latter, otherwise the display will flicker (or
> >> completely misoperate) every time a new frame is displayed, which isn't
> >> a good idea :-)
> > 
> > Sure, and it is a great feature.
> > 
> >> I'm not sure about Peter's use cases, but it seems to me
> >> that aborting a transaction immediately is racy in most cases, unless
> >> the DMA engine supports byte-level residue reporting.
> > 
> > Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> > new one right away.
> > UDMA on the other hand is not that forgiving... I would need to kill the
> > channel, wait for the termination to complete, reconfigure the channel
> > and execute the new transfer.
> > 
> > But with a separate callback API at least there will be an entry point
> > when this can be initiated and handled.
> > Fwiw, I think it should be simple to add this functionality to them, the
> > code is kind of handling it in other parts, but implementing it in the
> > issue_pending() is not really a clean solution.
> > 
> > In a channel you can run slave_sg transfers followed by cyclic if you
> > wish. A slave channel is what it is, slave channel which can be capable
> > to execute slave_sg and/or cyclic (and/or interleaved).
> > If issue_pending() is to take care then we need to check if the current
> > transfer is cyclic or not and decide based on that.
> > 
> > With a separate callback we in the DMA driver just need to do what the
> > client is asking for and no need to think.
> > 
> >> One non-intrusive
> >> option would be to add a flag to signal that a newly issued transaction
> >> should interrupt the current transaction immediately.
Vinod Koul March 2, 2020, 3:42 a.m. UTC | #23
On 26-02-20, 18:24, Laurent Pinchart wrote:
> Hi Peter,
> 
> On Mon, Feb 17, 2020 at 12:00:02PM +0200, Peter Ujfalusi wrote:
> > On 14/02/2020 18.22, Laurent Pinchart wrote:
> > >>> It does, but I really wonder why we need a new terminate operation that
> > >>> would terminate a single transfer. If we call issue_pending at step B.3,
> > >>> when the new txn submitted, we can terminate the current transfer at the
> > >>> point. It changes the semantics of issue_pending, but only for cyclic
> > >>> transfers (this whole discussion is only about cyclic transfers). As a
> > >>> cyclic transfer will be repeated forever until terminated, there's no
> > >>> use case for issuing a new transfer without terminating the one in
> > >>> progress. I thus don't think we need a new terminate operation: the only
> > >>> thing that makes sense to do when submitting a new cyclic transfer is to
> > >>> terminate the current one and switch to the new one, and we already have
> > >>> all the APIs we need to enable this behaviour.
> > >>
> > >> The issue_pending() is a NOP when engine is already running.
> > > 
> > > That's not totally right. issue_pending() still moves submitted but not
> > > issued transactions from the submitted queue to the issued queue. The
> > > DMA engine only considers the issued queue, so issue_pending()
> > > essentially tells the DMA engine to consider the submitted transaction
> > > for processing after the already issued transactions complete (in the
> > > non-cyclic case).
> > 
> > Vinod's point is for the cyclic case at the current state. It is NOP
> > essentially as we don't have way to not kill the whole channel.
> 
> Considering the current implementation of issue_pending(), for cyclic
> transfers, that's correct.
> 
> My point was that, semantically, and as is implemented today for
> non-cyclic transfers, issue_pending() is meant to tell the DMA engine to
> consider the submitted transactions for processing after the already
> issued transactions complete. For cyclic transactions, .issue_pending
> has no defined semantics, and is implemented as a NOP. My proposal is to
> extend the existing semantics of issue_pending() as defined for the
> non-cyclic transactions to also cover the cyclic transactions. This
> won't cause any breakage (the issue_pending() operation being unused for
> cyclic transactions, it won't cause any change to existing code), and
> will make the API more consistent as the same semantics (moving to the
> next submitted transaction when the current one completes) will be
> implemented using the same operation.

Only problem is cyclic by definition never completes, so we need to add
additional semantics for completion which is something me and Peter do
not seem to like :)

> 
> > Just a sidenote: it is not even that clean cut for slave transfers
> > either as the slave_config must _not_ change between the issued
> > transfers. Iow, you can not switch between 16bit and 32bit word lengths
> > with some DMA. EDMA, sDMA can do that, but UDMA can not for example...
> 
> I agree this can be an issue, but I'm not sure how it's related :-) I
> believe we need to consider this feature, and specify the API better,
> but that's fairly unrelated, isn't it ?
> 
> > >> The design of APIs is that we submit a txn to pending_list and then the
> > >> pending_list is started when issue_pending() is called.
> > >> Or if the engine is already running, it will take next txn from
> > >> pending_list() when current txn completes.
> > >>
> > >> The only consideration here in this case is that the cyclic txn never
> > >> completes. Do we really treat a new txn submission as an 'indication' of
> > >> completeness? That is indeed a point to ponder upon.
> > > 
> > > The reason why I think we should is two-fold:
> > > 
> > > 1. I believe it's semantically aligned with the existing behaviour of
> > > issue_pending(). As explained above, the operation tells the DMA engine
> > > to consider submitted transactions for processing when the current (and
> > > other issued) transactions complete. If we extend the definition of
> > > complete to cover cyclic transactions, I think it's a good match.
> > 
> > We will end up with different behavior between cyclic and non cyclic
> > transfers and the new behavior should be somehow supported by existing
> > drivers.
> > Yes, issue_pending is moving the submitted tx to the issued queue to be
> > executed on HW when the current transfer finished.
> > We only needed this for non cyclic uses so far. Some DMA hw can replace
> > the current transfer with a new one (re-trigger to fetch the new
> > configuration, like yours), but some can not (none of the system DMAs
> > on TI platforms can).
> > If we say that this is the behavior the DMA drivers must follow then we
> > will have non compliant DMA drivers. You can not move simply to other
> > DMA or can not create generic DMA code shared by drivers.
> 
> I think that's a matter of reporting the capabilities of the DMA engine,
> and I believe a flag is enough for this. My proposal really gives a
> purpose to an API that is unused today (.issue_pending() for cyclic
> transfers), and that purpose is semantically coherent with the purpose
> of the same function for non-cyclic transfers. I thus believe it brings
> the cyclic and non-cyclic cases closer, making their behaviour more
> similar, not different.
> 
> > > 2. There's really nothing else we could do with cyclic transactions.
> > > They never complete today and have to be terminated manually with
> > > terminate_all(). Using issue_pending() to move to a next cyclic
> > > transaction doesn't change the existing behaviour by replacing a useful
> > > (and used) feature, as issue_pending() is currently a no-op for cyclic
> > > transactions. The newly issued transaction is never considered, and
> > > calling terminate_all() will cancel the issued transactions. By
> > > extending the behaviour of issue_pending(), we're making a new use case
> > > possible, without restricting any other feature, and without "stealing"
> > > issue_pending() and preventing it from implementing another useful
> > > behaviour.
> > 
> > But at the same time we make existing drivers non compliant...
> 
> With a flag to report this new feature, that's not a problem.
> 
> > Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> > issued cookie would be cleaner.
> > 
> > cookie1 = dmaengine_issue_pending();
> > // will start the transfer
> > cookie2 = dmaengine_issue_pending();
> > // cookie1 still runs, cookie2 is waiting to be executed
> > dmaengine_abort_tx(chan);
> > // will kill cookie1 and executes cookie2
> > 
> > dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> > can say selectively which issued tx you want to remove, if it is the
> > running one, then stop it and move to the next one.
> > In place of the cookie parameter a 0 could imply that I don't know the
> > cookie, but kill the running one.
> > 
> > We would preserve what issue_pending does atm and would give us a
> > generic flow of how other drivers should handle such cases.
> > 
> > Note that this is not only useful for cyclic cases. Any driver which
> > currently uses brute-force termination can be upgraded.
> > Prime example is UART RX. We issue an RX buffer to receive data, but it
> > > is not guaranteed that the remote will send data which would fill the
> > buffer and we hit a timeout waiting. We could issue the next buffer and
> > kill the stale transfer to reclaim the received data.
> > 
> > I think this can be even implemented for DMAs which can not do the same
> > thing as your DMA can.
> 
> But that's a different use case. What I'm after is *not*
> killing/aborting a currently running transfer, it's moving to the next
> submitted transfer at the next available sync point. I don't want to
> abort the transfer in progress immediately, that would kill the display.
> 
> I understand that the above can be useful, but I really don't see why
> I'd need to implement support for a more complex use case that I have no
> need for, and could hardly even test properly, when what I'm after is
> fixing what I view as a bug in the existing implementation: we have an
> operation, issue_pending(), with a defined purpose, and it happens that
> for one of the transfer types that operation doesn't work. I really see
> no reason to implement a brand new API in this case.
> 
> Note that using issue_pending() as I propose doesn't preclude anyone
> (you, or someone else) to implement your above proposal, but please
> don't make me do your work :-) This is becoming a case of yak shaving
> where I'm asked to fix shortcomings of the DMA engine API when they're
> unrelated to my use case.
> 
> > > In a nutshell, an important reason why I like using issue_pending() for
> > > this purpose is because it makes cyclic and non-cyclic transactions
> > > behave more similarly, which I think is good from an API consistency
> > > point of view.
> > > 
> > >> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> > >> txn. It would be running and start next transfer (in this case do
> > >> from start) while it also gives you an interrupt. Here we would be
> > >> required to stop it and then start a new one...
> > > 
> > > We wouldn't be required to stop it in the middle, the expected behaviour
> > > is for the DMA engine to complete the cyclic transaction until the end
> > > of the cycle and then replace it by the new one. That's exactly what
> > > happens for non-cyclic transactions when you call issue_pending(), which
> > > makes me like this solution.
> > 
> > Right, so we have two different use cases. Replace the current transfers
> > with the next issued one and abort the current transfer now and arm the
> > next issued one.
> > dmaengine_abort_tx(chan, cookie, forced) ?
> > forced == false: replace it at cyclic boundary
> > forced == true: right away (as HW allows), do not wait for cyclic round
> 
> See the above. You're making this more complicated than it should be,
> designing an API that contains a small part that could help solving my
> problem, and asking me to implement the 90% for free. Not fair :-)

I agree it may help in other cases, but my view here is that if we want
to terminate the cyclic, let us be explicit about it. I would rather
call an API to do so and explicitly convey that current cyclic txn is
ending rather than implicitly submit a new one.

> > >> Or perhaps remove the cyclic setting from the txn when a new one
> > >> arrives and that behaviour IMO is controller dependent, not sure if
> > >> all controllers support it..
> > > 
> > > At the very least I would assume controllers to be able to stop a cyclic
> > > transaction forcefully, otherwise terminate_all() could never be
> > > > implemented. This may not lead to a graceful switch from one cyclic
> > > transaction to another one if the hardware doesn't allow doing so. In
> > > that case I think tx_submit() could return an error, or we could turn
> > > issue_pending() into an int operation to signal the error. Note that
> > > there's no need to mass-patch drivers here, if a DMA engine client
> > > issues a second cyclic transaction while one is in progress, the second
> > > transaction won't be considered today. Signalling an error is in my
> > > opinion a useful feature, but not doing so in DMA engine drivers can't
> > > be a regression. We could also add a flag to tell whether this mode of
> > > operation is supported.
> > 
> > > My problem is that it is changing the behavior of issue_pending() for
> > > cyclic. If we document this then all existing DMA drivers are broken
> > > (not compliant with the API documentation) as they don't do this.
> 
> Again, see above. I argue that it's not a behavioural change as such, as
> the current behaviour is unused, because it's implemented as a NOP and
> useless. With a simple flag to report if a DMA engine supports replacing
> cyclic transfers, we would have a more consistent API as issue_pending()
> will operate the same way for *all* types of transfers.
> 
> > >>>> That would be a clean way to handle it. We were missing this API for a
> > >>>> long time to be able to cancel the ongoing transfer (whether it is
> > >>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> > >>>> pending.
> > >>>
> > >>> Note that this new terminate API wouldn't terminate the ongoing transfer
> > >>> immediately, it would complete first, until the end of the cycle for
> > >>> cyclic transfers, and until the end of the whole transfer otherwise.
> > >>> This new operation would thus essentially be a no-op for non-cyclic
> > >>> transfers. I don't see how it would help :-) Do you have any particular
> > >>> use case in mind ?
> > >>
> > >> Yeah that is something more to think about. Do we really abort here or
> > > >> wait for the txn to complete. I think Peter needs the former and yours
> > >> falls in the latter category
> > > 
> > > I definitely need the latter, otherwise the display will flicker (or
> > > completely misoperate) every time a new frame is displayed, which isn't
> > > a good idea :-)
> > 
> > Sure, and it is a great feature.
> > 
> > > I'm not sure about Peter's use cases, but it seems to me
> > > that aborting a transaction immediately is racy in most cases, unless
> > > the DMA engine supports byte-level residue reporting.
> > 
> > Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> > new one right away.
> > UDMA on the other hand is not that forgiving... I would need to kill the
> > channel, wait for the termination to complete, reconfigure the channel
> > and execute the new transfer.
> > 
> > But with a separate callback API at least there will be an entry point
> > when this can be initiated and handled.
> > Fwiw, I think it should be simple to add this functionality to them, the
> > code is kind of handling it in other parts, but implementing it in the
> > issue_pending() is not really a clean solution.
> > 
> > In a channel you can run slave_sg transfers followed by cyclic if you
> > wish. A slave channel is what it is, slave channel which can be capable
> > to execute slave_sg and/or cyclic (and/or interleaved).
> > If issue_pending() is to take care then we need to check if the current
> > transfer is cyclic or not and decide based on that.
> > 
> > With a separate callback we in the DMA driver just need to do what the
> > client is asking for and no need to think.
> 
> Let's put it that way: are you volunteering to implement your proposal
> (with proper API documentation) in a reasonable time frame, so that I
> can try it for my use case ? Otherwise I see no reason to push against
> my proposal.
> 
> > > One non-intrusive
> > > option would be to add a flag to signal that a newly issued transaction
> > > should interrupt the current transaction immediately.
> 
> -- 
> Regards,
> 
> Laurent Pinchart
Vinod Koul March 2, 2020, 3:47 a.m. UTC | #24
Hi Laurent,

On 26-02-20, 18:30, Laurent Pinchart wrote:
> On Wed, Feb 19, 2020 at 02:55:14PM +0530, Vinod Koul wrote:
> > On 17-02-20, 12:00, Peter Ujfalusi wrote:
> > > On 14/02/2020 18.22, Laurent Pinchart wrote:
> > >>>> It does, but I really wonder why we need a new terminate operation that
> > >>>> would terminate a single transfer. If we call issue_pending at step B.3,
> > >>>> when the new txn submitted, we can terminate the current transfer at the
> > >>>> point. It changes the semantics of issue_pending, but only for cyclic
> > > >>>> transfers (this whole discussion is only about cyclic transfers). As a
> > >>>> cyclic transfer will be repeated forever until terminated, there's no
> > >>>> use case for issuing a new transfer without terminating the one in
> > >>>> progress. I thus don't think we need a new terminate operation: the only
> > >>>> thing that makes sense to do when submitting a new cyclic transfer is to
> > >>>> terminate the current one and switch to the new one, and we already have
> > >>>> all the APIs we need to enable this behaviour.
> > >>>
> > >>> The issue_pending() is a NOP when engine is already running.
> > >> 
> > >> That's not totally right. issue_pending() still moves submitted but not
> > >> issued transactions from the submitted queue to the issued queue. The
> > >> DMA engine only considers the issued queue, so issue_pending()
> > >> essentially tells the DMA engine to consider the submitted transaction
> > >> for processing after the already issued transactions complete (in the
> > >> non-cyclic case).
> > > 
> > > Vinod's point is for the cyclic case at the current state. It is NOP
> > > essentially as we don't have way to not kill the whole channel.
> > 
> > Or IOW there is no descriptor movement to hardware..
> > 
> > > Just a sidenote: it is not even that clean cut for slave transfers
> > > either as the slave_config must _not_ change between the issued
> > > transfers. Iow, you can not switch between 16bit and 32bit word lengths
> > > with some DMA. EDMA, sDMA can do that, but UDMA can not for example...
> > > 
> > >>> The design of APIs is that we submit a txn to pending_list and then the
> > >>> pending_list is started when issue_pending() is called.
> > >>> Or if the engine is already running, it will take next txn from
> > >>> pending_list() when current txn completes.
> > >>>
> > >>> The only consideration here in this case is that the cyclic txn never
> > >>> completes. Do we really treat a new txn submission as an 'indication' of
> > >>> completeness? That is indeed a point to ponder upon.
> > >> 
> > >> The reason why I think we should is two-fold:
> > >> 
> > >> 1. I believe it's semantically aligned with the existing behaviour of
> > >> issue_pending(). As explained above, the operation tells the DMA engine
> > >> to consider submitted transactions for processing when the current (and
> > >> other issued) transactions complete. If we extend the definition of
> > >> complete to cover cyclic transactions, I think it's a good match.
> > > 
> > > We will end up with different behavior between cyclic and non cyclic
> > > transfers and the new behavior should be somehow supported by existing
> > > drivers.
> > > Yes, issue_pending is moving the submitted tx to the issued queue to be
> > > executed on HW when the current transfer finished.
> > > We only needed this for non cyclic uses so far. Some DMA hw can replace
> > > the current transfer with a new one (re-trigger to fetch the new
> > > > configuration, like yours), but some can not (none of the system DMAs
> > > on TI platforms can).
> > > If we say that this is the behavior the DMA drivers must follow then we
> > > will have non compliant DMA drivers. You can not move simply to other
> > > DMA or can not create generic DMA code shared by drivers.
> > 
> > > That is a very important point for the API. We want no implicit behaviour, so
> > > if we want a behaviour let us do that explicitly.
> 
> As I've just explained in my reply to Peter, there's nothing implicit in
> my proposal :-) It's however missing a flag to report if the DMA engine
> driver supports this feature, but apart from that, it makes the API
> *more* consistent by making issue_pending() cover *all* transfer types
> with the *same* semantics.

I would be more comfortable in calling an API to do so :)
The flow I am thinking is:

- prep cyclic1 txn
- submit cyclic1 txn
- call issue_pending() (cyclic one starts)

- prep cyclic2 txn
- submit cyclic2 txn
- signal_cyclic1_txn aka terminate_cookie()
- cyclic1 completes, switch to cyclic2 (dmaengine driver)
- get callback for cyclic1 (optional)

To check if hw supports terminate_cookie() or not we can check if the
callback support is implemented

> 
> > >> 2. There's really nothing else we could do with cyclic transactions.
> > >> They never complete today and have to be terminated manually with
> > >> terminate_all(). Using issue_pending() to move to a next cyclic
> > >> transaction doesn't change the existing behaviour by replacing a useful
> > >> (and used) feature, as issue_pending() is currently a no-op for cyclic
> > >> transactions. The newly issued transaction is never considered, and
> > >> calling terminate_all() will cancel the issued transactions. By
> > >> extending the behaviour of issue_pending(), we're making a new use case
> > >> possible, without restricting any other feature, and without "stealing"
> > >> issue_pending() and preventing it from implementing another useful
> > >> behaviour.
> > > 
> > > But at the same time we make existing drivers non compliant...
> > > 
> > > Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> > > issued cookie would be cleaner.
> > > 
> > > cookie1 = dmaengine_issue_pending();
> > > // will start the transfer
> > > cookie2 = dmaengine_issue_pending();
> > > // cookie1 still runs, cookie2 is waiting to be executed
> > > dmaengine_abort_tx(chan);
> > > // will kill cookie1 and executes cookie2
> > 
> > Right and we need a kill mode which kills the cookie1 at the end of
> > transfer (conditional to hw supporting that)
> > 
> > I think it should be generic API and usable in both the cyclic and
> > non-cyclic case
> 
> I have no issue with an API that can abort ongoing transfers without
> killing the whole queue of pending transfers, but that's not what I'm
> after, it's not my use case. Again, as explained in my reply to Peter,
> I'm not looking for a way to abort a transfer immediately, but to move
> to the next transfer at the end of the current one. It's very different,
> and the DMA engine API already supports this for all transfers but
> cyclic transfers. I'd go as far as saying that my proposal is fixing a
> bug in the current implementation :-)
> 
> > > dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> > > can say selectively which issued tx you want to remove, if it is the
> > > running one, then stop it and move to the next one.
> > > In place of the cookie parameter a 0 could imply that I don't know the
> > > cookie, but kill the running one.
> > > 
> > > We would preserve what issue_pending does atm and would give us a
> > > generic flow of how other drivers should handle such cases.
> > > 
> > > Note that this is not only useful for cyclic cases. Any driver which
> > > currently uses brute-force termination can be upgraded.
> > > Prime example is UART RX. We issue an RX buffer to receive data, but it
> > > > is not guaranteed that the remote will send data which would fill the
> > > buffer and we hit a timeout waiting. We could issue the next buffer and
> > > kill the stale transfer to reclaim the received data.
> > > 
> > > I think this can be even implemented for DMAs which can not do the same
> > > thing as your DMA can.
> > > 
> > >> In a nutshell, an important reason why I like using issue_pending() for
> > >> this purpose is because it makes cyclic and non-cyclic transactions
> > >> behave more similarly, which I think is good from an API consistency
> > >> point of view.
> > >> 
> > >>> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> > >>> txn. It would be running and start next transfer (in this case do
> > >>> from start) while it also gives you an interrupt. Here we would be
> > >>> required to stop it and then start a new one...
> > >> 
> > >> We wouldn't be required to stop it in the middle, the expected behaviour
> > >> is for the DMA engine to complete the cyclic transaction until the end
> > >> of the cycle and then replace it by the new one. That's exactly what
> > >> happens for non-cyclic transactions when you call issue_pending(), which
> > >> makes me like this solution.
> > > 
> > > Right, so we have two different use cases. Replace the current transfers
> > > with the next issued one and abort the current transfer now and arm the
> > > next issued one.
> > > dmaengine_abort_tx(chan, cookie, forced) ?
> > > forced == false: replace it at cyclic boundary
> > > forced == true: right away (as HW allows), do not wait for cyclic round
> > > 
> > >>> Or perhaps remove the cyclic setting from the txn when a new one
> > >>> arrives and that behaviour IMO is controller dependent, not sure if
> > >>> all controllers support it..
> > >> 
> > >> At the very least I would assume controllers to be able to stop a cyclic
> > >> transaction forcefully, otherwise terminate_all() could never be
> > > >> implemented. This may not lead to a graceful switch from one cyclic
> > >> transaction to another one if the hardware doesn't allow doing so. In
> > >> that case I think tx_submit() could return an error, or we could turn
> > >> issue_pending() into an int operation to signal the error. Note that
> > >> there's no need to mass-patch drivers here, if a DMA engine client
> > >> issues a second cyclic transaction while one is in progress, the second
> > >> transaction won't be considered today. Signalling an error is in my
> > >> opinion a useful feature, but not doing so in DMA engine drivers can't
> > >> be a regression. We could also add a flag to tell whether this mode of
> > >> operation is supported.
> > > 
> > > > My problem is that it is changing the behavior of issue_pending() for
> > > > cyclic. If we document this then all existing DMA drivers are broken
> > > > (not compliant with the API documentation) as they don't do this.
> > > 
> > > 
> > >>>>> That would be a clean way to handle it. We were missing this API for a
> > >>>>> long time to be able to cancel the ongoing transfer (whether it is
> > >>>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> > >>>>> pending.
> > >>>>
> > >>>> Note that this new terminate API wouldn't terminate the ongoing transfer
> > >>>> immediately, it would complete first, until the end of the cycle for
> > >>>> cyclic transfers, and until the end of the whole transfer otherwise.
> > >>>> This new operation would thus essentially be a no-op for non-cyclic
> > >>>> transfers. I don't see how it would help :-) Do you have any particular
> > >>>> use case in mind ?
> > >>>
> > >>> Yeah that is something more to think about. Do we really abort here or
> > > >>> wait for the txn to complete. I think Peter needs the former and yours
> > >>> falls in the latter category
> > >> 
> > >> I definitely need the latter, otherwise the display will flicker (or
> > >> completely misoperate) every time a new frame is displayed, which isn't
> > >> a good idea :-)
> > > 
> > > Sure, and it is a great feature.
> > > 
> > >> I'm not sure about Peter's use cases, but it seems to me
> > >> that aborting a transaction immediately is racy in most cases, unless
> > >> the DMA engine supports byte-level residue reporting.
> > > 
> > > Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> > > new one right away.
> > > UDMA on the other hand is not that forgiving... I would need to kill the
> > > channel, wait for the termination to complete, reconfigure the channel
> > > and execute the new transfer.
> > > 
> > > But with a separate callback API at least there will be an entry point
> > > when this can be initiated and handled.
> > > Fwiw, I think it should be simple to add this functionality to them, the
> > > code is kind of handling it in other parts, but implementing it in the
> > > issue_pending() is not really a clean solution.
> > > 
> > > In a channel you can run slave_sg transfers followed by cyclic if you
> > > wish. A slave channel is what it is, slave channel which can be capable
> > > to execute slave_sg and/or cyclic (and/or interleaved).
> > > If issue_pending() is to take care then we need to check if the current
> > > transfer is cyclic or not and decide based on that.
> > > 
> > > With a separate callback we in the DMA driver just need to do what the
> > > client is asking for and no need to think.
> > > 
> > >> One non-intrusive
> > >> option would be to add a flag to signal that a newly issued transaction
> > >> should interrupt the current transaction immediately.
> 
> -- 
> Regards,
> 
> Laurent Pinchart
Laurent Pinchart March 2, 2020, 7:37 a.m. UTC | #25
Hi Vinod,

On Mon, Mar 02, 2020 at 09:17:35AM +0530, Vinod Koul wrote:
> On 26-02-20, 18:30, Laurent Pinchart wrote:
> > On Wed, Feb 19, 2020 at 02:55:14PM +0530, Vinod Koul wrote:
> >> On 17-02-20, 12:00, Peter Ujfalusi wrote:
> >>> On 14/02/2020 18.22, Laurent Pinchart wrote:
> >>>>>> It does, but I really wonder why we need a new terminate operation that
> >>>>>> would terminate a single transfer. If we call issue_pending at step B.3,
> >>>>>> when the new txn submitted, we can terminate the current transfer at the
> >>>>>> point. It changes the semantics of issue_pending, but only for cyclic
> >>>>>> transfers (this whole discussion is only about cyclic transfers). As a
> >>>>>> cyclic transfer will be repeated forever until terminated, there's no
> >>>>>> use case for issuing a new transfer without terminating the one in
> >>>>>> progress. I thus don't think we need a new terminate operation: the only
> >>>>>> thing that makes sense to do when submitting a new cyclic transfer is to
> >>>>>> terminate the current one and switch to the new one, and we already have
> >>>>>> all the APIs we need to enable this behaviour.
> >>>>>
> >>>>> The issue_pending() is a NOP when engine is already running.
> >>>> 
> >>>> That's not totally right. issue_pending() still moves submitted but not
> >>>> issued transactions from the submitted queue to the issued queue. The
> >>>> DMA engine only considers the issued queue, so issue_pending()
> >>>> essentially tells the DMA engine to consider the submitted transaction
> >>>> for processing after the already issued transactions complete (in the
> >>>> non-cyclic case).
> >>> 
> >>> Vinod's point is for the cyclic case at the current state. It is NOP
> >>> essentially as we don't have way to not kill the whole channel.
> >> 
> >> Or IOW there is no descriptor movement to hardware..
> >> 
> >>> Just a sidenote: it is not even that clean cut for slave transfers
> >>> either as the slave_config must _not_ change between the issued
> >>> transfers. Iow, you can not switch between 16bit and 32bit word lengths
> >>> with some DMA. EDMA, sDMA can do that, but UDMA can not for example...
> >>> 
> >>>>> The design of APIs is that we submit a txn to pending_list and then the
> >>>>> pending_list is started when issue_pending() is called.
> >>>>> Or if the engine is already running, it will take next txn from
> >>>>> pending_list() when current txn completes.
> >>>>>
> >>>>> The only consideration here in this case is that the cyclic txn never
> >>>>> completes. Do we really treat a new txn submission as an 'indication' of
> >>>>> completeness? That is indeed a point to ponder upon.
> >>>> 
> >>>> The reason why I think we should is two-fold:
> >>>> 
> >>>> 1. I believe it's semantically aligned with the existing behaviour of
> >>>> issue_pending(). As explained above, the operation tells the DMA engine
> >>>> to consider submitted transactions for processing when the current (and
> >>>> other issued) transactions complete. If we extend the definition of
> >>>> complete to cover cyclic transactions, I think it's a good match.
> >>> 
> >>> We will end up with different behavior between cyclic and non cyclic
> >>> transfers and the new behavior should be somehow supported by existing
> >>> drivers.
> >>> Yes, issue_pending is moving the submitted tx to the issued queue to be
> >>> executed on HW when the current transfer finished.
> >>> We only needed this for non cyclic uses so far. Some DMA hw can replace
> >>> the current transfer with a new one (re-trigger to fetch the new
> >>> configuration, like yours), but some can not (none of the system DMAs
> >>> on TI platforms can).
> >>> If we say that this is the behavior the DMA drivers must follow then we
> >>> will have non compliant DMA drivers. You can not move simply to other
> >>> DMA or can not create generic DMA code shared by drivers.
> >> 
> >> That is a very important point for the API. We want no implicit behaviour, so
> >> if we want a behaviour let us do that explicitly.
> > 
> > As I've just explained in my reply to Peter, there's nothing implicit in
> > my proposal :-) It's however missing a flag to report if the DMA engine
> > driver supports this feature, but apart from that, it makes the API
> > *more* consistent by making issue_pending() cover *all* transfer types
> > with the *same* semantics.
> 
> I would be more comfortable in calling an API to do so :)
> The flow I am thinking is:
> 
> - prep cyclic1 txn
> - submit cyclic1 txn
> - call issue_pending() (cyclic one starts)
> 
> - prep cyclic2 txn
> - submit cyclic2 txn
> - signal_cyclic1_txn aka terminate_cookie()
> - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> - get callback for cyclic1 (optional)
> 
> To check if hw supports terminate_cookie() or not we can check if the
> callback support is implemented

Two questions though:

- Where is .issue_pending() called for cyclic2 in your above sequence ?
  Surely it should be called somewhere, as the DMA engine API requires
  .issue_pending() to be called for a transfer to be executed, otherwise
  it stays in the submitted but not pending queue.

- With the introduction of a new .terminate_cookie() operation, we need
  to specify that operation for all transfer types. What's its
  envisioned semantics for non-cyclic transfers ? And how do DMA engine
  drivers report that they support .terminate_cookie() for cyclic
  transfers but not for other transfer types (the counterpart of
  reporting, in my proposal, that .issue_pending() doesn't support
  replacing the current cyclic transfer) ?

> >>>> 2. There's really nothing else we could do with cyclic transactions.
> >>>> They never complete today and have to be terminated manually with
> >>>> terminate_all(). Using issue_pending() to move to a next cyclic
> >>>> transaction doesn't change the existing behaviour by replacing a useful
> >>>> (and used) feature, as issue_pending() is currently a no-op for cyclic
> >>>> transactions. The newly issued transaction is never considered, and
> >>>> calling terminate_all() will cancel the issued transactions. By
> >>>> extending the behaviour of issue_pending(), we're making a new use case
> >>>> possible, without restricting any other feature, and without "stealing"
> >>>> issue_pending() and preventing it from implementing another useful
> >>>> behaviour.
> >>> 
> >>> But at the same time we make existing drivers non compliant...
> >>> 
> >>> Imo a new callback to 'kill' / 'terminate' / 'replace' / 'abort' an
> >>> issued cookie would be cleaner.
> >>> 
> >>> cookie1 = dmaengine_issue_pending();
> >>> // will start the transfer
> >>> cookie2 = dmaengine_issue_pending();
> >>> // cookie1 still runs, cookie2 is waiting to be executed
> >>> dmaengine_abort_tx(chan);
> >>> // will kill cookie1 and executes cookie2
> >> 
> >> Right and we need a kill mode which kills the cookie1 at the end of
> >> transfer (conditional to hw supporting that)
> >> 
> >> I think it should be generic API and usable in both the cyclic and
> >> non-cyclic case
> > 
> > I have no issue with an API that can abort ongoing transfers without
> > killing the whole queue of pending transfers, but that's not what I'm
> > after, it's not my use case. Again, as explained in my reply to Peter,
> > I'm not looking for a way to abort a transfer immediately, but to move
> > to the next transfer at the end of the current one. It's very different,
> > and the DMA engine API already supports this for all transfers but
> > cyclic transfers. I'd go as far as saying that my proposal is fixing a
> > bug in the current implementation :-)
> > 
> >>> dmaengine_abort_tx() could take a cookie as parameter if we wish, so you
> >>> can say selectively which issued tx you want to remove, if it is the
> >>> running one, then stop it and move to the next one.
> >>> In place of the cookie parameter a 0 could imply that I don't know the
> >>> cookie, but kill the running one.
> >>> 
> >>> We would preserve what issue_pending does atm and would give us a
> >>> generic flow of how other drivers should handle such cases.
> >>> 
> >>> Note that this is not only useful for cyclic cases. Any driver which
> >>> currently uses brute-force termination can be upgraded.
> >>> Prime example is UART RX. We issue an RX buffer to receive data, but it
> >>> is not guaranteed that the remote will send data which would fill the
> >>> buffer and we hit a timeout waiting. We could issue the next buffer and
> >>> kill the stale transfer to reclaim the received data.
> >>> 
> >>> I think this can be even implemented for DMAs which can not do the same
> >>> thing as your DMA can.
> >>> 
> >>>> In a nutshell, an important reason why I like using issue_pending() for
> >>>> this purpose is because it makes cyclic and non-cyclic transactions
> >>>> behave more similarly, which I think is good from an API consistency
> >>>> point of view.
> >>>> 
> >>>>> Also, we need to keep in mind that the dmaengine wont stop a cyclic
> >>>>> txn. It would be running and start next transfer (in this case do
> >>>>> from start) while it also gives you an interrupt. Here we would be
> >>>>> required to stop it and then start a new one...
> >>>> 
> >>>> We wouldn't be required to stop it in the middle, the expected behaviour
> >>>> is for the DMA engine to complete the cyclic transaction until the end
> >>>> of the cycle and then replace it by the new one. That's exactly what
> >>>> happens for non-cyclic transactions when you call issue_pending(), which
> >>>> makes me like this solution.
> >>> 
> >>> Right, so we have two different use cases. Replace the current transfers
> >>> with the next issued one and abort the current transfer now and arm the
> >>> next issued one.
> >>> dmaengine_abort_tx(chan, cookie, forced) ?
> >>> forced == false: replace it at cyclic boundary
> >>> forced == true: right away (as HW allows), do not wait for cyclic round
> >>> 
> >>>>> Or perhaps remove the cyclic setting from the txn when a new one
> >>>>> arrives and that behaviour IMO is controller dependent, not sure if
> >>>>> all controllers support it..
> >>>> 
> >>>> At the very least I would assume controllers to be able to stop a cyclic
> >>>> transaction forcefully, otherwise terminate_all() could never be
> >>>> implemented. This may not lead to a graceful switch from one cyclic
> >>>> transaction to another one if the hardware doesn't allow doing so. In
> >>>> that case I think tx_submit() could return an error, or we could turn
> >>>> issue_pending() into an int operation to signal the error. Note that
> >>>> there's no need to mass-patch drivers here, if a DMA engine client
> >>>> issues a second cyclic transaction while one is in progress, the second
> >>>> transaction won't be considered today. Signalling an error is in my
> >>>> opinion a useful feature, but not doing so in DMA engine drivers can't
> >>>> be a regression. We could also add a flag to tell whether this mode of
> >>>> operation is supported.
> >>> 
> >>> My problem is that it is changing the behavior of issue_pending() for
> >>> cyclic. If we document this then all existing DMA drivers are broken
> >>> (not compliant with the API documentation) as they don't do this.
> >>> 
> >>> 
> >>>>>>> That would be a clean way to handle it. We were missing this API for a
> >>>>>>> long time to be able to cancel the ongoing transfer (whether it is
> >>>>>>> cyclic or slave_sg, or memcpy) and move to the next one if there is one
> >>>>>>> pending.
> >>>>>>
> >>>>>> Note that this new terminate API wouldn't terminate the ongoing transfer
> >>>>>> immediately, it would complete first, until the end of the cycle for
> >>>>>> cyclic transfers, and until the end of the whole transfer otherwise.
> >>>>>> This new operation would thus essentially be a no-op for non-cyclic
> >>>>>> transfers. I don't see how it would help :-) Do you have any particular
> >>>>>> use case in mind ?
> >>>>>
> >>>>> Yeah that is something more to think about. Do we really abort here or
> >>>>> wait for the txn to complete. I think Peter needs the former and yours
> >>>>> falls in the latter category
> >>>> 
> >>>> I definitely need the latter, otherwise the display will flicker (or
> >>>> completely misoperate) every time a new frame is displayed, which isn't
> >>>> a good idea :-)
> >>> 
> >>> Sure, and it is a great feature.
> >>> 
> >>>> I'm not sure about Peter's use cases, but it seems to me
> >>>> that aborting a transaction immediately is racy in most cases, unless
> >>>> the DMA engine supports byte-level residue reporting.
> >>> 
> >>> Sort of yes. With EDMA, sDMA I can just kill the channel and set up a
> >>> new one right away.
> >>> UDMA on the other hand is not that forgiving... I would need to kill the
> >>> channel, wait for the termination to complete, reconfigure the channel
> >>> and execute the new transfer.
> >>> 
> >>> But with a separate callback API at least there will be an entry point
> >>> when this can be initiated and handled.
> >>> Fwiw, I think it should be simple to add this functionality to them, the
> >>> code is kind of handling it in other parts, but implementing it in the
> >>> issue_pending() is not really a clean solution.
> >>> 
> >>> In a channel you can run slave_sg transfers followed by cyclic if you
> >>> wish. A slave channel is what it is, slave channel which can be capable
> >>> to execute slave_sg and/or cyclic (and/or interleaved).
> >>> If issue_pending() is to take care then we need to check if the current
> >>> transfer is cyclic or not and decide based on that.
> >>> 
> >>> With a separate callback we in the DMA driver just need to do what the
> >>> client is asking for and no need to think.
> >>> 
> >>>> One non-intrusive
> >>>> option would be to add a flag to signal that a newly issued transaction
> >>>> should interrupt the current transaction immediately.
Vinod Koul March 3, 2020, 4:32 a.m. UTC | #26
Hi Laurent,

On 02-03-20, 09:37, Laurent Pinchart wrote:

> > I would be more comfortable in calling an API to do so :)
> > The flow I am thinking is:
> > 
> > - prep cyclic1 txn
> > - submit cyclic1 txn
> > - call issue_pending() (cyclic one starts)
> > 
> > - prep cyclic2 txn
> > - submit cyclic2 txn
> > - signal_cyclic1_txn aka terminate_cookie()
> > - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> > - get callback for cyclic1 (optional)
> > 
> > To check if hw supports terminate_cookie() or not we can check if the
> > callback support is implemented
> 
> Two questions though:
> 
> - Where is .issue_pending() called for cyclic2 in your above sequence ?
>   Surely it should be called somewhere, as the DMA engine API requires
>   .issue_pending() to be called for a transfer to be executed, otherwise
>   it stays in the submitted but not pending queue.

Sorry missed that one, I would do that after submit cyclic2 txn step and
then signal signal_cyclic1_txn termination

> - With the introduction of a new .terminate_cookie() operation, we need
>   to specify that operation for all transfer types. What's its

Correct

>   envisioned semantics for non-cyclic transfers ? And how do DMA engine
>   drivers report that they support .terminate_cookie() for cyclic
>   transfers but not for other transfer types (the counterpart of
>   reporting, in my proposition, that .issue_pending() isn't supported
>   replace the current cyclic transfer) ?

Typically for dmaengine controller cyclic is *not* a special mode, only
change is that a list provided to controller is circular.

So, the .terminate_cookie() should be a feature for all type of txn's.
If for some reason (don't discount what hw designers can do) a controller
supports this only for some specific type(s), then it should return
-ENOTSUPP for cookies of types it does not support and let the caller know.
Laurent Pinchart March 3, 2020, 7:22 p.m. UTC | #27
Hi Vinod,

On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> On 02-03-20, 09:37, Laurent Pinchart wrote:
> 
> > > I would be more comfortable in calling an API to do so :)
> > > The flow I am thinking is:
> > > 
> > > - prep cyclic1 txn
> > > - submit cyclic1 txn
> > > - call issue_pending() (cyclic one starts)
> > > 
> > > - prep cyclic2 txn
> > > - submit cyclic2 txn
> > > - signal_cyclic1_txn aka terminate_cookie()
> > > - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> > > - get callback for cyclic1 (optional)
> > > 
> > > To check if hw supports terminate_cookie() or not we can check if the
> > > callback support is implemented
> > 
> > Two questions though:
> > 
> > - Where is .issue_pending() called for cyclic2 in your above sequence ?
> >   Surely it should be called somewhere, as the DMA engine API requires
> >   .issue_pending() to be called for a transfer to be executed, otherwise
> >   it stays in the submitted but not pending queue.
> 
> Sorry missed that one, I would do that after submit cyclic2 txn step and
> then signal signal_cyclic1_txn termination

OK, that matches my understanding, good :-)

> > - With the introduction of a new .terminate_cookie() operation, we need
> >   to specify that operation for all transfer types. What's its
> 
> Correct
> 
> >   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> >   drivers report that they support .terminate_cookie() for cyclic
> >   transfers but not for other transfer types (the counterpart of
> >   reporting, in my proposition, that .issue_pending() isn't supported
> >   replace the current cyclic transfer) ?
> 
> Typically for dmaengine controller cyclic is *not* a special mode, only
> change is that a list provided to controller is circular.

I don't agree with this. For cyclic transfers to be replaceable in a
clean way, the feature must be specifically implemented at the hardware
level. A DMA engine that supports chaining transfers with an explicit
way to override that chaining, and without the logic to report if the
inherent race was lost or not, really can't support this API.

Furthermore, for non-cyclic transfers, what would .terminate_cookie() do
? I need it to be defined as terminating the current transfer when it
ends for the cyclic case, not terminating it immediately. All non-cyclic
transfers terminate by themselves when they end, so what would this new
operation do ?

> So, the .terminate_cookie() should be a feature for all type of txn's.
> If for some reason (dont discount what hw designers can do) a controller
> supports this for some specific type(s), then they should return
> -ENOTSUPP for cookies that do not support and let the caller know.

But then the caller can't know ahead of time, it will only find out when
it's too late, and can't decide not to use the DMA engine if it doesn't
support the feature. I don't think that's a very good option.
Vinod Koul March 4, 2020, 5:13 a.m. UTC | #28
On 03-03-20, 21:22, Laurent Pinchart wrote:
> Hi Vinod,
> 
> On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> > On 02-03-20, 09:37, Laurent Pinchart wrote:
> > 
> > > > I would be more comfortable in calling an API to do so :)
> > > > The flow I am thinking is:
> > > > 
> > > > - prep cyclic1 txn
> > > > - submit cyclic1 txn
> > > > - call issue_pending() (cyclic one starts)
> > > > 
> > > > - prep cyclic2 txn
> > > > - submit cyclic2 txn
> > > > - signal_cyclic1_txn aka terminate_cookie()
> > > > - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> > > > - get callback for cyclic1 (optional)
> > > > 
> > > > To check if hw supports terminate_cookie() or not we can check if the
> > > > callback support is implemented
> > > 
> > > Two questions though:
> > > 
> > > - Where is .issue_pending() called for cyclic2 in your above sequence ?
> > >   Surely it should be called somewhere, as the DMA engine API requires
> > >   .issue_pending() to be called for a transfer to be executed, otherwise
> > >   it stays in the submitted but not pending queue.
> > 
> > Sorry missed that one, I would do that after submit cyclic2 txn step and
> > then signal signal_cyclic1_txn termination
> 
> OK, that matches my understanding, good :-)
> 
> > > - With the introduction of a new .terminate_cookie() operation, we need
> > >   to specify that operation for all transfer types. What's its
> > 
> > Correct
> > 
> > >   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> > >   drivers report that they support .terminate_cookie() for cyclic
> > >   transfers but not for other transfer types (the counterpart of
> > >   reporting, in my proposition, that .issue_pending() isn't supported
> > >   replace the current cyclic transfer) ?
> > 
> > Typically for dmaengine controller cyclic is *not* a special mode, only
> > change is that a list provided to controller is circular.
> 
> I don't agree with this. For cyclic transfers to be replaceable in a
> clean way, the feature must be specifically implemented at the hardware
> level. A DMA engine that supports chaining transfers with an explicit
> way to override that chaining, and without the logic to report if the
> inherent race was lost or not, really can't support this API.

Well chaining is a typical feature in dmaengine and making last chain
point to first makes it circular. I have seen couple of engines and this
was the implementation in the hardware.

There can exist special hardware for this purposes as well, but the
point is that the cyclic can be treated as circular list.

> Furthemore, for non-cyclic transfers, what would .terminate_cookie() do
> ? I need it to be defined as terminating the current transfer when it
> ends for the cyclic case, not terminating it immediately. All non-cyclic
> transfers terminate by themselves when they end, so what would this new
> operation do ?

I would use it for two purposes. First, cancelling a txn but at the end
of the current txn; I have a couple of usages where this would be
helpful. Second, in error handling where some engines do not support
aborting (unless we reset the whole controller).

But yes the .terminate_cookie() semantics should indicate if the
termination should be immediate or end of current txn. I see people
using it for both.

And with this I think it would make sense to also add this to
capabilities :)

> > So, the .terminate_cookie() should be a feature for all type of txn's.
> > If for some reason (dont discount what hw designers can do) a controller
> > supports this for some specific type(s), then they should return
> > -ENOTSUPP for cookies that do not support and let the caller know.
> 
> But then the caller can't know ahead of time, it will only find out when
> it's too late, and can't decide not to use the DMA engine if it doesn't
> support the feature. I don't think that's a very good option.

Agreed so lets go with adding these in caps.
Laurent Pinchart March 4, 2020, 8:01 a.m. UTC | #29
Hi Vinod,

On Wed, Mar 04, 2020 at 10:43:01AM +0530, Vinod Koul wrote:
> On 03-03-20, 21:22, Laurent Pinchart wrote:
> > On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> > > On 02-03-20, 09:37, Laurent Pinchart wrote:
> > > > > I would be more comfortable in calling an API to do so :)
> > > > > The flow I am thinking is:
> > > > > 
> > > > > - prep cyclic1 txn
> > > > > - submit cyclic1 txn
> > > > > - call issue_pending() (cyclic one starts)
> > > > > 
> > > > > - prep cyclic2 txn
> > > > > - submit cyclic2 txn
> > > > > - signal_cyclic1_txn aka terminate_cookie()
> > > > > - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> > > > > - get callback for cyclic1 (optional)
> > > > > 
> > > > > To check if hw supports terminate_cookie() or not we can check if the
> > > > > callback support is implemented
> > > > 
> > > > Two questions though:
> > > > 
> > > > - Where is .issue_pending() called for cyclic2 in your above sequence ?
> > > >   Surely it should be called somewhere, as the DMA engine API requires
> > > >   .issue_pending() to be called for a transfer to be executed, otherwise
> > > >   it stays in the submitted but not pending queue.
> > > 
> > > Sorry missed that one, I would do that after submit cyclic2 txn step and
> > > then signal signal_cyclic1_txn termination
> > 
> > OK, that matches my understanding, good :-)
> > 
> > > > - With the introduction of a new .terminate_cookie() operation, we need
> > > >   to specify that operation for all transfer types. What's its
> > > 
> > > Correct
> > > 
> > > >   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> > > >   drivers report that they support .terminate_cookie() for cyclic
> > > >   transfers but not for other transfer types (the counterpart of
> > > >   reporting, in my proposition, that .issue_pending() isn't supported
> > > >   replace the current cyclic transfer) ?
> > > 
> > > Typically for dmaengine controller cyclic is *not* a special mode, only
> > > change is that a list provided to controller is circular.
> > 
> > I don't agree with this. For cyclic transfers to be replaceable in a
> > clean way, the feature must be specifically implemented at the hardware
> > level. A DMA engine that supports chaining transfers with an explicit
> > way to override that chaining, and without the logic to report if the
> > inherent race was lost or not, really can't support this API.
> 
> Well chaining is a typical feature in dmaengine and making last chain
> point to first makes it circular. I have seen couple of engines and this
> was the implementation in the hardware.
> 
> There can exist special hardware for this purposes as well, but the
> point is that the cyclic can be treated as circular list.
> 
> > Furthemore, for non-cyclic transfers, what would .terminate_cookie() do
> > ? I need it to be defined as terminating the current transfer when it
> > ends for the cyclic case, not terminating it immediately. All non-cyclic
> > transfers terminate by themselves when they end, so what would this new
> > operation do ?
> 
> I would use it for two purposes, cancelling txn but at the end of
> current txn. I have couple of usages where this would be helpful.

I fail to see how that would help. Non-cyclic transfers always stop at
the end of the transfer. "Cancelling txn but at the end of current txn"
is what DMA engine drivers already do if you call .terminate_cookie() on
the ongoing transfer. It would thus be a no-op.

> Second in error handling where some engines do not support
> aborting (unless we reset the whole controller)

Could you explain that one ? I'm not sure I understand it.

> But yes the .terminate_cookie() semantics should indicate if the
> termination should be immediate or end of current txn. I see people
> using it for both.

Immediate termination is *not* something I'll implement as I have no
good way to test that semantics. I assume you would be fine with leaving
that for later, when someone will need it ?

> And with this I think it would make sense to also add this to
> capabilities :)

I'll repeat the comment I made to Peter: you want me to implement a
feature that you think would be useful, but is completely unrelated to
my use case, while there's a more natural way to handle my issue with
the current API, without precluding in any way the addition of your new
feature in the future. Not fair.

> > > So, the .terminate_cookie() should be a feature for all type of txn's.
> > > If for some reason (dont discount what hw designers can do) a controller
> > > supports this for some specific type(s), then they should return
> > > -ENOTSUPP for cookies that do not support and let the caller know.
> > 
> > But then the caller can't know ahead of time, it will only find out when
> > it's too late, and can't decide not to use the DMA engine if it doesn't
> > support the feature. I don't think that's a very good option.
> 
> Agreed so lets go with adding these in caps.

So if there's a need for caps anyway, why not a cap that marks
.issue_pending() as moving from the current cyclic transfer to the next
one ?
Vinod Koul March 4, 2020, 3:37 p.m. UTC | #30
On 04-03-20, 10:01, Laurent Pinchart wrote:
> Hi Vinod,
> 
> On Wed, Mar 04, 2020 at 10:43:01AM +0530, Vinod Koul wrote:
> > On 03-03-20, 21:22, Laurent Pinchart wrote:
> > > On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> > > > On 02-03-20, 09:37, Laurent Pinchart wrote:
> > > > > > I would be more comfortable in calling an API to do so :)
> > > > > > The flow I am thinking is:
> > > > > > 
> > > > > > - prep cyclic1 txn
> > > > > > - submit cyclic1 txn
> > > > > > - call issue_pending() (cyclic one starts)
> > > > > > 
> > > > > > - prep cyclic2 txn
> > > > > > - submit cyclic2 txn
> > > > > > - signal_cyclic1_txn aka terminate_cookie()
> > > > > > - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> > > > > > - get callback for cyclic1 (optional)
> > > > > > 
> > > > > > To check if hw supports terminate_cookie() or not we can check if the
> > > > > > callback support is implemented
> > > > > 
> > > > > Two questions though:
> > > > > 
> > > > > - Where is .issue_pending() called for cyclic2 in your above sequence ?
> > > > >   Surely it should be called somewhere, as the DMA engine API requires
> > > > >   .issue_pending() to be called for a transfer to be executed, otherwise
> > > > >   it stays in the submitted but not pending queue.
> > > > 
> > > > Sorry missed that one, I would do that after submit cyclic2 txn step and
> > > > then signal signal_cyclic1_txn termination
> > > 
> > > OK, that matches my understanding, good :-)
> > > 
> > > > > - With the introduction of a new .terminate_cookie() operation, we need
> > > > >   to specify that operation for all transfer types. What's its
> > > > 
> > > > Correct
> > > > 
> > > > >   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> > > > >   drivers report that they support .terminate_cookie() for cyclic
> > > > >   transfers but not for other transfer types (the counterpart of
> > > > >   reporting, in my proposition, that .issue_pending() isn't supported
> > > > >   replace the current cyclic transfer) ?
> > > > 
> > > > Typically for dmaengine controller cyclic is *not* a special mode, only
> > > > change is that a list provided to controller is circular.
> > > 
> > > I don't agree with this. For cyclic transfers to be replaceable in a
> > > clean way, the feature must be specifically implemented at the hardware
> > > level. A DMA engine that supports chaining transfers with an explicit
> > > way to override that chaining, and without the logic to report if the
> > > inherent race was lost or not, really can't support this API.
> > 
> > Well chaining is a typical feature in dmaengine and making last chain
> > point to first makes it circular. I have seen couple of engines and this
> > was the implementation in the hardware.
> > 
> > There can exist special hardware for this purposes as well, but the
> > point is that the cyclic can be treated as circular list.
> > 
> > > Furthemore, for non-cyclic transfers, what would .terminate_cookie() do
> > > ? I need it to be defined as terminating the current transfer when it
> > > ends for the cyclic case, not terminating it immediately. All non-cyclic
> > > transfers terminate by themselves when they end, so what would this new
> > > operation do ?
> > 
> > I would use it for two purposes, cancelling txn but at the end of
> > current txn. I have couple of usages where this would be helpful.
> 
> I fail to see how that would help. Non-cyclic transfers always stop at
> the end of the transfer. "Cancelling txn but at the end of current txn"
> is what DMA engine drivers already do if you call .terminate_cookie() on
> the ongoing transfer. It would thus be a no-op.

Well that actually depends on the hardware, some of them support abort
so people cancel it (terminate_all approach atm)

> 
> > Second in error handling where some engines do not support
> > aborting (unless we reset the whole controller)
> 
> Could you explain that one ? I'm not sure to understand it.

So I have dma to a slow peripheral and it is stuck for some reason. I
want to abort the cookie and let subsequent ones run (btw this is for
non cyclic case), so I would use that here. Today we terminate_all and
then resubmit...

> > But yes the .terminate_cookie() semantics should indicate if the
> > termination should be immediate or end of current txn. I see people
> > using it for both.
> 
> Immediate termination is *not* something I'll implement as I have no
> good way to test that semantics. I assume you would be fine with leaving
> that for later, when someone will need it ?

Sure, if you have hw to support please test. If not, you will not
implement that.

The point is that API should support it and people can add support in
the controllers and test :)

> > And with this I think it would make sense to also add this to
> > capabilities :)
> 
> I'll repeat the comment I made to Peter: you want me to implement a
> feature that you think would be useful, but is completely unrelated to
> my use case, while there's a more natural way to handle my issue with
> the current API, without precluding in any way the addition of your new
> feature in the future. Not fair.

So from API design pov, I would like this to support both the features.
This helps us to not rework the API again for the immediate abort.

I am not expecting this to be implemented by you if your hw doesn't
support it. The core changes are pretty minimal and callback in the
driver is the one which does the job and yours wont do this

> > > > So, the .terminate_cookie() should be a feature for all type of txn's.
> > > > If for some reason (dont discount what hw designers can do) a controller
> > > > supports this for some specific type(s), then they should return
> > > > -ENOTSUPP for cookies that do not support and let the caller know.
> > > 
> > > But then the caller can't know ahead of time, it will only find out when
> > > it's too late, and can't decide not to use the DMA engine if it doesn't
> > > support the feature. I don't think that's a very good option.
> > 
> > Agreed so lets go with adding these in caps.
> 
> So if there's a need for caps anyway, why not a cap that marks
> .issue_pending() as moving from the current cyclic transfer to the next
> one ? 

Is the overhead really too much on that :) If you like I can send the
core patches and you would need to implement the driver side?
Laurent Pinchart March 4, 2020, 4 p.m. UTC | #31
Hi Vinod,

On Wed, Mar 04, 2020 at 09:07:18PM +0530, Vinod Koul wrote:
> On 04-03-20, 10:01, Laurent Pinchart wrote:
> > On Wed, Mar 04, 2020 at 10:43:01AM +0530, Vinod Koul wrote:
> >> On 03-03-20, 21:22, Laurent Pinchart wrote:
> >>> On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> >>>> On 02-03-20, 09:37, Laurent Pinchart wrote:
> >>>>>> I would be more comfortable in calling an API to do so :)
> >>>>>> The flow I am thinking is:
> >>>>>> 
> >>>>>> - prep cyclic1 txn
> >>>>>> - submit cyclic1 txn
> >>>>>> - call issue_pending() (cyclic one starts)
> >>>>>> 
> >>>>>> - prep cyclic2 txn
> >>>>>> - submit cyclic2 txn
> >>>>>> - signal_cyclic1_txn aka terminate_cookie()
> >>>>>> - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> >>>>>> - get callback for cyclic1 (optional)
> >>>>>> 
> >>>>>> To check if hw supports terminate_cookie() or not we can check if the
> >>>>>> callback support is implemented
> >>>>> 
> >>>>> Two questions though:
> >>>>> 
> >>>>> - Where is .issue_pending() called for cyclic2 in your above sequence ?
> >>>>>   Surely it should be called somewhere, as the DMA engine API requires
> >>>>>   .issue_pending() to be called for a transfer to be executed, otherwise
> >>>>>   it stays in the submitted but not pending queue.
> >>>> 
> >>>> Sorry missed that one, I would do that after submit cyclic2 txn step and
> >>>> then signal signal_cyclic1_txn termination
> >>> 
> >>> OK, that matches my understanding, good :-)
> >>> 
> >>>>> - With the introduction of a new .terminate_cookie() operation, we need
> >>>>>   to specify that operation for all transfer types. What's its
> >>>> 
> >>>> Correct
> >>>> 
> >>>>>   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> >>>>>   drivers report that they support .terminate_cookie() for cyclic
> >>>>>   transfers but not for other transfer types (the counterpart of
> >>>>>   reporting, in my proposition, that .issue_pending() isn't supported
> >>>>>   replace the current cyclic transfer) ?
> >>>> 
> >>>> Typically for dmaengine controller cyclic is *not* a special mode, only
> >>>> change is that a list provided to controller is circular.
> >>> 
> >>> I don't agree with this. For cyclic transfers to be replaceable in a
> >>> clean way, the feature must be specifically implemented at the hardware
> >>> level. A DMA engine that supports chaining transfers with an explicit
> >>> way to override that chaining, and without the logic to report if the
> >>> inherent race was lost or not, really can't support this API.
> >> 
> >> Well chaining is a typical feature in dmaengine and making last chain
> >> point to first makes it circular. I have seen couple of engines and this
> >> was the implementation in the hardware.
> >> 
> >> There can exist special hardware for this purposes as well, but the
> >> point is that the cyclic can be treated as circular list.
> >> 
> >>> Furthemore, for non-cyclic transfers, what would .terminate_cookie() do
> >>> ? I need it to be defined as terminating the current transfer when it
> >>> ends for the cyclic case, not terminating it immediately. All non-cyclic
> >>> transfers terminate by themselves when they end, so what would this new
> >>> operation do ?
> >> 
> >> I would use it for two purposes, cancelling txn but at the end of
> >> current txn. I have couple of usages where this would be helpful.
> > 
> > I fail to see how that would help. Non-cyclic transfers always stop at
> > the end of the transfer. "Cancelling txn but at the end of current txn"
> > is what DMA engine drivers already do if you call .terminate_cookie() on
> > the ongoing transfer. It would thus be a no-op.
> 
> Well that actually depends on the hardware, some of them support abort
> so people cancel it (terminate_all approach atm)

In that case it's not terminating at the end of the current transfer,
but terminating immediately (a.k.a. aborting), right ? Cancelling at the
end of the current transfer still seems to be a no-op to me for
non-cyclic transfers, as that's what they do on their own already.

> >> Second in error handling where some engines do not support
> >> aborting (unless we reset the whole controller)
> > 
> > Could you explain that one ? I'm not sure to understand it.
> 
> So I have dma to a slow peripheral and it is stuck for some reason. I
> want to abort the cookie and let subsequent ones runs (btw this is for
> non cyclic case), so I would use that here. Today we terminate_all and
> then resubmit...

That's also for immediate abort, right ?

For this to work properly we need very accurate residue reporting, as
the client will usually need to know exactly what has been transferred.
The device would need to support DMA_RESIDUE_GRANULARITY_BURST when
aborting an ongoing transfer. What hardware supports this ?

> >> But yes the .terminate_cookie() semantics should indicate if the
> >> termination should be immediate or end of current txn. I see people
> >> using it for both.
> > 
> > Immediate termination is *not* something I'll implement as I have no
> > good way to test that semantics. I assume you would be fine with leaving
> > that for later, when someone will need it ?
> 
> Sure, if you have hw to support please test. If not, you will not
> implement that.
> 
> The point is that API should support it and people can add support in
> the controllers and test :)

I still think this is a different API. We'll have

1. Existing .issue_pending(), queueing the next transfer for non-cyclic
   cases, and being a no-op for cyclic cases.
2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
   non-cyclic cases, and moving to the next transfer for cyclic cases.
3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
   and non-cyclic cases.

3. is an API I don't need, and can't easily test. I agree that it can
have use cases (provided the DMA device can abort an ongoing transfer
*and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).

I'm troubled by my inability to convince you that 1. and 2. are really
the same, with 1. addressing the non-cyclic case and 2. addressing the
cyclic case :-) This is why I think they should both be implemented using
.issue_pending() (no other option for 1., that's what it uses today).
This wouldn't prevent implementing 3. with a new .terminate_cookie()
operation, that wouldn't need to take a flag as it would always operate
in ABORT_IMMEDIATELY mode. There would also be no need to report a new
capability for 3., as the presence of the .terminate_cookie() handler
would be enough to tell clients that the API is supported. Only a new
capability for 2. would be needed.

> >> And with this I think it would make sense to also add this to
> >> capabilities :)
> > 
> > I'll repeat the comment I made to Peter: you want me to implement a
> > feature that you think would be useful, but is completely unrelated to
> > my use case, while there's a more natural way to handle my issue with
> > the current API, without precluding in any way the addition of your new
> > feature in the future. Not fair.
> 
> So from API design pov, I would like this to support both the features.
> This helps us to not rework the API again for the immediate abort.
> 
> I am not expecting this to be implemented by you if your hw doesn't
> support it. The core changes are pretty minimal and callback in the
> driver is the one which does the job and yours wont do this

Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
can't test this indeed.

> >>>> So, the .terminate_cookie() should be a feature for all type of txn's.
> >>>> If for some reason (dont discount what hw designers can do) a controller
> >>>> supports this for some specific type(s), then they should return
> >>>> -ENOTSUPP for cookies that do not support and let the caller know.
> >>> 
> >>> But then the caller can't know ahead of time, it will only find out when
> >>> it's too late, and can't decide not to use the DMA engine if it doesn't
> >>> support the feature. I don't think that's a very good option.
> >> 
> >> Agreed so lets go with adding these in caps.
> > 
> > So if there's a need for caps anyway, why not a cap that marks
> > .issue_pending() as moving from the current cyclic transfer to the next
> > one ? 
> 
> Is the overhead really too much on that :) If you like I can send the
> core patches and you would need to implement the driver side?

We can try that as a compromise. One of main concerns with developing
the core patches myself is that the .terminate_cookie() API still seems
ill-defined to me, so it would be much more efficient if you translate
the idea you have in your head into code than trying to communicate it
to me in all details (one of the grey areas is what should
.terminate_cookie() do if the cookie passed to the function corresponds
to an already terminated or, more tricky from a completion callback
point of view, an issued but not-yet-started transfer, or also a
submitted but not issued transfer). If you implement the core part, then
that problem will go away.

How about the implementation in virt-dma.[ch] by the way ?
Vinod Koul March 4, 2020, 4:24 p.m. UTC | #32
Hi Laurent,

On 04-03-20, 18:00, Laurent Pinchart wrote:
> On Wed, Mar 04, 2020 at 09:07:18PM +0530, Vinod Koul wrote:
> > On 04-03-20, 10:01, Laurent Pinchart wrote:
> > > On Wed, Mar 04, 2020 at 10:43:01AM +0530, Vinod Koul wrote:
> > >> On 03-03-20, 21:22, Laurent Pinchart wrote:
> > >>> On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> > >>>> On 02-03-20, 09:37, Laurent Pinchart wrote:
> > >>>>>> I would be more comfortable in calling an API to do so :)
> > >>>>>> The flow I am thinking is:
> > >>>>>> 
> > >>>>>> - prep cyclic1 txn
> > >>>>>> - submit cyclic1 txn
> > >>>>>> - call issue_pending() (cyclic one starts)
> > >>>>>> 
> > >>>>>> - prep cyclic2 txn
> > >>>>>> - submit cyclic2 txn
> > >>>>>> - signal_cyclic1_txn aka terminate_cookie()
> > >>>>>> - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> > >>>>>> - get callback for cyclic1 (optional)
> > >>>>>> 
> > >>>>>> To check if hw supports terminate_cookie() or not we can check if the
> > >>>>>> callback support is implemented
> > >>>>> 
> > >>>>> Two questions though:
> > >>>>> 
> > >>>>> - Where is .issue_pending() called for cyclic2 in your above sequence ?
> > >>>>>   Surely it should be called somewhere, as the DMA engine API requires
> > >>>>>   .issue_pending() to be called for a transfer to be executed, otherwise
> > >>>>>   it stays in the submitted but not pending queue.
> > >>>> 
> > >>>> Sorry missed that one, I would do that after submit cyclic2 txn step and
> > >>>> then signal signal_cyclic1_txn termination
> > >>> 
> > >>> OK, that matches my understanding, good :-)
> > >>> 
> > >>>>> - With the introduction of a new .terminate_cookie() operation, we need
> > >>>>>   to specify that operation for all transfer types. What's its
> > >>>> 
> > >>>> Correct
> > >>>> 
> > >>>>>   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> > >>>>>   drivers report that they support .terminate_cookie() for cyclic
> > >>>>>   transfers but not for other transfer types (the counterpart of
> > >>>>>   reporting, in my proposition, that .issue_pending() isn't supported
> > >>>>>   replace the current cyclic transfer) ?
> > >>>> 
> > >>>> Typically for dmaengine controller cyclic is *not* a special mode, only
> > >>>> change is that a list provided to controller is circular.
> > >>> 
> > >>> I don't agree with this. For cyclic transfers to be replaceable in a
> > >>> clean way, the feature must be specifically implemented at the hardware
> > >>> level. A DMA engine that supports chaining transfers with an explicit
> > >>> way to override that chaining, and without the logic to report if the
> > >>> inherent race was lost or not, really can't support this API.
> > >> 
> > >> Well chaining is a typical feature in dmaengine and making last chain
> > >> point to first makes it circular. I have seen couple of engines and this
> > >> was the implementation in the hardware.
> > >> 
> > >> There can exist special hardware for this purposes as well, but the
> > >> point is that the cyclic can be treated as circular list.
> > >> 
> > >>> Furthemore, for non-cyclic transfers, what would .terminate_cookie() do
> > >>> ? I need it to be defined as terminating the current transfer when it
> > >>> ends for the cyclic case, not terminating it immediately. All non-cyclic
> > >>> transfers terminate by themselves when they end, so what would this new
> > >>> operation do ?
> > >> 
> > >> I would use it for two purposes, cancelling txn but at the end of
> > >> current txn. I have couple of usages where this would be helpful.
> > > 
> > > I fail to see how that would help. Non-cyclic transfers always stop at
> > > the end of the transfer. "Cancelling txn but at the end of current txn"
> > > is what DMA engine drivers already do if you call .terminate_cookie() on
> > > the ongoing transfer. It would thus be a no-op.
> > 
> > Well that actually depends on the hardware, some of them support abort
> > so people cancel it (terminate_all approach atm)
> 
> In that case it's not terminating at the end of the current transfer,
> but terminating immediately (a.k.a. aborting), right ? Cancelling at the
> end of the current transfer still seems to be a no-op to me for
> non-cyclic transfers, as that's what they do on their own already.

Correct, it is abort for current txn.

> > >> Second in error handling where some engines do not support
> > >> aborting (unless we reset the whole controller)
> > > 
> > > Could you explain that one ? I'm not sure to understand it.
> > 
> > So I have dma to a slow peripheral and it is stuck for some reason. I
> > want to abort the cookie and let subsequent ones runs (btw this is for
> > non cyclic case), so I would use that here. Today we terminate_all and
> > then resubmit...
> 
> That's also for immediate abort, right ?

Right

> For this to work properly we need very accurate residue reporting, as
> the client will usually need to know exactly what has been transferred.
> The device would need to support DMA_RESIDUE_GRANULARITY_BURST when
> aborting an ongoing transfer. What hardware supports this ?

 git grep DMA_RESIDUE_GRANULARITY_BURST drivers/dma/ |wc -l
27

So it seems many do support the burst reporting.

> > >> But yes the .terminate_cookie() semantics should indicate if the
> > >> termination should be immediate or end of current txn. I see people
> > >> using it for both.
> > > 
> > > Immediate termination is *not* something I'll implement as I have no
> > > good way to test that semantics. I assume you would be fine with leaving
> > > that for later, when someone will need it ?
> > 
> > Sure, if you have hw to support please test. If not, you will not
> > implement that.
> > 
> > The point is that API should support it and people can add support in
> > the controllers and test :)
> 
> I still think this is a different API. We'll have
> 
> 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
>    cases, and being a no-op for cyclic cases.
> 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
>    non-cyclic cases, and moving to the next transfer for cyclic cases.
> 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
>    and non-cyclic cases.
> 
> 3. is an API I don't need, and can't easily test. I agree that it can
> have use cases (provided the DMA device can abort an ongoing transfer
> *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> 
> I'm troubled by my inability to convince you that 1. and 2. are really
> the same, with 1. addressing the non-cyclic case and 2. addressing the
> cyclic case :-) This is why I think they should both be implemeted using
> .issue_pending() (no other option for 1., that's what it uses today).
> This wouldn't prevent implementing 3. with a new .terminate_cookie()
> operation, that wouldn't need to take a flag as it would always operate
> in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> capability for 3., as the presence of the .terminate_cookie() handler
> would be enough to tell clients that the API is supported. Only a new
> capability for 2. would be needed.

Well I agree 1 & 2 seem similar but I would like to define the behaviour
not dependent on the txn being cyclic or not. That is my concern and
hence the idea that:

1. .issue_pending() will push txn to pending_queue, you may have a case
where that is done only once (due to nature of txn), but no other
implication

2. .terminate_cookie(EOT) will abort the transfer at the end. Maybe not
used for cyclic but irrespective of that, the behaviour would be abort
at end of cyclic

3. .terminate_cookie(IMMEDIATE) will abort immediately. If there is
anything in pending_queue that will get pushed to hardware.

4. Cyclic by nature never completes
   - as a consequence needs to be stopped by terminate_all/terminate_cookie

Do these rules make sense :)

> > >> And with this I think it would make sense to also add this to
> > >> capabilities :)
> > > 
> > > I'll repeat the comment I made to Peter: you want me to implement a
> > > feature that you think would be useful, but is completely unrelated to
> > > my use case, while there's a more natural way to handle my issue with
> > > the current API, without precluding in any way the addition of your new
> > > feature in the future. Not fair.
> > 
> > So from API design pov, I would like this to support both the features.
> > This helps us to not rework the API again for the immediate abort.
> > 
> > I am not expecting this to be implemented by you if your hw doesn't
> > support it. The core changes are pretty minimal and callback in the
> > driver is the one which does the job and yours wont do this
> 
> Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> can't test this indeed.

Sure I understand that! Am sure folks will respond to CFT and I guess
Peter will also be interested in testing.

> > >>>> So, the .terminate_cookie() should be a feature for all type of txn's.
> > >>>> If for some reason (dont discount what hw designers can do) a controller
> > >>>> supports this for some specific type(s), then they should return
> > >>>> -ENOTSUPP for cookies that do not support and let the caller know.
> > >>> 
> > >>> But then the caller can't know ahead of time, it will only find out when
> > >>> it's too late, and can't decide not to use the DMA engine if it doesn't
> > >>> support the feature. I don't think that's a very good option.
> > >> 
> > >> Agreed so lets go with adding these in caps.
> > > 
> > > So if there's a need for caps anyway, why not a cap that marks
> > > .issue_pending() as moving from the current cyclic transfer to the next
> > > one ? 
> > 
> > Is the overhead really too much on that :) If you like I can send the
> > core patches and you would need to implement the driver side?
> 
> We can try that as a compromise. One of main concerns with developing
> the core patches myself is that the .terminate_cookie() API still seems
> ill-defined to me, so it would be much more efficient if you translate

yeah lets take a stab at defining this and see if we come up with
something meaningful

> the idea you have in your idea into code than trying to communicate it
> to me in all details (one of the grey areas is what should
> .terminate_cookie() do if the cookie passed to the function corresponds
> to an already terminated or, more tricky from a completion callback
> point of view, an issued but not-yet-started transfer, or also a
> submitted but not issued transfer). If you implement the core part, then
> that problem will go away.
> 
> How about the implementation in virt-dma.[ch] by the way ?

It needs to be comprehended and tested as well.. since these are simple
callbacks to driver, we should not need huge changes here (i need to
double check though)
Peter Ujfalusi March 6, 2020, 2:49 p.m. UTC | #33
Laurent,

On 04/03/2020 18.00, Laurent Pinchart wrote:
> I still think this is a different API. We'll have
> 
> 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
>    cases, and being a no-op for cyclic cases.
> 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
>    non-cyclic cases, and moving to the next transfer for cyclic cases.
> 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
>    and non-cyclic cases.
> 
> 3. is an API I don't need, and can't easily test. I agree that it can
> have use cases (provided the DMA device can abort an ongoing transfer
> *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> 
> I'm troubled by my inability to convince you that 1. and 2. are really
> the same, with 1. addressing the non-cyclic case and 2. addressing the
> cyclic case :-) This is why I think they should both be implemeted using
> .issue_pending() (no other option for 1., that's what it uses today).
> This wouldn't prevent implementing 3. with a new .terminate_cookie()
> operation, that wouldn't need to take a flag as it would always operate
> in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> capability for 3., as the presence of the .terminate_cookie() handler
> would be enough to tell clients that the API is supported. Only a new
> capability for 2. would be needed.

Let's see the two cases, AT_END_OF_TRANSFER and ABORT_IMMEDIATELY
against cyclic and slave for simplicity:
- AT_END_OF_TRANSFER
...
issue_pending(1)
issue_pending(2)
terminate_cookie(AT_END_OF_TRANSFER)

In case of cyclic:
When cookie1 finishes a tx cookie2 will start.

Same sequence in case of slave:
When cookie1 finishes a tx cookie2 will start.
 Yes, terminate_cookie(AT_END_OF_TRANSFER) is NOP

- ABORT_IMMEDIATELY
...
issue_pending(1)
issue_pending(2)
terminate_cookie(ABORT_IMMEDIATELY)

In case of cyclic and slave:
Abort cookie1 right away and start cookie2.

In case of cyclic:
When cookie1 finishes a tx cookie2 will start.

True, we have NOP operation, but as you can see the semantics of the two
cases are well defined and consistent among different operations.

Imho the only thing which is not really defined is the
AT_END_OF_TRANSFER, is it after the current period, or when finishing
the buffer / after a frame or all frames are consumed in the current tx
for interleaved.


>>>> And with this I think it would make sense to also add this to
>>>> capabilities :)
>>>
>>> I'll repeat the comment I made to Peter: you want me to implement a
>>> feature that you think would be useful, but is completely unrelated to
>>> my use case, while there's a more natural way to handle my issue with
>>> the current API, without precluding in any way the addition of your new
>>> feature in the future. Not fair.
>>
>> So from API design pov, I would like this to support both the features.
>> This helps us to not rework the API again for the immediate abort.
>>
>> I am not expecting this to be implemented by you if your hw doesn't
>> support it. The core changes are pretty minimal and callback in the
>> driver is the one which does the job and yours wont do this
> 
> Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> can't test this indeed.

All TI DMA supports it ;)

> 
>>>>>> So, the .terminate_cookie() should be a feature for all type of txn's.
>>>>>> If for some reason (dont discount what hw designers can do) a controller
>>>>>> supports this for some specific type(s), then they should return
>>>>>> -ENOTSUPP for cookies that do not support and let the caller know.
>>>>>
>>>>> But then the caller can't know ahead of time, it will only find out when
>>>>> it's too late, and can't decide not to use the DMA engine if it doesn't
>>>>> support the feature. I don't think that's a very good option.
>>>>
>>>> Agreed so lets go with adding these in caps.
>>>
>>> So if there's a need for caps anyway, why not a cap that marks
>>> .issue_pending() as moving from the current cyclic transfer to the next
>>> one ? 
>>
>> Is the overhead really too much on that :) If you like I can send the
>> core patches and you would need to implement the driver side?
> 
> We can try that as a compromise. One of main concerns with developing
> the core patches myself is that the .terminate_cookie() API still seems
> ill-defined to me, so it would be much more efficient if you translate
> the idea you have in your idea into code than trying to communicate it
> to me in all details (one of the grey areas is what should
> .terminate_cookie() do if the cookie passed to the function corresponds
> to an already terminated or, more tricky from a completion callback
> point of view, an issued but not-yet-started transfer, or also a
> submitted but not issued transfer). If you implement the core part, then
> that problem will go away.
> 
> How about the implementation in virt-dma.[ch] by the way ?
> 

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Laurent Pinchart March 11, 2020, 3:52 p.m. UTC | #34
Hi Vinod,

On Wed, Mar 04, 2020 at 09:54:26PM +0530, Vinod Koul wrote:
> On 04-03-20, 18:00, Laurent Pinchart wrote:
> > On Wed, Mar 04, 2020 at 09:07:18PM +0530, Vinod Koul wrote:
> >> On 04-03-20, 10:01, Laurent Pinchart wrote:
> >>> On Wed, Mar 04, 2020 at 10:43:01AM +0530, Vinod Koul wrote:
> >>>> On 03-03-20, 21:22, Laurent Pinchart wrote:
> >>>>> On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> >>>>>> On 02-03-20, 09:37, Laurent Pinchart wrote:
> >>>>>>>> I would be more comfortable in calling an API to do so :)
> >>>>>>>> The flow I am thinking is:
> >>>>>>>> 
> >>>>>>>> - prep cyclic1 txn
> >>>>>>>> - submit cyclic1 txn
> >>>>>>>> - call issue_pending() (cyclic one starts)
> >>>>>>>> 
> >>>>>>>> - prep cyclic2 txn
> >>>>>>>> - submit cyclic2 txn
> >>>>>>>> - signal_cyclic1_txn aka terminate_cookie()
> >>>>>>>> - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> >>>>>>>> - get callback for cyclic1 (optional)
> >>>>>>>> 
> >>>>>>>> To check if hw supports terminate_cookie() or not we can check if the
> >>>>>>>> callback support is implemented
> >>>>>>> 
> >>>>>>> Two questions though:
> >>>>>>> 
> >>>>>>> - Where is .issue_pending() called for cyclic2 in your above sequence ?
> >>>>>>>   Surely it should be called somewhere, as the DMA engine API requires
> >>>>>>>   .issue_pending() to be called for a transfer to be executed, otherwise
> >>>>>>>   it stays in the submitted but not pending queue.
> >>>>>> 
> >>>>>> Sorry missed that one, I would do that after submit cyclic2 txn step and
> >>>>>> then signal signal_cyclic1_txn termination
> >>>>> 
> >>>>> OK, that matches my understanding, good :-)
> >>>>> 
> >>>>>>> - With the introduction of a new .terminate_cookie() operation, we need
> >>>>>>>   to specify that operation for all transfer types. What's its
> >>>>>> 
> >>>>>> Correct
> >>>>>> 
> >>>>>>>   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> >>>>>>>   drivers report that they support .terminate_cookie() for cyclic
> >>>>>>>   transfers but not for other transfer types (the counterpart of
> >>>>>>>   reporting, in my proposition, that .issue_pending() isn't supported
> >>>>>>>   replace the current cyclic transfer) ?
> >>>>>> 
> >>>>>> Typically for dmaengine controller cyclic is *not* a special mode, only
> >>>>>> change is that a list provided to controller is circular.
> >>>>> 
> >>>>> I don't agree with this. For cyclic transfers to be replaceable in a
> >>>>> clean way, the feature must be specifically implemented at the hardware
> >>>>> level. A DMA engine that supports chaining transfers with an explicit
> >>>>> way to override that chaining, and without the logic to report if the
> >>>>> inherent race was lost or not, really can't support this API.
> >>>> 
> >>>> Well chaining is a typical feature in dmaengine and making last chain
> >>>> point to first makes it circular. I have seen couple of engines and this
> >>>> was the implementation in the hardware.
> >>>> 
> >>>> There can exist special hardware for this purposes as well, but the
> >>>> point is that the cyclic can be treated as circular list.
> >>>> 
> >>>>> Furthemore, for non-cyclic transfers, what would .terminate_cookie() do
> >>>>> ? I need it to be defined as terminating the current transfer when it
> >>>>> ends for the cyclic case, not terminating it immediately. All non-cyclic
> >>>>> transfers terminate by themselves when they end, so what would this new
> >>>>> operation do ?
> >>>> 
> >>>> I would use it for two purposes, cancelling txn but at the end of
> >>>> current txn. I have couple of usages where this would be helpful.
> >>> 
> >>> I fail to see how that would help. Non-cyclic transfers always stop at
> >>> the end of the transfer. "Cancelling txn but at the end of current txn"
> >>> is what DMA engine drivers already do if you call .terminate_cookie() on
> >>> the ongoing transfer. It would thus be a no-op.
> >> 
> >> Well that actually depends on the hardware, some of them support abort
> >> so people cancel it (terminate_all approach atm)
> > 
> > In that case it's not terminating at the end of the current transfer,
> > but terminating immediately (a.k.a. aborting), right ? Cancelling at the
> > end of the current transfer still seems to be a no-op to me for
> > non-cyclic transfers, as that's what they do on their own already.
> 
> Correct, it is abort for current txn.
> 
> >>>> Second in error handling where some engines do not support
> >>>> aborting (unless we reset the whole controller)
> >>> 
> >>> Could you explain that one ? I'm not sure to understand it.
> >> 
> >> So I have dma to a slow peripheral and it is stuck for some reason. I
> >> want to abort the cookie and let subsequent ones runs (btw this is for
> >> non cyclic case), so I would use that here. Today we terminate_all and
> >> then resubmit...
> > 
> > That's also for immediate abort, right ?
> 
> Right
> 
> > For this to work properly we need very accurate residue reporting, as
> > the client will usually need to know exactly what has been transferred.
> > The device would need to support DMA_RESIDUE_GRANULARITY_BURST when
> > aborting an ongoing transfer. What hardware supports this ?
> 
>  git grep DMA_RESIDUE_GRANULARITY_BURST drivers/dma/ |wc -l
> 27
> 
> So it seems many do support the burst reporting.

Yes, but not all of those may support aborting a transfer *and*
reporting the exact residue of cancelled transfers. We need both to
implement your proposal.

> >>>> But yes the .terminate_cookie() semantics should indicate if the
> >>>> termination should be immediate or end of current txn. I see people
> >>>> using it for both.
> >>> 
> >>> Immediate termination is *not* something I'll implement as I have no
> >>> good way to test that semantics. I assume you would be fine with leaving
> >>> that for later, when someone will need it ?
> >> 
> >> Sure, if you have hw to support please test. If not, you will not
> >> implement that.
> >> 
> >> The point is that API should support it and people can add support in
> >> the controllers and test :)
> > 
> > I still think this is a different API. We'll have
> > 
> > 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
> >    cases, and being a no-op for cyclic cases.
> > 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
> >    non-cyclic cases, and moving to the next transfer for cyclic cases.
> > 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
> >    and non-cyclic cases.
> > 
> > 3. is an API I don't need, and can't easily test. I agree that it can
> > have use cases (provided the DMA device can abort an ongoing transfer
> > *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> > 
> > I'm troubled by my inability to convince you that 1. and 2. are really
> > the same, with 1. addressing the non-cyclic case and 2. addressing the
> > cyclic case :-) This is why I think they should both be implemeted using
> > .issue_pending() (no other option for 1., that's what it uses today).
> > This wouldn't prevent implementing 3. with a new .terminate_cookie()
> > operation, that wouldn't need to take a flag as it would always operate
> > in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> > capability for 3., as the presence of the .terminate_cookie() handler
> > would be enough to tell clients that the API is supported. Only a new
> > capability for 2. would be needed.
> 
> Well I agree 1 & 2 seem similar but I would like to define the behaviour
> not dependent on the txn being cyclic or not. That is my concern and
> hence the idea that:
> 
> 1. .issue_pending() will push txn to pending_queue, you may have a case
> where that is done only once (due to nature of txn), but no other
> implication
> 
> 2. .terminate_cookie(EOT) will abort the transfer at the end. Maybe not
> used for cyclic but irrespective of that, the behaviour would be abort
> at end of cyclic

Did you mean "maybe not used for non-cyclic" ?

> 3. .terminate_cookie(IMMEDIATE) will abort immediately. If there is
> anything in pending_queue that will get pushed to hardware.
> 
> 4. Cyclic by nature never completes
>    - as a consequence needs to be stopped by terminate_all/terminate_cookie
> 
> Does these rules make sense :)

It's a set of rules that I think can handle my use case, but I still
believe my proposal based on just .issue_pending() would be simpler, in
line with the existing API concepts, and wouldn't preclude the addition
of .terminate_cookie(IMMEDIATE) at a later point. It's your call though,
especially if you provide the implementation :-) When do you think you
will be able to do so ?

> >>>> And with this I think it would make sense to also add this to
> >>>> capabilities :)
> >>> 
> >>> I'll repeat the comment I made to Peter: you want me to implement a
> >>> feature that you think would be useful, but is completely unrelated to
> >>> my use case, while there's a more natural way to handle my issue with
> >>> the current API, without precluding in any way the addition of your new
> >>> feature in the future. Not fair.
> >> 
> >> So from API design pov, I would like this to support both the features.
> >> This helps us to not rework the API again for the immediate abort.
> >> 
> >> I am not expecting this to be implemented by you if your hw doesn't
> >> support it. The core changes are pretty minimal and callback in the
> >> driver is the one which does the job and yours wont do this
> > 
> > Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> > can't test this indeed.
> 
> Sure I understand that! Am sure folks will respond to CFT and I guess
> Peter will also be interested in testing.

s/testing/implementing it/ :-)

> >>>>>> So, the .terminate_cookie() should be a feature for all type of txn's.
> >>>>>> If for some reason (dont discount what hw designers can do) a controller
> >>>>>> supports this for some specific type(s), then they should return
> >>>>>> -ENOTSUPP for cookies that do not support and let the caller know.
> >>>>> 
> >>>>> But then the caller can't know ahead of time, it will only find out when
> >>>>> it's too late, and can't decide not to use the DMA engine if it doesn't
> >>>>> support the feature. I don't think that's a very good option.
> >>>> 
> >>>> Agreed so lets go with adding these in caps.
> >>> 
> >>> So if there's a need for caps anyway, why not a cap that marks
> >>> .issue_pending() as moving from the current cyclic transfer to the next
> >>> one ? 
> >> 
> >> Is the overhead really too much on that :) If you like I can send the
> >> core patches and you would need to implement the driver side?
> > 
> > We can try that as a compromise. One of main concerns with developing
> > the core patches myself is that the .terminate_cookie() API still seems
> > ill-defined to me, so it would be much more efficient if you translate
> 
> yeah lets take a stab at defining this and see if we come up with
> something meaningful
> 
> > the idea you have in your idea into code than trying to communicate it
> > to me in all details (one of the grey areas is what should
> > .terminate_cookie() do if the cookie passed to the function corresponds
> > to an already terminated or, more tricky from a completion callback
> > point of view, an issued but not-yet-started transfer, or also a
> > submitted but not issued transfer). If you implement the core part, then
> > that problem will go away.
> > 
> > How about the implementation in virt-dma.[ch] by the way ?
> 
> It needs to be comprehended and tested as well.. since these are simple
> callbacks to driver, we should not need huge changes here (i need to
> double check though)
Laurent Pinchart March 11, 2020, 11:15 p.m. UTC | #35
Hi Peter,

On Fri, Mar 06, 2020 at 04:49:01PM +0200, Peter Ujfalusi wrote:
> On 04/03/2020 18.00, Laurent Pinchart wrote:
> > I still think this is a different API. We'll have
> > 
> > 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
> >    cases, and being a no-op for cyclic cases.
> > 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
> >    non-cyclic cases, and moving to the next transfer for cyclic cases.
> > 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
> >    and non-cyclic cases.
> > 
> > 3. is an API I don't need, and can't easily test. I agree that it can
> > have use cases (provided the DMA device can abort an ongoing transfer
> > *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> > 
> > I'm troubled by my inability to convince you that 1. and 2. are really
> > the same, with 1. addressing the non-cyclic case and 2. addressing the
> > cyclic case :-) This is why I think they should both be implemeted using
> > .issue_pending() (no other option for 1., that's what it uses today).
> > This wouldn't prevent implementing 3. with a new .terminate_cookie()
> > operation, that wouldn't need to take a flag as it would always operate
> > in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> > capability for 3., as the presence of the .terminate_cookie() handler
> > would be enough to tell clients that the API is supported. Only a new
> > capability for 2. would be needed.
> 
> Let's see the two cases, AT_END_OF_TRANSFER and ABORT_IMMEDIATELY
> against cyclic and slave for simplicity:
> - AT_END_OF_TRANSFER
> ...
> issue_pending(1)
> issue_pending(2)
> terminate_cookie(AT_END_OF_TRANSFER)
> 
> In case of cyclic:
> When cookie1 finishes a tx cookie2 will start.
> 
> Same sequence in case of slave:
> When cookie1 finishes a tx cookie2 will start.
>  Yes, terminate_cookie(AT_END_OF_TRANSFER) is NOP
> 
> - ABORT_IMMEDIATELY
> ...
> issue_pending(1)
> issue_pending(2)
> terminate_cookie(ABORT_IMMEDIATELY)
> 
> In case of cyclic and slave:
> Abort cookie1 right away and start cookie2.
> 
> In case of cyclic:
> When cookie1 finishes a tx cookie2 will start.

Is this paragraph a copy & paste leftover ?

> True, we have NOP operation, but as you can see the semantics of the two
> cases are well defined and consistent among different operations.

I'm not disputing that, but I still think that the semantics for the
proposal based solely on issue_pending() is well-defined too and
consistent among different operations :-) My point is that
terminate_cookie() is only required for the ABORT_IMMEDIATELY case,
which could be implemented on top of my proposal. Anyway, I seem to have
failed in my attempt to convince Vinod, and he proposed providing the
implementation of terminate_cookie() in the DMA engine core and doc, so
I'll rebase the driver on top of that and submit the two together after
testing.

> Imho the only thing which is not really defined is the
> AT_END_OF_TRANSFER, is it after the current period, or when finishing
> the buffer / after a frame or all frames are consumed in the current tx
> for interleaved.

For 2D interleaved cyclic transfers, there's a single period, so that's
not an issue. For the existing cyclic API it's up to us to decide, and I
don't have enough insight on the expected usage and hardware features to
answer that question.

> >>>> And with this I think it would make sense to also add this to
> >>>> capabilities :)
> >>>
> >>> I'll repeat the comment I made to Peter: you want me to implement a
> >>> feature that you think would be useful, but is completely unrelated to
> >>> my use case, while there's a more natural way to handle my issue with
> >>> the current API, without precluding in any way the addition of your new
> >>> feature in the future. Not fair.
> >>
> >> So from API design pov, I would like this to support both the features.
> >> This helps us to not rework the API again for the immediate abort.
> >>
> >> I am not expecting this to be implemented by you if your hw doesn't
> >> support it. The core changes are pretty minimal and callback in the
> >> driver is the one which does the job and yours wont do this
> > 
> > Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> > can't test this indeed.
> 
> All TI DMA supports it ;)

Great, so you can implement this feature ;-)

> >>>>>> So, the .terminate_cookie() should be a feature for all type of txn's.
> >>>>>> If for some reason (dont discount what hw designers can do) a controller
> >>>>>> supports this for some specific type(s), then they should return
> >>>>>> -ENOTSUPP for cookies that do not support and let the caller know.
> >>>>>
> >>>>> But then the caller can't know ahead of time, it will only find out when
> >>>>> it's too late, and can't decide not to use the DMA engine if it doesn't
> >>>>> support the feature. I don't think that's a very good option.
> >>>>
> >>>> Agreed so lets go with adding these in caps.
> >>>
> >>> So if there's a need for caps anyway, why not a cap that marks
> >>> .issue_pending() as moving from the current cyclic transfer to the next
> >>> one ? 
> >>
> >> Is the overhead really too much on that :) If you like I can send the
> >> core patches and you would need to implement the driver side?
> > 
> > We can try that as a compromise. One of main concerns with developing
> > the core patches myself is that the .terminate_cookie() API still seems
> > ill-defined to me, so it would be much more efficient if you translate
> > the idea you have in your head into code than trying to communicate it
> > to me in all details (one of the grey areas is what should
> > .terminate_cookie() do if the cookie passed to the function corresponds
> > to an already terminated or, more tricky from a completion callback
> > point of view, an issued but not-yet-started transfer, or also a
> > submitted but not issued transfer). If you implement the core part, then
> > that problem will go away.
> > 
> > How about the implementation in virt-dma.[ch] by the way ?
Laurent Pinchart March 18, 2020, 3:14 p.m. UTC | #36
Hi Vinod,

On Wed, Mar 11, 2020 at 05:52:48PM +0200, Laurent Pinchart wrote:
> On Wed, Mar 04, 2020 at 09:54:26PM +0530, Vinod Koul wrote:
> > On 04-03-20, 18:00, Laurent Pinchart wrote:
> >> On Wed, Mar 04, 2020 at 09:07:18PM +0530, Vinod Koul wrote:
> >>> On 04-03-20, 10:01, Laurent Pinchart wrote:
> >>>> On Wed, Mar 04, 2020 at 10:43:01AM +0530, Vinod Koul wrote:
> >>>>> On 03-03-20, 21:22, Laurent Pinchart wrote:
> >>>>>> On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> >>>>>>> On 02-03-20, 09:37, Laurent Pinchart wrote:
> >>>>>>>>> I would be more comfortable in calling an API to do so :)
> >>>>>>>>> The flow I am thinking is:
> >>>>>>>>> 
> >>>>>>>>> - prep cyclic1 txn
> >>>>>>>>> - submit cyclic1 txn
> >>>>>>>>> - call issue_pending() (cyclic one starts)
> >>>>>>>>> 
> >>>>>>>>> - prep cyclic2 txn
> >>>>>>>>> - submit cyclic2 txn
> >>>>>>>>> - signal_cyclic1_txn aka terminate_cookie()
> >>>>>>>>> - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> >>>>>>>>> - get callback for cyclic1 (optional)
> >>>>>>>>> 
> >>>>>>>>> To check if hw supports terminate_cookie() or not we can check if the
> >>>>>>>>> callback support is implemented
> >>>>>>>> 
> >>>>>>>> Two questions though:
> >>>>>>>> 
> >>>>>>>> - Where is .issue_pending() called for cyclic2 in your above sequence ?
> >>>>>>>>   Surely it should be called somewhere, as the DMA engine API requires
> >>>>>>>>   .issue_pending() to be called for a transfer to be executed, otherwise
> >>>>>>>>   it stays in the submitted but not pending queue.
> >>>>>>> 
> >>>>>>> Sorry missed that one, I would do that after submit cyclic2 txn step and
> >>>>>>> then signal signal_cyclic1_txn termination
> >>>>>> 
> >>>>>> OK, that matches my understanding, good :-)
> >>>>>> 
> >>>>>>>> - With the introduction of a new .terminate_cookie() operation, we need
> >>>>>>>>   to specify that operation for all transfer types. What's its
> >>>>>>> 
> >>>>>>> Correct
> >>>>>>> 
> >>>>>>>>   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> >>>>>>>>   drivers report that they support .terminate_cookie() for cyclic
> >>>>>>>>   transfers but not for other transfer types (the counterpart of
> >>>>>>>>   reporting, in my proposition, that .issue_pending() isn't supported
> >>>>>>>>   to replace the current cyclic transfer) ?
> >>>>>>> 
> >>>>>>> Typically for dmaengine controller cyclic is *not* a special mode, only
> >>>>>>> change is that a list provided to controller is circular.
> >>>>>> 
> >>>>>> I don't agree with this. For cyclic transfers to be replaceable in a
> >>>>>> clean way, the feature must be specifically implemented at the hardware
> >>>>>> level. A DMA engine that supports chaining transfers with an explicit
> >>>>>> way to override that chaining, and without the logic to report if the
> >>>>>> inherent race was lost or not, really can't support this API.
> >>>>> 
> >>>>> Well chaining is a typical feature in dmaengine and making last chain
> >>>>> point to first makes it circular. I have seen couple of engines and this
> >>>>> was the implementation in the hardware.
> >>>>> 
> >>>>> There can exist special hardware for this purposes as well, but the
> >>>>> point is that the cyclic can be treated as circular list.
> >>>>> 
> >>>>>> Furthermore, for non-cyclic transfers, what would .terminate_cookie() do
> >>>>>> ? I need it to be defined as terminating the current transfer when it
> >>>>>> ends for the cyclic case, not terminating it immediately. All non-cyclic
> >>>>>> transfers terminate by themselves when they end, so what would this new
> >>>>>> operation do ?
> >>>>> 
> >>>>> I would use it for two purposes, cancelling txn but at the end of
> >>>>> current txn. I have couple of usages where this would be helpful.
> >>>> 
> >>>> I fail to see how that would help. Non-cyclic transfers always stop at
> >>>> the end of the transfer. "Cancelling txn but at the end of current txn"
> >>>> is what DMA engine drivers already do if you call .terminate_cookie() on
> >>>> the ongoing transfer. It would thus be a no-op.
> >>> 
> >>> Well that actually depends on the hardware, some of them support abort
> >>> so people cancel it (terminate_all approach atm)
> >> 
> >> In that case it's not terminating at the end of the current transfer,
> >> but terminating immediately (a.k.a. aborting), right ? Cancelling at the
> >> end of the current transfer still seems to be a no-op to me for
> >> non-cyclic transfers, as that's what they do on their own already.
> > 
> > Correct, it is abort for current txn.
> > 
> >>>>> Second in error handling where some engines do not support
> >>>>> aborting (unless we reset the whole controller)
> >>>> 
> >>>> Could you explain that one ? I'm not sure to understand it.
> >>> 
> >>> So I have dma to a slow peripheral and it is stuck for some reason. I
> >>> want to abort the cookie and let subsequent ones runs (btw this is for
> >>> non cyclic case), so I would use that here. Today we terminate_all and
> >>> then resubmit...
> >> 
> >> That's also for immediate abort, right ?
> > 
> > Right
> > 
> >> For this to work properly we need very accurate residue reporting, as
> >> the client will usually need to know exactly what has been transferred.
> >> The device would need to support DMA_RESIDUE_GRANULARITY_BURST when
> >> aborting an ongoing transfer. What hardware supports this ?
> > 
> >  git grep DMA_RESIDUE_GRANULARITY_BURST drivers/dma/ |wc -l
> > 27
> > 
> > So it seems many do support the burst reporting.
> 
> Yes, but not all of those may support aborting a transfer *and*
> reporting the exact residue of cancelled transfers. We need both to
> implement your proposal.
> 
> >>>>> But yes the .terminate_cookie() semantics should indicate if the
> >>>>> termination should be immediate or end of current txn. I see people
> >>>>> using it for both.
> >>>> 
> >>>> Immediate termination is *not* something I'll implement as I have no
> >>>> good way to test that semantics. I assume you would be fine with leaving
> >>>> that for later, when someone will need it ?
> >>> 
> >>> Sure, if you have hw to support please test. If not, you will not
> >>> implement that.
> >>> 
> >>> The point is that API should support it and people can add support in
> >>> the controllers and test :)
> >> 
> >> I still think this is a different API. We'll have
> >> 
> >> 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
> >>    cases, and being a no-op for cyclic cases.
> >> 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
> >>    non-cyclic cases, and moving to the next transfer for cyclic cases.
> >> 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
> >>    and non-cyclic cases.
> >> 
> >> 3. is an API I don't need, and can't easily test. I agree that it can
> >> have use cases (provided the DMA device can abort an ongoing transfer
> >> *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> >> 
> >> I'm troubled by my inability to convince you that 1. and 2. are really
> >> the same, with 1. addressing the non-cyclic case and 2. addressing the
> >> cyclic case :-) This is why I think they should both be implemented using
> >> .issue_pending() (no other option for 1., that's what it uses today).
> >> This wouldn't prevent implementing 3. with a new .terminate_cookie()
> >> operation, that wouldn't need to take a flag as it would always operate
> >> in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> >> capability for 3., as the presence of the .terminate_cookie() handler
> >> would be enough to tell clients that the API is supported. Only a new
> >> capability for 2. would be needed.
> > 
> > Well I agree 1 & 2 seem similar but I would like to define the behaviour
> > not dependent on the txn being cyclic or not. That is my concern and
> > hence the idea that:
> > 
> > 1. .issue_pending() will push txn to pending_queue, you may have a case
> > where that is done only once (due to nature of txn), but no other
> > implication
> > 
> > 2. .terminate_cookie(EOT) will abort the transfer at the end. Maybe not
> > used for cyclic but irrespective of that, the behaviour would be abort
> > at end of cyclic
> 
> Did you mean "maybe not used for non-cyclic" ?
> 
> > 3. .terminate_cookie(IMMEDIATE) will abort immediately. If there is
> > anything in pending_queue that will get pushed to hardware.
> > 
> > 4. Cyclic by nature never completes
> >    - as a consequence needs to be stopped by terminate_all/terminate_cookie
> > 
> > Do these rules make sense :)
> 
> It's a set of rules that I think can handle my use case, but I still
> believe my proposal based on just .issue_pending() would be simpler, in
> line with the existing API concepts, and wouldn't preclude the addition
> of .terminate_cookie(IMMEDIATE) at a later point. It's your call though,
> especially if you provide the implementation :-) When do you think you
> will be able to do so ?

Gentle ping :-)

> >>>>> And with this I think it would make sense to also add this to
> >>>>> capabilities :)
> >>>> 
> >>>> I'll repeat the comment I made to Peter: you want me to implement a
> >>>> feature that you think would be useful, but is completely unrelated to
> >>>> my use case, while there's a more natural way to handle my issue with
> >>>> the current API, without precluding in any way the addition of your new
> >>>> feature in the future. Not fair.
> >>> 
> >>> So from API design pov, I would like this to support both the features.
> >>> This helps us to not rework the API again for the immediate abort.
> >>> 
> >>> I am not expecting this to be implemented by you if your hw doesn't
> >>> support it. The core changes are pretty minimal and callback in the
> >>> driver is the one which does the job and yours wont do this
> >> 
> >> Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> >> can't test this indeed.
> > 
> > Sure I understand that! Am sure folks will respond to CFT and I guess
> > Peter will also be interested in testing.
> 
> s/testing/implementing it/ :-)
> 
> >>>>>>> So, the .terminate_cookie() should be a feature for all type of txn's.
> >>>>>>> If for some reason (dont discount what hw designers can do) a controller
> >>>>>>> supports this for some specific type(s), then they should return
> >>>>>>> -ENOTSUPP for cookies that do not support and let the caller know.
> >>>>>> 
> >>>>>> But then the caller can't know ahead of time, it will only find out when
> >>>>>> it's too late, and can't decide not to use the DMA engine if it doesn't
> >>>>>> support the feature. I don't think that's a very good option.
> >>>>> 
> >>>>> Agreed so lets go with adding these in caps.
> >>>> 
> >>>> So if there's a need for caps anyway, why not a cap that marks
> >>>> .issue_pending() as moving from the current cyclic transfer to the next
> >>>> one ? 
> >>> 
> >>> Is the overhead really too much on that :) If you like I can send the
> >>> core patches and you would need to implement the driver side?
> >> 
> >> We can try that as a compromise. One of main concerns with developing
> >> the core patches myself is that the .terminate_cookie() API still seems
> >> ill-defined to me, so it would be much more efficient if you translate
> > 
> > yeah lets take a stab at defining this and see if we come up with
> > something meaningful
> > 
> >> the idea you have in your head into code than trying to communicate it
> >> to me in all details (one of the grey areas is what should
> >> .terminate_cookie() do if the cookie passed to the function corresponds
> >> to an already terminated or, more tricky from a completion callback
> >> point of view, an issued but not-yet-started transfer, or also a
> >> submitted but not issued transfer). If you implement the core part, then
> >> that problem will go away.
> >> 
> >> How about the implementation in virt-dma.[ch] by the way ?
> > 
> > It needs to be comprehended and tested as well.. since these are simple
> > callbacks to driver, we should not need huge changes here (i need to
> > double check though)
Laurent Pinchart March 25, 2020, 4 p.m. UTC | #37
Hi Vinod,

On Wed, Mar 18, 2020 at 05:14:27PM +0200, Laurent Pinchart wrote:
> On Wed, Mar 11, 2020 at 05:52:48PM +0200, Laurent Pinchart wrote:
> > On Wed, Mar 04, 2020 at 09:54:26PM +0530, Vinod Koul wrote:
> >> On 04-03-20, 18:00, Laurent Pinchart wrote:
> >>> On Wed, Mar 04, 2020 at 09:07:18PM +0530, Vinod Koul wrote:
> >>>> On 04-03-20, 10:01, Laurent Pinchart wrote:
> >>>>> On Wed, Mar 04, 2020 at 10:43:01AM +0530, Vinod Koul wrote:
> >>>>>> On 03-03-20, 21:22, Laurent Pinchart wrote:
> >>>>>>> On Tue, Mar 03, 2020 at 10:02:54AM +0530, Vinod Koul wrote:
> >>>>>>>> On 02-03-20, 09:37, Laurent Pinchart wrote:
> >>>>>>>>>> I would be more comfortable in calling an API to do so :)
> >>>>>>>>>> The flow I am thinking is:
> >>>>>>>>>> 
> >>>>>>>>>> - prep cyclic1 txn
> >>>>>>>>>> - submit cyclic1 txn
> >>>>>>>>>> - call issue_pending() (cyclic one starts)
> >>>>>>>>>> 
> >>>>>>>>>> - prep cyclic2 txn
> >>>>>>>>>> - submit cyclic2 txn
> >>>>>>>>>> - signal_cyclic1_txn aka terminate_cookie()
> >>>>>>>>>> - cyclic1 completes, switch to cyclic2 (dmaengine driver)
> >>>>>>>>>> - get callback for cyclic1 (optional)
> >>>>>>>>>> 
> >>>>>>>>>> To check if hw supports terminate_cookie() or not we can check if the
> >>>>>>>>>> callback support is implemented
> >>>>>>>>> 
> >>>>>>>>> Two questions though:
> >>>>>>>>> 
> >>>>>>>>> - Where is .issue_pending() called for cyclic2 in your above sequence ?
> >>>>>>>>>   Surely it should be called somewhere, as the DMA engine API requires
> >>>>>>>>>   .issue_pending() to be called for a transfer to be executed, otherwise
> >>>>>>>>>   it stays in the submitted but not pending queue.
> >>>>>>>> 
> >>>>>>>> Sorry missed that one, I would do that after submit cyclic2 txn step and
> >>>>>>>> then signal signal_cyclic1_txn termination
> >>>>>>> 
> >>>>>>> OK, that matches my understanding, good :-)
> >>>>>>> 
> >>>>>>>>> - With the introduction of a new .terminate_cookie() operation, we need
> >>>>>>>>>   to specify that operation for all transfer types. What's its
> >>>>>>>> 
> >>>>>>>> Correct
> >>>>>>>> 
> >>>>>>>>>   envisioned semantics for non-cyclic transfers ? And how do DMA engine
> >>>>>>>>>   drivers report that they support .terminate_cookie() for cyclic
> >>>>>>>>>   transfers but not for other transfer types (the counterpart of
> >>>>>>>>>   reporting, in my proposition, that .issue_pending() isn't supported
> >>>>>>>>>   to replace the current cyclic transfer) ?
> >>>>>>>> 
> >>>>>>>> Typically for dmaengine controller cyclic is *not* a special mode, only
> >>>>>>>> change is that a list provided to controller is circular.
> >>>>>>> 
> >>>>>>> I don't agree with this. For cyclic transfers to be replaceable in a
> >>>>>>> clean way, the feature must be specifically implemented at the hardware
> >>>>>>> level. A DMA engine that supports chaining transfers with an explicit
> >>>>>>> way to override that chaining, and without the logic to report if the
> >>>>>>> inherent race was lost or not, really can't support this API.
> >>>>>> 
> >>>>>> Well chaining is a typical feature in dmaengine and making last chain
> >>>>>> point to first makes it circular. I have seen couple of engines and this
> >>>>>> was the implementation in the hardware.
> >>>>>> 
> >>>>>> There can exist special hardware for this purposes as well, but the
> >>>>>> point is that the cyclic can be treated as circular list.
> >>>>>> 
> >>>>>>> Furthermore, for non-cyclic transfers, what would .terminate_cookie() do
> >>>>>>> ? I need it to be defined as terminating the current transfer when it
> >>>>>>> ends for the cyclic case, not terminating it immediately. All non-cyclic
> >>>>>>> transfers terminate by themselves when they end, so what would this new
> >>>>>>> operation do ?
> >>>>>> 
> >>>>>> I would use it for two purposes, cancelling txn but at the end of
> >>>>>> current txn. I have couple of usages where this would be helpful.
> >>>>> 
> >>>>> I fail to see how that would help. Non-cyclic transfers always stop at
> >>>>> the end of the transfer. "Cancelling txn but at the end of current txn"
> >>>>> is what DMA engine drivers already do if you call .terminate_cookie() on
> >>>>> the ongoing transfer. It would thus be a no-op.
> >>>> 
> >>>> Well that actually depends on the hardware, some of them support abort
> >>>> so people cancel it (terminate_all approach atm)
> >>> 
> >>> In that case it's not terminating at the end of the current transfer,
> >>> but terminating immediately (a.k.a. aborting), right ? Cancelling at the
> >>> end of the current transfer still seems to be a no-op to me for
> >>> non-cyclic transfers, as that's what they do on their own already.
> >> 
> >> Correct, it is abort for current txn.
> >> 
> >>>>>> Second in error handling where some engines do not support
> >>>>>> aborting (unless we reset the whole controller)
> >>>>> 
> >>>>> Could you explain that one ? I'm not sure to understand it.
> >>>> 
> >>>> So I have dma to a slow peripheral and it is stuck for some reason. I
> >>>> want to abort the cookie and let subsequent ones runs (btw this is for
> >>>> non cyclic case), so I would use that here. Today we terminate_all and
> >>>> then resubmit...
> >>> 
> >>> That's also for immediate abort, right ?
> >> 
> >> Right
> >> 
> >>> For this to work properly we need very accurate residue reporting, as
> >>> the client will usually need to know exactly what has been transferred.
> >>> The device would need to support DMA_RESIDUE_GRANULARITY_BURST when
> >>> aborting an ongoing transfer. What hardware supports this ?
> >> 
> >>  git grep DMA_RESIDUE_GRANULARITY_BURST drivers/dma/ |wc -l
> >> 27
> >> 
> >> So it seems many do support the burst reporting.
> > 
> > Yes, but not all of those may support aborting a transfer *and*
> > reporting the exact residue of cancelled transfers. We need both to
> > implement your proposal.
> > 
> >>>>>> But yes the .terminate_cookie() semantics should indicate if the
> >>>>>> termination should be immediate or end of current txn. I see people
> >>>>>> using it for both.
> >>>>> 
> >>>>> Immediate termination is *not* something I'll implement as I have no
> >>>>> good way to test that semantics. I assume you would be fine with leaving
> >>>>> that for later, when someone will need it ?
> >>>> 
> >>>> Sure, if you have hw to support please test. If not, you will not
> >>>> implement that.
> >>>> 
> >>>> The point is that API should support it and people can add support in
> >>>> the controllers and test :)
> >>> 
> >>> I still think this is a different API. We'll have
> >>> 
> >>> 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
> >>>    cases, and being a no-op for cyclic cases.
> >>> 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
> >>>    non-cyclic cases, and moving to the next transfer for cyclic cases.
> >>> 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
> >>>    and non-cyclic cases.
> >>> 
> >>> 3. is an API I don't need, and can't easily test. I agree that it can
> >>> have use cases (provided the DMA device can abort an ongoing transfer
> >>> *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> >>> 
> >>> I'm troubled by my inability to convince you that 1. and 2. are really
> >>> the same, with 1. addressing the non-cyclic case and 2. addressing the
> >>> cyclic case :-) This is why I think they should both be implemented using
> >>> .issue_pending() (no other option for 1., that's what it uses today).
> >>> This wouldn't prevent implementing 3. with a new .terminate_cookie()
> >>> operation, that wouldn't need to take a flag as it would always operate
> >>> in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> >>> capability for 3., as the presence of the .terminate_cookie() handler
> >>> would be enough to tell clients that the API is supported. Only a new
> >>> capability for 2. would be needed.
> >> 
> >> Well I agree 1 & 2 seem similar but I would like to define the behaviour
> >> not dependent on the txn being cyclic or not. That is my concern and
> >> hence the idea that:
> >> 
> >> 1. .issue_pending() will push txn to pending_queue, you may have a case
> >> where that is done only once (due to nature of txn), but no other
> >> implication
> >> 
> >> 2. .terminate_cookie(EOT) will abort the transfer at the end. Maybe not
> >> used for cyclic but irrespective of that, the behaviour would be abort
> >> at end of cyclic
> > 
> > Did you mean "maybe not used for non-cyclic" ?
> > 
> >> 3. .terminate_cookie(IMMEDIATE) will abort immediately. If there is
> >> anything in pending_queue that will get pushed to hardware.
> >> 
> >> 4. Cyclic by nature never completes
> >>    - as a consequence needs to be stopped by terminate_all/terminate_cookie
> >> 
> >> Do these rules make sense :)
> > 
> > It's a set of rules that I think can handle my use case, but I still
> > believe my proposal based on just .issue_pending() would be simpler, in
> > line with the existing API concepts, and wouldn't preclude the addition
> > of .terminate_cookie(IMMEDIATE) at a later point. It's your call though,
> > especially if you provide the implementation :-) When do you think you
> > will be able to do so ?
> 
> Gentle ping :-)

Any update ?

> >>>>>> And with this I think it would make sense to also add this to
> >>>>>> capabilities :)
> >>>>> 
> >>>>> I'll repeat the comment I made to Peter: you want me to implement a
> >>>>> feature that you think would be useful, but is completely unrelated to
> >>>>> my use case, while there's a more natural way to handle my issue with
> >>>>> the current API, without precluding in any way the addition of your new
> >>>>> feature in the future. Not fair.
> >>>> 
> >>>> So from API design pov, I would like this to support both the features.
> >>>> This helps us to not rework the API again for the immediate abort.
> >>>> 
> >>>> I am not expecting this to be implemented by you if your hw doesn't
> >>>> support it. The core changes are pretty minimal and callback in the
> >>>> driver is the one which does the job and yours wont do this
> >>> 
> >>> Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> >>> can't test this indeed.
> >> 
> >> Sure I understand that! Am sure folks will respond to CFT and I guess
> >> Peter will also be interested in testing.
> > 
> > s/testing/implementing it/ :-)
> > 
> >>>>>>>> So, the .terminate_cookie() should be a feature for all type of txn's.
> >>>>>>>> If for some reason (dont discount what hw designers can do) a controller
> >>>>>>>> supports this for some specific type(s), then they should return
> >>>>>>>> -ENOTSUPP for cookies that do not support and let the caller know.
> >>>>>>> 
> >>>>>>> But then the caller can't know ahead of time, it will only find out when
> >>>>>>> it's too late, and can't decide not to use the DMA engine if it doesn't
> >>>>>>> support the feature. I don't think that's a very good option.
> >>>>>> 
> >>>>>> Agreed so lets go with adding these in caps.
> >>>>> 
> >>>>> So if there's a need for caps anyway, why not a cap that marks
> >>>>> .issue_pending() as moving from the current cyclic transfer to the next
> >>>>> one ? 
> >>>> 
> >>>> Is the overhead really too much on that :) If you like I can send the
> >>>> core patches and you would need to implement the driver side?
> >>> 
> >>> We can try that as a compromise. One of main concerns with developing
> >>> the core patches myself is that the .terminate_cookie() API still seems
> >>> ill-defined to me, so it would be much more efficient if you translate
> >> 
> >> yeah lets take a stab at defining this and see if we come up with
> >> something meaningful
> >> 
> >>> the idea you have in your head into code than trying to communicate it
> >>> to me in all details (one of the grey areas is what should
> >>> .terminate_cookie() do if the cookie passed to the function corresponds
> >>> to an already terminated or, more tricky from a completion callback
> >>> point of view, an issued but not-yet-started transfer, or also a
> >>> submitted but not issued transfer). If you implement the core part, then
> >>> that problem will go away.
> >>> 
> >>> How about the implementation in virt-dma.[ch] by the way ?
> >> 
> >> It needs to be comprehended and tested as well.. since these are simple
> >> callbacks to driver, we should not need huge changes here (i need to
> >> double check though)
Vinod Koul March 26, 2020, 7:02 a.m. UTC | #38
Hi Laurent,

Sorry for the delay in replying.

On 11-03-20, 17:52, Laurent Pinchart wrote:
> On Wed, Mar 04, 2020 at 09:54:26PM +0530, Vinod Koul wrote:
> > >>>> Second in error handling where some engines do not support
> > >>>> aborting (unless we reset the whole controller)
> > >>> 
> > >>> Could you explain that one ? I'm not sure to understand it.
> > >> 
> > >> So I have dma to a slow peripheral and it is stuck for some reason. I
> > >> want to abort the cookie and let subsequent ones runs (btw this is for
> > >> non cyclic case), so I would use that here. Today we terminate_all and
> > >> then resubmit...
> > > 
> > > That's also for immediate abort, right ?
> > 
> > Right
> > 
> > > For this to work properly we need very accurate residue reporting, as
> > > the client will usually need to know exactly what has been transferred.
> > > The device would need to support DMA_RESIDUE_GRANULARITY_BURST when
> > > aborting an ongoing transfer. What hardware supports this ?
> > 
> >  git grep DMA_RESIDUE_GRANULARITY_BURST drivers/dma/ |wc -l
> > 27
> > 
> > So it seems many do support the burst reporting.
> 
> Yes, but not all of those may support aborting a transfer *and*
> reporting the exact residue of cancelled transfers. We need both to
> implement your proposal.

Reporting residue is already implemented, please see  struct
dmaengine_result. This can be passed by a callback
dma_async_tx_callback_result() in struct dma_async_tx_descriptor.

> > >>>> But yes the .terminate_cookie() semantics should indicate if the
> > >>>> termination should be immediate or end of current txn. I see people
> > >>>> using it for both.
> > >>> 
> > >>> Immediate termination is *not* something I'll implement as I have no
> > >>> good way to test that semantics. I assume you would be fine with leaving
> > >>> that for later, when someone will need it ?
> > >> 
> > >> Sure, if you have hw to support please test. If not, you will not
> > >> implement that.
> > >> 
> > >> The point is that API should support it and people can add support in
> > >> the controllers and test :)
> > > 
> > > I still think this is a different API. We'll have
> > > 
> > > 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
> > >    cases, and being a no-op for cyclic cases.
> > > 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
> > >    non-cyclic cases, and moving to the next transfer for cyclic cases.
> > > 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
> > >    and non-cyclic cases.
> > > 
> > > 3. is an API I don't need, and can't easily test. I agree that it can
> > > have use cases (provided the DMA device can abort an ongoing transfer
> > > *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> > > 
> > > I'm troubled by my inability to convince you that 1. and 2. are really
> > > the same, with 1. addressing the non-cyclic case and 2. addressing the
> > > cyclic case :-) This is why I think they should both be implemented using
> > > .issue_pending() (no other option for 1., that's what it uses today).
> > > This wouldn't prevent implementing 3. with a new .terminate_cookie()
> > > operation, that wouldn't need to take a flag as it would always operate
> > > in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> > > capability for 3., as the presence of the .terminate_cookie() handler
> > > would be enough to tell clients that the API is supported. Only a new
> > > capability for 2. would be needed.
> > 
> > Well I agree 1 & 2 seem similar but I would like to define the behaviour
> > not dependent on the txn being cyclic or not. That is my concern and
> > hence the idea that:
> > 
> > 1. .issue_pending() will push txn to pending_queue, you may have a case
> > where that is done only once (due to nature of txn), but no other
> > implication
> > 
> > 2. .terminate_cookie(EOT) will abort the transfer at the end. Maybe not
> > used for cyclic but irrespective of that, the behaviour would be abort
> > at end of cyclic
> 
> Did you mean "maybe not used for non-cyclic" ?

Yes I think so..

> > 3. .terminate_cookie(IMMEDIATE) will abort immediately. If there is
> > anything in pending_queue that will get pushed to hardware.
> > 
> > 4. Cyclic by nature never completes
> >    - as a consequence needs to be stopped by terminate_all/terminate_cookie
> > 
> > > Do these rules make sense :)
> 
> It's a set of rules that I think can handle my use case, but I still
> believe my proposal based on just .issue_pending() would be simpler, in
> line with the existing API concepts, and wouldn't preclude the addition
> of .terminate_cookie(IMMEDIATE) at a later point. It's your call though,
> especially if you provide the implementation :-) When do you think you
> will be able to do so ?

I will try to take a stab at it once merge window opens.. will let you
and Peter for sneak preview once I start on it :)

> > >>>> And with this I think it would make sense to also add this to
> > >>>> capabilities :)
> > >>> 
> > >>> I'll repeat the comment I made to Peter: you want me to implement a
> > >>> feature that you think would be useful, but is completely unrelated to
> > >>> my use case, while there's a more natural way to handle my issue with
> > >>> the current API, without precluding in any way the addition of your new
> > >>> feature in the future. Not fair.
> > >> 
> > >> So from API design pov, I would like this to support both the features.
> > >> This helps us to not rework the API again for the immediate abort.
> > >> 
> > >> I am not expecting this to be implemented by you if your hw doesn't
> > >> support it. The core changes are pretty minimal and callback in the
> > > >> driver is the one which does the job and yours won't do this
> > > 
> > > Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> > > can't test this indeed.
> > 
> > Sure I understand that! Am sure folks will respond to CFT and I guess
> > Peter will also be interested in testing.
> 
> s/testing/implementing it/ :-)

Even better :)
Laurent Pinchart April 8, 2020, 5 p.m. UTC | #39
Hi Vinod,

On Thu, Mar 26, 2020 at 12:32:34PM +0530, Vinod Koul wrote:
> On 11-03-20, 17:52, Laurent Pinchart wrote:
> > On Wed, Mar 04, 2020 at 09:54:26PM +0530, Vinod Koul wrote:
> >>>>>> Second in error handling where some engines do not support
> >>>>>> aborting (unless we reset the whole controller)
> >>>>> 
> >>>>> Could you explain that one ? I'm not sure I understand it.
> >>>> 
> >>>> So I have dma to a slow peripheral and it is stuck for some reason. I
> >>>> want to abort the cookie and let subsequent ones run (btw this is for
> >>>> non cyclic case), so I would use that here. Today we terminate_all and
> >>>> then resubmit...
> >>> 
> >>> That's also for immediate abort, right ?
> >> 
> >> Right
> >> 
> >>> For this to work properly we need very accurate residue reporting, as
> >>> the client will usually need to know exactly what has been transferred.
> >>> The device would need to support DMA_RESIDUE_GRANULARITY_BURST when
> >>> aborting an ongoing transfer. What hardware supports this ?
> >> 
> >>  git grep DMA_RESIDUE_GRANULARITY_BURST drivers/dma/ |wc -l
> >> 27
> >> 
> >> So it seems many do support the burst reporting.
> > 
> > Yes, but not all of those may support aborting a transfer *and*
> > reporting the exact residue of cancelled transfers. We need both to
> > implement your proposal.
> 
> Reporting residue is already implemented, please see  struct
> dmaengine_result. This can be passed by a callback
> dma_async_tx_callback_result() in struct dma_async_tx_descriptor.

I mean that I don't know if the drivers that support
DMA_RESIDUE_GRANULARITY_BURST only support reporting the residue when
the transfer is active, or also support reporting it when cancelling a
transfer. Maybe all of them do, maybe a subset of them do, so I can't
tell if this would be a feature that could be widely supported.

> >>>>>> But yes the .terminate_cookie() semantics should indicate if the
> >>>>>> termination should be immediate or end of current txn. I see people
> >>>>>> using it for both.
> >>>>> 
> >>>>> Immediate termination is *not* something I'll implement as I have no
> >>>>> good way to test that semantics. I assume you would be fine with leaving
> >>>>> that for later, when someone will need it ?
> >>>> 
> >>>> Sure, if you have hw to support please test. If not, you will not
> >>>> implement that.
> >>>> 
> >>>> The point is that API should support it and people can add support in
> >>>> the controllers and test :)
> >>> 
> >>> I still think this is a different API. We'll have
> >>> 
> >>> 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
> >>>    cases, and being a no-op for cyclic cases.
> >>> 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
> >>>    non-cyclic cases, and moving to the next transfer for cyclic cases.
> >>> 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
> >>>    and non-cyclic cases.
> >>> 
> >>> 3. is an API I don't need, and can't easily test. I agree that it can
> >>> have use cases (provided the DMA device can abort an ongoing transfer
> >>> *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> >>> 
> >>> I'm troubled by my inability to convince you that 1. and 2. are really
> >>> the same, with 1. addressing the non-cyclic case and 2. addressing the
> >>> cyclic case :-) This is why I think they should both be implemented using
> >>> .issue_pending() (no other option for 1., that's what it uses today).
> >>> This wouldn't prevent implementing 3. with a new .terminate_cookie()
> >>> operation, that wouldn't need to take a flag as it would always operate
> >>> in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> >>> capability for 3., as the presence of the .terminate_cookie() handler
> >>> would be enough to tell clients that the API is supported. Only a new
> >>> capability for 2. would be needed.
> >> 
> >> Well I agree 1 & 2 seem similar but I would like to define the behaviour
> >> not dependent on the txn being cyclic or not. That is my concern and
> >> hence the idea that:
> >> 
> >> 1. .issue_pending() will push txn to pending_queue, you may have a case
> >> where that is done only once (due to nature of txn), but no other
> >> implication
> >> 
> >> 2. .terminate_cookie(EOT) will abort the transfer at the end. Maybe not
> >> used for cyclic but irrespective of that, the behaviour would be abort
> >> at end of cyclic
> > 
> > Did you mean "maybe not used for non-cyclic" ?
> 
> Yes I think so..
> 
> >> 3. .terminate_cookie(IMMEDIATE) will abort immediately. If there is
> >> anything in pending_queue that will get pushed to hardware.
> >> 
> >> 4. Cyclic by nature never completes
> >>    - as a consequence needs to be stopped by terminate_all/terminate_cookie
> >> 
> >> Do these rules make sense :)
> > 
> > It's a set of rules that I think can handle my use case, but I still
> > believe my proposal based on just .issue_pending() would be simpler, in
> > line with the existing API concepts, and wouldn't preclude the addition
> > of .terminate_cookie(IMMEDIATE) at a later point. It's your call though,
> > especially if you provide the implementation :-) When do you think you
> > will be able to do so ?
> 
> I will try to take a stab at it once merge window opens.. will let you
> and Peter for sneak preview once I start on it :)

I started giving it a try as this has been blocked for two and a half
months now.

I very quickly ran into issues as the interface is ill-defined as it
stands.

- What should happen when .terminate_cookie(EOT) is called with no other
  transfer issued, and a new transfer is issued before the current
  transfer terminates ?

- I expect .terminate_cookie() to be asynchronous, as .terminate_all().
  This means that actual termination of cyclic transfers will actually
  be handled at end of transfer, in the interrupt handler. This creates
  race conditions with other operations. It would also make it much more
  difficult to support this feature for devices that require sleeping
  when stopping the DMA engine at the end of a cyclic transfer.

If we have to go forward with this new API, I need a detailed
explanation of how all this should be handled. I still truly believe
this is a case of yak shaving that introduces additional complexity for
absolutely no valid reason, when a solution that is aligned with the
existing API and its concepts exists already. It's your decision as the
subsystem maintainer, but if you want something more complex, please
provide it soon. I don't want to wait another three months to see
progress on this issue.

> >>>>>> And with this I think it would make sense to also add this to
> >>>>>> capabilities :)
> >>>>> 
> >>>>> I'll repeat the comment I made to Peter: you want me to implement a
> >>>>> feature that you think would be useful, but is completely unrelated to
> >>>>> my use case, while there's a more natural way to handle my issue with
> >>>>> the current API, without precluding in any way the addition of your new
> >>>>> feature in the future. Not fair.
> >>>> 
> >>>> So from API design pov, I would like this to support both the features.
> >>>> This helps us to not rework the API again for the immediate abort.
> >>>> 
> >>>> I am not expecting this to be implemented by you if your hw doesn't
> >>>> support it. The core changes are pretty minimal and callback in the
> >>>> driver is the one which does the job and yours won't do this
> >>> 
> >>> Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> >>> can't test this indeed.
> >> 
> >> Sure I understand that! Am sure folks will respond to CFT and I guess
> >> Peter will also be interested in testing.
> > 
> > s/testing/implementing it/ :-)
> 
> Even better :)
Laurent Pinchart April 15, 2020, 3:12 p.m. UTC | #40
Hi Vinod,

Ping. We need a solution to this problem, it's been way too long
already. If you don't want to accept my proposal, please provide me with
an implementation or a very detailed spec I can implement.

On Wed, Apr 08, 2020 at 08:00:49PM +0300, Laurent Pinchart wrote:
> On Thu, Mar 26, 2020 at 12:32:34PM +0530, Vinod Koul wrote:
> > On 11-03-20, 17:52, Laurent Pinchart wrote:
> >> On Wed, Mar 04, 2020 at 09:54:26PM +0530, Vinod Koul wrote:
> >>>>>>> Second in error handling where some engines do not support
> >>>>>>> aborting (unless we reset the whole controller)
> >>>>>> 
> >>>>>> Could you explain that one ? I'm not sure I understand it.
> >>>>> 
> >>>>> So I have dma to a slow peripheral and it is stuck for some reason. I
> >>>>> want to abort the cookie and let subsequent ones run (btw this is for
> >>>>> non cyclic case), so I would use that here. Today we terminate_all and
> >>>>> then resubmit...
> >>>> 
> >>>> That's also for immediate abort, right ?
> >>> 
> >>> Right
> >>> 
> >>>> For this to work properly we need very accurate residue reporting, as
> >>>> the client will usually need to know exactly what has been transferred.
> >>>> The device would need to support DMA_RESIDUE_GRANULARITY_BURST when
> >>>> aborting an ongoing transfer. What hardware supports this ?
> >>> 
> >>> git grep DMA_RESIDUE_GRANULARITY_BURST drivers/dma/ |wc -l
> >>> 27
> >>> 
> >>> So it seems many do support the burst reporting.
> >> 
> >> Yes, but not all of those may support aborting a transfer *and*
> >> reporting the exact residue of cancelled transfers. We need both to
> >> implement your proposal.
> > 
> > Reporting residue is already implemented, please see  struct
> > dmaengine_result. This can be passed by a callback
> > dma_async_tx_callback_result() in struct dma_async_tx_descriptor.
> 
> I mean that I don't know if the drivers that support
> DMA_RESIDUE_GRANULARITY_BURST only support reporting the residue when
> the transfer is active, or also support reporting it when cancelling a
> transfer. Maybe all of them do, maybe a subset of them do, so I can't
> tell if this would be a feature that could be widely supported.
> 
> >>>>>>> But yes the .terminate_cookie() semantics should indicate if the
> >>>>>>> termination should be immediate or end of current txn. I see people
> >>>>>>> using it for both.
> >>>>>> 
> >>>>>> Immediate termination is *not* something I'll implement as I have no
> >>>>>> good way to test that semantics. I assume you would be fine with leaving
> >>>>>> that for later, when someone will need it ?
> >>>>> 
> >>>>> Sure, if you have hw to support please test. If not, you will not
> >>>>> implement that.
> >>>>> 
> >>>>> The point is that API should support it and people can add support in
> >>>>> the controllers and test :)
> >>>> 
> >>>> I still think this is a different API. We'll have
> >>>> 
> >>>> 1. Existing .issue_pending(), queueing the next transfer for non-cyclic
> >>>>    cases, and being a no-op for cyclic cases.
> >>>> 2. New .terminate_cookie(AT_END_OF_TRANSFER), being a no-op for
> >>>>    non-cyclic cases, and moving to the next transfer for cyclic cases.
> >>>> 3. New .terminate_cookie(ABORT_IMMEDIATELY), applicable to both cyclic
> >>>>    and non-cyclic cases.
> >>>> 
> >>>> 3. is an API I don't need, and can't easily test. I agree that it can
> >>>> have use cases (provided the DMA device can abort an ongoing transfer
> >>>> *and* still support DMA_RESIDUE_GRANULARITY_BURST in that case).
> >>>> 
> >>>> I'm troubled by my inability to convince you that 1. and 2. are really
> >>>> the same, with 1. addressing the non-cyclic case and 2. addressing the
> >>>> cyclic case :-) This is why I think they should both be implemented using
> >>>> .issue_pending() (no other option for 1., that's what it uses today).
> >>>> This wouldn't prevent implementing 3. with a new .terminate_cookie()
> >>>> operation, that wouldn't need to take a flag as it would always operate
> >>>> in ABORT_IMMEDIATELY mode. There would also be no need to report a new
> >>>> capability for 3., as the presence of the .terminate_cookie() handler
> >>>> would be enough to tell clients that the API is supported. Only a new
> >>>> capability for 2. would be needed.
> >>> 
> >>> Well I agree 1 & 2 seem similar but I would like to define the behaviour
> >>> not dependent on the txn being cyclic or not. That is my concern and
> >>> hence the idea that:
> >>> 
> >>> 1. .issue_pending() will push txn to pending_queue, you may have a case
> >>> where that is done only once (due to nature of txn), but no other
> >>> implication
> >>> 
> >>> 2. .terminate_cookie(EOT) will abort the transfer at the end. Maybe not
> >>> used for cyclic but irrespective of that, the behaviour would be abort
> >>> at end of cyclic
> >> 
> >> Did you mean "maybe not used for non-cyclic" ?
> > 
> > Yes I think so..
> > 
> >>> 3. .terminate_cookie(IMMEDIATE) will abort immediately. If there is
> >>> anything in pending_queue that will get pushed to hardware.
> >>> 
> >>> 4. Cyclic by nature never completes
> >>>    - as a consequence needs to be stopped by terminate_all/terminate_cookie
> >>> 
> >>> Do these rules make sense :)
> >> 
> >> It's a set of rules that I think can handle my use case, but I still
> >> believe my proposal based on just .issue_pending() would be simpler, in
> >> line with the existing API concepts, and wouldn't preclude the addition
> >> of .terminate_cookie(IMMEDIATE) at a later point. It's your call though,
> >> especially if you provide the implementation :-) When do you think you
> >> will be able to do so ?
> > 
> > I will try to take a stab at it once merge window opens.. will let you
> > and Peter for sneak preview once I start on it :)
> 
> I started giving it a try as this has been blocked for two and a half
> months now.
> 
> I very quickly ran into issues as the interface is ill-defined as it
> stands.
> 
> - What should happen when .terminate_cookie(EOT) is called with no other
>   transfer issued, and a new transfer is issued before the current
>   transfer terminates ?
> 
> - I expect .terminate_cookie() to be asynchronous, as .terminate_all().
>   This means that actual termination of cyclic transfers will actually
>   be handled at end of transfer, in the interrupt handler. This creates
>   race conditions with other operations. It would also make it much more
>   difficult to support this feature for devices that require sleeping
>   when stopping the DMA engine at the end of a cyclic transfer.
> 
> If we have to go forward with this new API, I need a detailed
> explanation of how all this should be handled. I still truly believe
> this is a case of yak shaving that introduces additional complexity for
> absolutely no valid reason, when a solution that is aligned with the
> existing API and its concepts exists already. It's your decision as the
> subsystem maintainer, but if you want something more complex, please
> provide it soon. I don't want to wait another three months to see
> progress on this issue.
> 
> >>>>>>> And with this I think it would make sense to also add this to
> >>>>>>> capabilities :)
> >>>>>> 
> >>>>>> I'll repeat the comment I made to Peter: you want me to implement a
> >>>>>> feature that you think would be useful, but is completely unrelated to
> >>>>>> my use case, while there's a more natural way to handle my issue with
> >>>>>> the current API, without precluding in any way the addition of your new
> >>>>>> feature in the future. Not fair.
> >>>>> 
> >>>>> So from API design pov, I would like this to support both the features.
> >>>>> This helps us to not rework the API again for the immediate abort.
> >>>>> 
> >>>>> I am not expecting this to be implemented by you if your hw doesn't
> >>>>> support it. The core changes are pretty minimal and callback in the
> >>>>> driver is the one which does the job and yours won't do this
> >>>> 
> >>>> Xilinx DMA drivers don't support DMA_RESIDUE_GRANULARITY_BURST so I
> >>>> can't test this indeed.
> >>> 
> >>> Sure I understand that! Am sure folks will respond to CFT and I guess
> >>> Peter will also be interested in testing.
> >> 
> >> s/testing/implementing it/ :-)
> > 
> > Even better :)
diff mbox series

Patch

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 03ac4b96117c..4ffb98a47f31 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -981,7 +981,13 @@  int dma_async_device_register(struct dma_device *device)
 			"DMA_INTERLEAVE");
 		return -EIO;
 	}
-
+	if (dma_has_cap(DMA_INTERLEAVE_CYCLIC, device->cap_mask) &&
+	    !device->device_prep_interleaved_cyclic) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_INTERLEAVE_CYCLIC");
+		return -EIO;
+	}
 
 	if (!device->device_tx_status) {
 		dev_err(device->dev, "Device tx_status is not defined\n");
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8fcdee1c0cf9..e9af3bf835cb 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -61,6 +61,7 @@  enum dma_transaction_type {
 	DMA_SLAVE,
 	DMA_CYCLIC,
 	DMA_INTERLEAVE,
+	DMA_INTERLEAVE_CYCLIC,
 /* last transaction type for creation of the capabilities mask */
 	DMA_TX_TYPE_END,
 };
@@ -701,6 +702,10 @@  struct dma_filter {
  *	The function takes a buffer of size buf_len. The callback function will
  *	be called after period_len bytes have been transferred.
  * @device_prep_interleaved_dma: Transfer expression in a generic way.
+ * @device_prep_interleaved_cyclic: prepares an interleaved cyclic transfer.
+ *	This is similar to @device_prep_interleaved_dma, but the transfer is
+ *	repeated until a new transfer is issued. This transfer type is meant
+ *	for display.
  * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
  * @device_config: Pushes a new configuration to a channel, return 0 or an error
  *	code
@@ -785,6 +790,9 @@  struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
 		struct dma_chan *chan, struct dma_interleaved_template *xt,
 		unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_interleaved_cyclic)(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)(
 		struct dma_chan *chan, dma_addr_t dst, u64 data,
 		unsigned long flags);
@@ -880,6 +888,16 @@  static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
 	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_cyclic(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags)
+{
+	if (!chan || !chan->device || !chan->device->device_prep_interleaved_cyclic)
+		return NULL;
+
+	return chan->device->device_prep_interleaved_cyclic(chan, xt, flags);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset(
 		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
 		unsigned long flags)