diff mbox series

[v6,11/16] dmaengine: tegra-apb: Keep clock enabled only during DMA transfer

Message ID 20200130043804.32243-12-digetx@gmail.com (mailing list archive)
State Superseded
Headers show
Series NVIDIA Tegra APB DMA driver fixes and improvements | expand

Commit Message

Dmitry Osipenko Jan. 30, 2020, 4:37 a.m. UTC
It's a bit impractical to enable the hardware's clock at the time of DMA
channel allocation because most DMA client drivers allocate the DMA
channel at driver probe time, and thus the DMA clock is kept always
enabled in practice, defeating the whole purpose of runtime PM.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
---
 drivers/dma/tegra20-apb-dma.c | 47 ++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 15 deletions(-)

Comments

Jon Hunter Jan. 30, 2020, 2:09 p.m. UTC | #1
On 30/01/2020 04:37, Dmitry Osipenko wrote:
> It's a bit impractical to enable hardware's clock at the time of DMA
> channel's allocation because most of DMA client drivers allocate DMA
> channel at the time of the driver's probing, and thus, DMA clock is kept
> always-enabled in practice, defeating the whole purpose of runtime PM.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/dma/tegra20-apb-dma.c | 47 ++++++++++++++++++++++++-----------
>  1 file changed, 32 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
> index 22b88ccff05d..0ee28d8e3c96 100644
> --- a/drivers/dma/tegra20-apb-dma.c
> +++ b/drivers/dma/tegra20-apb-dma.c
> @@ -436,6 +436,8 @@ static void tegra_dma_stop(struct tegra_dma_channel *tdc)
>  		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
>  	}
>  	tdc->busy = false;
> +
> +	pm_runtime_put(tdc->tdma->dev);
>  }
>  
>  static void tegra_dma_start(struct tegra_dma_channel *tdc,
> @@ -500,18 +502,25 @@ static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
>  	tegra_dma_resume(tdc);
>  }
>  
> -static void tdc_start_head_req(struct tegra_dma_channel *tdc)
> +static bool tdc_start_head_req(struct tegra_dma_channel *tdc)
>  {
>  	struct tegra_dma_sg_req *sg_req;
> +	int err;
>  
>  	if (list_empty(&tdc->pending_sg_req))
> -		return;
> +		return false;
> +
> +	err = pm_runtime_get_sync(tdc->tdma->dev);
> +	if (WARN_ON_ONCE(err < 0))
> +		return false;
>  
>  	sg_req = list_first_entry(&tdc->pending_sg_req, typeof(*sg_req), node);
>  	tegra_dma_start(tdc, sg_req);
>  	sg_req->configured = true;
>  	sg_req->words_xferred = 0;
>  	tdc->busy = true;
> +
> +	return true;
>  }
>  
>  static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
> @@ -615,6 +624,8 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc,
>  	}
>  	list_add_tail(&sgreq->node, &tdc->free_sg_req);
>  
> +	pm_runtime_put(tdc->tdma->dev);
> +
>  	/* Do not start DMA if it is going to be terminate */
>  	if (to_terminate || list_empty(&tdc->pending_sg_req))
>  		return;
> @@ -730,9 +741,7 @@ static void tegra_dma_issue_pending(struct dma_chan *dc)
>  		dev_err(tdc2dev(tdc), "No DMA request\n");
>  		goto end;
>  	}
> -	if (!tdc->busy) {
> -		tdc_start_head_req(tdc);
> -
> +	if (!tdc->busy && tdc_start_head_req(tdc)) {
>  		/* Continuous single mode: Configure next req */
>  		if (tdc->cyclic) {
>  			/*
> @@ -775,6 +784,13 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
>  	else
>  		wcount = status;
>  
> +	/*
> +	 * tegra_dma_stop() will drop the RPM's usage refcount, but
> +	 * tegra_dma_resume() touches hardware and thus we should keep
> +	 * the DMA clock active while it's needed.
> +	 */
> +	pm_runtime_get(tdc->tdma->dev);
> +

Would it work, and be simpler, to just enable the clock in issue_pending
and disable it in handle_once_dma_done or terminate_all?

diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 3a45079d11ec..86bbb45da93d 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -616,9 +616,14 @@ static void handle_once_dma_done(struct
tegra_dma_channel *tdc,
        list_add_tail(&sgreq->node, &tdc->free_sg_req);

        /* Do not start DMA if it is going to be terminate */
-       if (to_terminate || list_empty(&tdc->pending_sg_req))
+       if (to_terminate)
                return;

+       if (list_empty(&tdc->pending_sg_req)) {
+               pm_runtime_put(tdc->tdma->dev);
+               return;
+       }
+
        tdc_start_head_req(tdc);
 }

@@ -729,6 +734,11 @@ static void tegra_dma_issue_pending(struct dma_chan
*dc)
                goto end;
        }
        if (!tdc->busy) {
+               if (pm_runtime_get_sync(tdc->tdma->dev) < 0) {
+                       dev_err(tdc2dev(tdc), "Failed to enable DMA!\n");
+                       goto end;
+               }
+
                tdc_start_head_req(tdc);

                /* Continuous single mode: Configure next req */
@@ -788,6 +798,7 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
                                get_current_xferred_count(tdc, sgreq,
wcount);
        }
        tegra_dma_resume(tdc);
+       pm_runtime_put(tdc->tdma->dev);

 skip_dma_stop:
        tegra_dma_abort_all(tdc);
Dmitry Osipenko Jan. 30, 2020, 4:11 p.m. UTC | #2
30.01.2020 17:09, Jon Hunter пишет:
> 
> On 30/01/2020 04:37, Dmitry Osipenko wrote:
>> It's a bit impractical to enable hardware's clock at the time of DMA
>> channel's allocation because most of DMA client drivers allocate DMA
>> channel at the time of the driver's probing, and thus, DMA clock is kept
>> always-enabled in practice, defeating the whole purpose of runtime PM.
>>
>> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
>> ---
>>  drivers/dma/tegra20-apb-dma.c | 47 ++++++++++++++++++++++++-----------
>>  1 file changed, 32 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
>> index 22b88ccff05d..0ee28d8e3c96 100644
>> --- a/drivers/dma/tegra20-apb-dma.c
>> +++ b/drivers/dma/tegra20-apb-dma.c
>> @@ -436,6 +436,8 @@ static void tegra_dma_stop(struct tegra_dma_channel *tdc)
>>  		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
>>  	}
>>  	tdc->busy = false;
>> +
>> +	pm_runtime_put(tdc->tdma->dev);
>>  }
>>  
>>  static void tegra_dma_start(struct tegra_dma_channel *tdc,
>> @@ -500,18 +502,25 @@ static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
>>  	tegra_dma_resume(tdc);
>>  }
>>  
>> -static void tdc_start_head_req(struct tegra_dma_channel *tdc)
>> +static bool tdc_start_head_req(struct tegra_dma_channel *tdc)
>>  {
>>  	struct tegra_dma_sg_req *sg_req;
>> +	int err;
>>  
>>  	if (list_empty(&tdc->pending_sg_req))
>> -		return;
>> +		return false;
>> +
>> +	err = pm_runtime_get_sync(tdc->tdma->dev);
>> +	if (WARN_ON_ONCE(err < 0))
>> +		return false;
>>  
>>  	sg_req = list_first_entry(&tdc->pending_sg_req, typeof(*sg_req), node);
>>  	tegra_dma_start(tdc, sg_req);
>>  	sg_req->configured = true;
>>  	sg_req->words_xferred = 0;
>>  	tdc->busy = true;
>> +
>> +	return true;
>>  }
>>  
>>  static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
>> @@ -615,6 +624,8 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc,
>>  	}
>>  	list_add_tail(&sgreq->node, &tdc->free_sg_req);
>>  
>> +	pm_runtime_put(tdc->tdma->dev);
>> +
>>  	/* Do not start DMA if it is going to be terminate */
>>  	if (to_terminate || list_empty(&tdc->pending_sg_req))
>>  		return;
>> @@ -730,9 +741,7 @@ static void tegra_dma_issue_pending(struct dma_chan *dc)
>>  		dev_err(tdc2dev(tdc), "No DMA request\n");
>>  		goto end;
>>  	}
>> -	if (!tdc->busy) {
>> -		tdc_start_head_req(tdc);
>> -
>> +	if (!tdc->busy && tdc_start_head_req(tdc)) {
>>  		/* Continuous single mode: Configure next req */
>>  		if (tdc->cyclic) {
>>  			/*
>> @@ -775,6 +784,13 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
>>  	else
>>  		wcount = status;
>>  
>> +	/*
>> +	 * tegra_dma_stop() will drop the RPM's usage refcount, but
>> +	 * tegra_dma_resume() touches hardware and thus we should keep
>> +	 * the DMA clock active while it's needed.
>> +	 */
>> +	pm_runtime_get(tdc->tdma->dev);
>> +
> 
> Would it work and make it simpler to just enable in the issue_pending
> and disable in the handle_once_dma_done or terminate_all?
> 
> diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
> index 3a45079d11ec..86bbb45da93d 100644
> --- a/drivers/dma/tegra20-apb-dma.c
> +++ b/drivers/dma/tegra20-apb-dma.c
> @@ -616,9 +616,14 @@ static void handle_once_dma_done(struct
> tegra_dma_channel *tdc,
>         list_add_tail(&sgreq->node, &tdc->free_sg_req);
> 
>         /* Do not start DMA if it is going to be terminate */
> -       if (to_terminate || list_empty(&tdc->pending_sg_req))
> +       if (to_terminate)
>                 return;
> 
> +       if (list_empty(&tdc->pending_sg_req)) {
> +               pm_runtime_put(tdc->tdma->dev);
> +               return;
> +       }
> +
>         tdc_start_head_req(tdc);
>  }
> 
> @@ -729,6 +734,11 @@ static void tegra_dma_issue_pending(struct dma_chan
> *dc)
>                 goto end;
>         }
>         if (!tdc->busy) {
> +               if (pm_runtime_get_sync(tdc->tdma->dev) < 0) {
> +                       dev_err(tdc2dev(tdc), "Failed to enable DMA!\n");
> +                       goto end;
> +               }
> +
>                 tdc_start_head_req(tdc);
> 
>                 /* Continuous single mode: Configure next req */
> @@ -788,6 +798,7 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
>                                 get_current_xferred_count(tdc, sgreq,
> wcount);
>         }
>         tegra_dma_resume(tdc);
> +       pm_runtime_put(tdc->tdma->dev);
> 
>  skip_dma_stop:
>         tegra_dma_abort_all(tdc);
> 

tegra_dma_stop() should put RPM anyway, which is missing from your
sample. Please see handle_continuous_head_request().

I also find the explicit get/put a bit easier to follow in the
code, don't you think?
Jon Hunter Jan. 30, 2020, 6:45 p.m. UTC | #3
On 30/01/2020 16:11, Dmitry Osipenko wrote:
> 30.01.2020 17:09, Jon Hunter пишет:
>>
>> On 30/01/2020 04:37, Dmitry Osipenko wrote:
>>> It's a bit impractical to enable hardware's clock at the time of DMA
>>> channel's allocation because most of DMA client drivers allocate DMA
>>> channel at the time of the driver's probing, and thus, DMA clock is kept
>>> always-enabled in practice, defeating the whole purpose of runtime PM.
>>>
>>> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
>>> ---
>>>  drivers/dma/tegra20-apb-dma.c | 47 ++++++++++++++++++++++++-----------
>>>  1 file changed, 32 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
>>> index 22b88ccff05d..0ee28d8e3c96 100644
>>> --- a/drivers/dma/tegra20-apb-dma.c
>>> +++ b/drivers/dma/tegra20-apb-dma.c
>>> @@ -436,6 +436,8 @@ static void tegra_dma_stop(struct tegra_dma_channel *tdc)
>>>  		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
>>>  	}
>>>  	tdc->busy = false;
>>> +
>>> +	pm_runtime_put(tdc->tdma->dev);
>>>  }
>>>  
>>>  static void tegra_dma_start(struct tegra_dma_channel *tdc,
>>> @@ -500,18 +502,25 @@ static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
>>>  	tegra_dma_resume(tdc);
>>>  }
>>>  
>>> -static void tdc_start_head_req(struct tegra_dma_channel *tdc)
>>> +static bool tdc_start_head_req(struct tegra_dma_channel *tdc)
>>>  {
>>>  	struct tegra_dma_sg_req *sg_req;
>>> +	int err;
>>>  
>>>  	if (list_empty(&tdc->pending_sg_req))
>>> -		return;
>>> +		return false;
>>> +
>>> +	err = pm_runtime_get_sync(tdc->tdma->dev);
>>> +	if (WARN_ON_ONCE(err < 0))
>>> +		return false;
>>>  
>>>  	sg_req = list_first_entry(&tdc->pending_sg_req, typeof(*sg_req), node);
>>>  	tegra_dma_start(tdc, sg_req);
>>>  	sg_req->configured = true;
>>>  	sg_req->words_xferred = 0;
>>>  	tdc->busy = true;
>>> +
>>> +	return true;
>>>  }
>>>  
>>>  static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
>>> @@ -615,6 +624,8 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc,
>>>  	}
>>>  	list_add_tail(&sgreq->node, &tdc->free_sg_req);
>>>  
>>> +	pm_runtime_put(tdc->tdma->dev);
>>> +
>>>  	/* Do not start DMA if it is going to be terminate */
>>>  	if (to_terminate || list_empty(&tdc->pending_sg_req))
>>>  		return;
>>> @@ -730,9 +741,7 @@ static void tegra_dma_issue_pending(struct dma_chan *dc)
>>>  		dev_err(tdc2dev(tdc), "No DMA request\n");
>>>  		goto end;
>>>  	}
>>> -	if (!tdc->busy) {
>>> -		tdc_start_head_req(tdc);
>>> -
>>> +	if (!tdc->busy && tdc_start_head_req(tdc)) {
>>>  		/* Continuous single mode: Configure next req */
>>>  		if (tdc->cyclic) {
>>>  			/*
>>> @@ -775,6 +784,13 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
>>>  	else
>>>  		wcount = status;
>>>  
>>> +	/*
>>> +	 * tegra_dma_stop() will drop the RPM's usage refcount, but
>>> +	 * tegra_dma_resume() touches hardware and thus we should keep
>>> +	 * the DMA clock active while it's needed.
>>> +	 */
>>> +	pm_runtime_get(tdc->tdma->dev);
>>> +
>>
>> Would it work and make it simpler to just enable in the issue_pending
>> and disable in the handle_once_dma_done or terminate_all?
>>
>> diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
>> index 3a45079d11ec..86bbb45da93d 100644
>> --- a/drivers/dma/tegra20-apb-dma.c
>> +++ b/drivers/dma/tegra20-apb-dma.c
>> @@ -616,9 +616,14 @@ static void handle_once_dma_done(struct
>> tegra_dma_channel *tdc,
>>         list_add_tail(&sgreq->node, &tdc->free_sg_req);
>>
>>         /* Do not start DMA if it is going to be terminate */
>> -       if (to_terminate || list_empty(&tdc->pending_sg_req))
>> +       if (to_terminate)
>>                 return;
>>
>> +       if (list_empty(&tdc->pending_sg_req)) {
>> +               pm_runtime_put(tdc->tdma->dev);
>> +               return;
>> +       }
>> +
>>         tdc_start_head_req(tdc);
>>  }
>>
>> @@ -729,6 +734,11 @@ static void tegra_dma_issue_pending(struct dma_chan
>> *dc)
>>                 goto end;
>>         }
>>         if (!tdc->busy) {
>> +               if (pm_runtime_get_sync(tdc->tdma->dev) < 0) {
>> +                       dev_err(tdc2dev(tdc), "Failed to enable DMA!\n");
>> +                       goto end;
>> +               }
>> +
>>                 tdc_start_head_req(tdc);
>>
>>                 /* Continuous single mode: Configure next req */
>> @@ -788,6 +798,7 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
>>                                 get_current_xferred_count(tdc, sgreq,
>> wcount);
>>         }
>>         tegra_dma_resume(tdc);
>> +       pm_runtime_put(tdc->tdma->dev);
>>
>>  skip_dma_stop:
>>         tegra_dma_abort_all(tdc);
>>
> 
> The tegra_dma_stop() should put RPM anyways, which is missed in yours
> sample. Please see handle_continuous_head_request().

Yes, and that is deliberate. For cyclic transfers, the transfers *should*
not stop until terminate_all is called. The tegra_dma_stop() call in
handle_continuous_head_request() handles an error condition, and so I am
not sure it is actually necessary to call pm_runtime_put() here.

> I'm also finding the explicit get/put a bit easier to follow in the
> code, don't you think so?

I can see that, but I was thinking that in the case of cyclic transfers,
it should only really be necessary to call the get/put at the beginning
and end. So in my mind there should only be two exit points which are
the ISR handler for SG and terminate_all for SG and cyclic.

Jon
Dmitry Osipenko Jan. 30, 2020, 8:04 p.m. UTC | #4
30.01.2020 21:45, Jon Hunter пишет:
> 
> On 30/01/2020 16:11, Dmitry Osipenko wrote:
>> 30.01.2020 17:09, Jon Hunter пишет:
>>>
>>> On 30/01/2020 04:37, Dmitry Osipenko wrote:
>>>> It's a bit impractical to enable hardware's clock at the time of DMA
>>>> channel's allocation because most of DMA client drivers allocate DMA
>>>> channel at the time of the driver's probing, and thus, DMA clock is kept
>>>> always-enabled in practice, defeating the whole purpose of runtime PM.
>>>>
>>>> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
>>>> ---
>>>>  drivers/dma/tegra20-apb-dma.c | 47 ++++++++++++++++++++++++-----------
>>>>  1 file changed, 32 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
>>>> index 22b88ccff05d..0ee28d8e3c96 100644
>>>> --- a/drivers/dma/tegra20-apb-dma.c
>>>> +++ b/drivers/dma/tegra20-apb-dma.c
>>>> @@ -436,6 +436,8 @@ static void tegra_dma_stop(struct tegra_dma_channel *tdc)
>>>>  		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
>>>>  	}
>>>>  	tdc->busy = false;
>>>> +
>>>> +	pm_runtime_put(tdc->tdma->dev);
>>>>  }
>>>>  
>>>>  static void tegra_dma_start(struct tegra_dma_channel *tdc,
>>>> @@ -500,18 +502,25 @@ static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
>>>>  	tegra_dma_resume(tdc);
>>>>  }
>>>>  
>>>> -static void tdc_start_head_req(struct tegra_dma_channel *tdc)
>>>> +static bool tdc_start_head_req(struct tegra_dma_channel *tdc)
>>>>  {
>>>>  	struct tegra_dma_sg_req *sg_req;
>>>> +	int err;
>>>>  
>>>>  	if (list_empty(&tdc->pending_sg_req))
>>>> -		return;
>>>> +		return false;
>>>> +
>>>> +	err = pm_runtime_get_sync(tdc->tdma->dev);
>>>> +	if (WARN_ON_ONCE(err < 0))
>>>> +		return false;
>>>>  
>>>>  	sg_req = list_first_entry(&tdc->pending_sg_req, typeof(*sg_req), node);
>>>>  	tegra_dma_start(tdc, sg_req);
>>>>  	sg_req->configured = true;
>>>>  	sg_req->words_xferred = 0;
>>>>  	tdc->busy = true;
>>>> +
>>>> +	return true;
>>>>  }
>>>>  
>>>>  static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
>>>> @@ -615,6 +624,8 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc,
>>>>  	}
>>>>  	list_add_tail(&sgreq->node, &tdc->free_sg_req);
>>>>  
>>>> +	pm_runtime_put(tdc->tdma->dev);
>>>> +
>>>>  	/* Do not start DMA if it is going to be terminate */
>>>>  	if (to_terminate || list_empty(&tdc->pending_sg_req))
>>>>  		return;
>>>> @@ -730,9 +741,7 @@ static void tegra_dma_issue_pending(struct dma_chan *dc)
>>>>  		dev_err(tdc2dev(tdc), "No DMA request\n");
>>>>  		goto end;
>>>>  	}
>>>> -	if (!tdc->busy) {
>>>> -		tdc_start_head_req(tdc);
>>>> -
>>>> +	if (!tdc->busy && tdc_start_head_req(tdc)) {
>>>>  		/* Continuous single mode: Configure next req */
>>>>  		if (tdc->cyclic) {
>>>>  			/*
>>>> @@ -775,6 +784,13 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
>>>>  	else
>>>>  		wcount = status;
>>>>  
>>>> +	/*
>>>> +	 * tegra_dma_stop() will drop the RPM's usage refcount, but
>>>> +	 * tegra_dma_resume() touches hardware and thus we should keep
>>>> +	 * the DMA clock active while it's needed.
>>>> +	 */
>>>> +	pm_runtime_get(tdc->tdma->dev);
>>>> +
>>>
>>> Would it work and make it simpler to just enable in the issue_pending
>>> and disable in the handle_once_dma_done or terminate_all?
>>>
>>> diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
>>> index 3a45079d11ec..86bbb45da93d 100644
>>> --- a/drivers/dma/tegra20-apb-dma.c
>>> +++ b/drivers/dma/tegra20-apb-dma.c
>>> @@ -616,9 +616,14 @@ static void handle_once_dma_done(struct
>>> tegra_dma_channel *tdc,
>>>         list_add_tail(&sgreq->node, &tdc->free_sg_req);
>>>
>>>         /* Do not start DMA if it is going to be terminate */
>>> -       if (to_terminate || list_empty(&tdc->pending_sg_req))
>>> +       if (to_terminate)
>>>                 return;
>>>
>>> +       if (list_empty(&tdc->pending_sg_req)) {
>>> +               pm_runtime_put(tdc->tdma->dev);
>>> +               return;
>>> +       }
>>> +
>>>         tdc_start_head_req(tdc);
>>>  }
>>>
>>> @@ -729,6 +734,11 @@ static void tegra_dma_issue_pending(struct dma_chan
>>> *dc)
>>>                 goto end;
>>>         }
>>>         if (!tdc->busy) {
>>> +               if (pm_runtime_get_sync(tdc->tdma->dev) < 0) {
>>> +                       dev_err(tdc2dev(tdc), "Failed to enable DMA!\n");
>>> +                       goto end;
>>> +               }
>>> +
>>>                 tdc_start_head_req(tdc);
>>>
>>>                 /* Continuous single mode: Configure next req */
>>> @@ -788,6 +798,7 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
>>>                                 get_current_xferred_count(tdc, sgreq,
>>> wcount);
>>>         }
>>>         tegra_dma_resume(tdc);
>>> +       pm_runtime_put(tdc->tdma->dev);
>>>
>>>  skip_dma_stop:
>>>         tegra_dma_abort_all(tdc);
>>>
>>
>> The tegra_dma_stop() should put RPM anyways, which is missed in yours
>> sample. Please see handle_continuous_head_request().
> 
> Yes and that is deliberate. The cyclic transfers the transfers *should*
> not stop until terminate_all is called. The tegra_dma_stop in
> handle_continuous_head_request() is an error condition and so I am not
> sure it is actually necessary to call pm_runtime_put() here.

But then tegra_dma_stop() shouldn't unset the "busy" mark.

>> I'm also finding the explicit get/put a bit easier to follow in the
>> code, don't you think so?
> 
> I can see that, but I was thinking that in the case of cyclic transfers,
> it should only really be necessary to call the get/put at the beginning
> and end. So in my mind there should only be two exit points which are
> the ISR handler for SG and terminate_all for SG and cyclic.

Alright, I'll update this patch.
Jon Hunter Jan. 31, 2020, 9:02 a.m. UTC | #5
On 30/01/2020 20:04, Dmitry Osipenko wrote:

...

>>> The tegra_dma_stop() should put RPM anyways, which is missed in yours
>>> sample. Please see handle_continuous_head_request().
>>
>> Yes and that is deliberate. The cyclic transfers the transfers *should*
>> not stop until terminate_all is called. The tegra_dma_stop in
>> handle_continuous_head_request() is an error condition and so I am not
>> sure it is actually necessary to call pm_runtime_put() here.
> 
> But then tegra_dma_stop() shouldn't unset the "busy" mark.

True.

>>> I'm also finding the explicit get/put a bit easier to follow in the
>>> code, don't you think so?
>>
>> I can see that, but I was thinking that in the case of cyclic transfers,
>> it should only really be necessary to call the get/put at the beginning
>> and end. So in my mind there should only be two exit points which are
>> the ISR handler for SG and terminate_all for SG and cyclic.
> 
> Alright, I'll update this patch.

Hmmm ... I am wondering if we should not mess with that and leave it how
you have it.
Jon Hunter Jan. 31, 2020, 9:05 a.m. UTC | #6
On 30/01/2020 04:37, Dmitry Osipenko wrote:
> It's a bit impractical to enable hardware's clock at the time of DMA
> channel's allocation because most of DMA client drivers allocate DMA
> channel at the time of the driver's probing, and thus, DMA clock is kept
> always-enabled in practice, defeating the whole purpose of runtime PM.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/dma/tegra20-apb-dma.c | 47 ++++++++++++++++++++++++-----------
>  1 file changed, 32 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
> index 22b88ccff05d..0ee28d8e3c96 100644
> --- a/drivers/dma/tegra20-apb-dma.c
> +++ b/drivers/dma/tegra20-apb-dma.c
> @@ -436,6 +436,8 @@ static void tegra_dma_stop(struct tegra_dma_channel *tdc)
>  		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
>  	}
>  	tdc->busy = false;
> +
> +	pm_runtime_put(tdc->tdma->dev);

There are only 3 places where tegra_dma_stop is called. Would it simplify
the code if we moved the pm_runtime_put() outside of tegra_dma_stop? In
other words, everywhere there is a tegra_dma_stop, we would then call
pm_runtime_put afterwards?

This would allow us to get rid of the extra pm_runtime_get in
terminate_all.

Jon
Dmitry Osipenko Jan. 31, 2020, 2:22 p.m. UTC | #7
31.01.2020 12:02, Jon Hunter пишет:
> 
> On 30/01/2020 20:04, Dmitry Osipenko wrote:
> 
> ...
> 
>>>> The tegra_dma_stop() should put RPM anyways, which is missed in yours
>>>> sample. Please see handle_continuous_head_request().
>>>
>>> Yes and that is deliberate. The cyclic transfers the transfers *should*
>>> not stop until terminate_all is called. The tegra_dma_stop in
>>> handle_continuous_head_request() is an error condition and so I am not
>>> sure it is actually necessary to call pm_runtime_put() here.
>>
>> But then tegra_dma_stop() shouldn't unset the "busy" mark.
> 
> True.
> 
>>>> I'm also finding the explicit get/put a bit easier to follow in the
>>>> code, don't you think so?
>>>
>>> I can see that, but I was thinking that in the case of cyclic transfers,
>>> it should only really be necessary to call the get/put at the beginning
>>> and end. So in my mind there should only be two exit points which are
>>> the ISR handler for SG and terminate_all for SG and cyclic.
>>
>> Alright, I'll update this patch.
> 
> Hmmm ... I am wondering if we should not mess with that and leave how
> you have it.

I took another look, and it seems my current v6 should be more correct because:

1. If "busy" is unset in tegra_dma_stop(), then the RPM should be put
there since tegra_dma_terminate_all() won't put RPM in this case:

	if (!tdc->busy)
		goto skip_dma_stop;

2. We can't move the "busy" unsetting into the terminate because then
tegra_dma_stop() will be invoked twice. Although, one option could be to
remove the tegra_dma_stop() from the error paths of
handle_continuous_head_request(), but I'm not sure that this is correct
to do.
Dmitry Osipenko Feb. 1, 2020, 3:13 p.m. UTC | #8
31.01.2020 17:22, Dmitry Osipenko пишет:
> 31.01.2020 12:02, Jon Hunter пишет:
>>
>> On 30/01/2020 20:04, Dmitry Osipenko wrote:
>>
>> ...
>>
>>>>> The tegra_dma_stop() should put RPM anyways, which is missed in yours
>>>>> sample. Please see handle_continuous_head_request().
>>>>
>>>> Yes and that is deliberate. The cyclic transfers the transfers *should*
>>>> not stop until terminate_all is called. The tegra_dma_stop in
>>>> handle_continuous_head_request() is an error condition and so I am not
>>>> sure it is actually necessary to call pm_runtime_put() here.
>>>
>>> But then tegra_dma_stop() shouldn't unset the "busy" mark.
>>
>> True.
>>
>>>>> I'm also finding the explicit get/put a bit easier to follow in the
>>>>> code, don't you think so?
>>>>
>>>> I can see that, but I was thinking that in the case of cyclic transfers,
>>>> it should only really be necessary to call the get/put at the beginning
>>>> and end. So in my mind there should only be two exit points which are
>>>> the ISR handler for SG and terminate_all for SG and cyclic.
>>>
>>> Alright, I'll update this patch.
>>
>> Hmmm ... I am wondering if we should not mess with that and leave how
>> you have it.
> 
> I took another look and seems my current v6 should be more correct because:
> 
> 1. If "busy" is unset in tegra_dma_stop(), then the RPM should be put
> there since tegra_dma_terminate_all() won't put RPM in this case:
> 
> 	if (!tdc->busy)
> 		goto skip_dma_stop;
> 
> 2. We can't move the "busy" unsetting into the terminate because then
> tegra_dma_stop() will be invoked twice. Although, one option could be to
> remove the tegra_dma_stop() from the error paths of
> handle_continuous_head_request(), but I'm not sure that this is correct
> to do.

Jon, I realized that my v6 variant is wrong too because
tegra_dma_terminate_all() -> tdc->isr_handler() will put RPM, and thus,
the RPM enable-count will be wrecked in this case.

I'm now leaning towards adopting your variant and simply removing the
tegra_dma_stop() from handle_continuous_head_request(), because there
shouldn't be any harm in keeping DMA active in the case of an error
condition. Besides, these error conditions are very extreme cases that
should never happen in practice.

It seems the "list_empty(&tdc->pending_sg_req)" error can never happen
at all, so I'll remove it in v7.
Jon Hunter Feb. 3, 2020, 11:37 a.m. UTC | #9
On 01/02/2020 15:13, Dmitry Osipenko wrote:
> 31.01.2020 17:22, Dmitry Osipenko пишет:
>> 31.01.2020 12:02, Jon Hunter пишет:
>>>
>>> On 30/01/2020 20:04, Dmitry Osipenko wrote:
>>>
>>> ...
>>>
>>>>>> The tegra_dma_stop() should put RPM anyways, which is missed in yours
>>>>>> sample. Please see handle_continuous_head_request().
>>>>>
>>>>> Yes and that is deliberate. The cyclic transfers the transfers *should*
>>>>> not stop until terminate_all is called. The tegra_dma_stop in
>>>>> handle_continuous_head_request() is an error condition and so I am not
>>>>> sure it is actually necessary to call pm_runtime_put() here.
>>>>
>>>> But then tegra_dma_stop() shouldn't unset the "busy" mark.
>>>
>>> True.
>>>
>>>>>> I'm also finding the explicit get/put a bit easier to follow in the
>>>>>> code, don't you think so?
>>>>>
>>>>> I can see that, but I was thinking that in the case of cyclic transfers,
>>>>> it should only really be necessary to call the get/put at the beginning
>>>>> and end. So in my mind there should only be two exit points which are
>>>>> the ISR handler for SG and terminate_all for SG and cyclic.
>>>>
>>>> Alright, I'll update this patch.
>>>
>>> Hmmm ... I am wondering if we should not mess with that and leave how
>>> you have it.
>>
>> I took another look and seems my current v6 should be more correct because:
>>
>> 1. If "busy" is unset in tegra_dma_stop(), then the RPM should be put
>> there since tegra_dma_terminate_all() won't put RPM in this case:
>>
>> 	if (!tdc->busy)
>> 		goto skip_dma_stop;
>>
>> 2. We can't move the "busy" unsetting into the terminate because then
>> tegra_dma_stop() will be invoked twice. Although, one option could be to
>> remove the tegra_dma_stop() from the error paths of
>> handle_continuous_head_request(), but I'm not sure that this is correct
>> to do.
> 
> Jon, I realized that my v6 variant is wrong too because
> tegra_dma_terminate_all() -> tdc->isr_handler() will put RPM, and thus,
> the RPM enable-count will be wrecked in this case.

Did you see my other suggestion to move the pm_runtime_put() outside of
tegra_dma_stop? There are only a few call sites for tegra_dma_stop and
so if we call pm_runtime_put() after calling tegra_dma_stop this should
simplify matters.

Jon
Dmitry Osipenko Feb. 3, 2020, 4:24 p.m. UTC | #10
03.02.2020 14:37, Jon Hunter пишет:
> 
> On 01/02/2020 15:13, Dmitry Osipenko wrote:
>> 31.01.2020 17:22, Dmitry Osipenko пишет:
>>> 31.01.2020 12:02, Jon Hunter пишет:
>>>>
>>>> On 30/01/2020 20:04, Dmitry Osipenko wrote:
>>>>
>>>> ...
>>>>
>>>>>>> The tegra_dma_stop() should put RPM anyways, which is missed in yours
>>>>>>> sample. Please see handle_continuous_head_request().
>>>>>>
>>>>>> Yes and that is deliberate. The cyclic transfers the transfers *should*
>>>>>> not stop until terminate_all is called. The tegra_dma_stop in
>>>>>> handle_continuous_head_request() is an error condition and so I am not
>>>>>> sure it is actually necessary to call pm_runtime_put() here.
>>>>>
>>>>> But then tegra_dma_stop() shouldn't unset the "busy" mark.
>>>>
>>>> True.
>>>>
>>>>>>> I'm also finding the explicit get/put a bit easier to follow in the
>>>>>>> code, don't you think so?
>>>>>>
>>>>>> I can see that, but I was thinking that in the case of cyclic transfers,
>>>>>> it should only really be necessary to call the get/put at the beginning
>>>>>> and end. So in my mind there should only be two exit points which are
>>>>>> the ISR handler for SG and terminate_all for SG and cyclic.
>>>>>
>>>>> Alright, I'll update this patch.
>>>>
>>>> Hmmm ... I am wondering if we should not mess with that and leave how
>>>> you have it.
>>>
>>> I took another look and seems my current v6 should be more correct because:
>>>
>>> 1. If "busy" is unset in tegra_dma_stop(), then the RPM should be put
>>> there since tegra_dma_terminate_all() won't put RPM in this case:
>>>
>>> 	if (!tdc->busy)
>>> 		goto skip_dma_stop;
>>>
>>> 2. We can't move the "busy" unsetting into the terminate because then
>>> tegra_dma_stop() will be invoked twice. Although, one option could be to
>>> remove the tegra_dma_stop() from the error paths of
>>> handle_continuous_head_request(), but I'm not sure that this is correct
>>> to do.
>>
>> Jon, I realized that my v6 variant is wrong too because
>> tegra_dma_terminate_all() -> tdc->isr_handler() will put RPM, and thus,
>> the RPM enable-count will be wrecked in this case.
> 
> Did you see my other suggestion to move the pm_runtime_put() outside of
> tegra_dma_stop?

Yes, but it seems I skimmed through the lines too quickly and failed to
recognize the point you made.

> There are only a few call sites for tegra_dma_stop and
> so if we call pm_runtime_put() after calling tegra_dma_stop this should
> simplify matters.

This is somewhat similar to what I did in v7. Instead of adding
pm_runtime_put() after each tegra_dma_stop(), I removed the
tegra_dma_stop().

Looking at it once again, perhaps it would indeed be better to leave the
relevant tegra_dma_stop() calls in place (the irrelevant ones could be removed).

Please take a look at v7. I'll drop the "[PATCH v7 13/19] dmaengine:
tegra-apb: Don't stop cyclic DMA in a case of error condition" and make
v8 after your review of v7. Thanks in advance!
diff mbox series

Patch

diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 22b88ccff05d..0ee28d8e3c96 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -436,6 +436,8 @@  static void tegra_dma_stop(struct tegra_dma_channel *tdc)
 		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
 	}
 	tdc->busy = false;
+
+	pm_runtime_put(tdc->tdma->dev);
 }
 
 static void tegra_dma_start(struct tegra_dma_channel *tdc,
@@ -500,18 +502,25 @@  static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
 	tegra_dma_resume(tdc);
 }
 
-static void tdc_start_head_req(struct tegra_dma_channel *tdc)
+static bool tdc_start_head_req(struct tegra_dma_channel *tdc)
 {
 	struct tegra_dma_sg_req *sg_req;
+	int err;
 
 	if (list_empty(&tdc->pending_sg_req))
-		return;
+		return false;
+
+	err = pm_runtime_get_sync(tdc->tdma->dev);
+	if (WARN_ON_ONCE(err < 0))
+		return false;
 
 	sg_req = list_first_entry(&tdc->pending_sg_req, typeof(*sg_req), node);
 	tegra_dma_start(tdc, sg_req);
 	sg_req->configured = true;
 	sg_req->words_xferred = 0;
 	tdc->busy = true;
+
+	return true;
 }
 
 static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
@@ -615,6 +624,8 @@  static void handle_once_dma_done(struct tegra_dma_channel *tdc,
 	}
 	list_add_tail(&sgreq->node, &tdc->free_sg_req);
 
+	pm_runtime_put(tdc->tdma->dev);
+
 	/* Do not start DMA if it is going to be terminate */
 	if (to_terminate || list_empty(&tdc->pending_sg_req))
 		return;
@@ -730,9 +741,7 @@  static void tegra_dma_issue_pending(struct dma_chan *dc)
 		dev_err(tdc2dev(tdc), "No DMA request\n");
 		goto end;
 	}
-	if (!tdc->busy) {
-		tdc_start_head_req(tdc);
-
+	if (!tdc->busy && tdc_start_head_req(tdc)) {
 		/* Continuous single mode: Configure next req */
 		if (tdc->cyclic) {
 			/*
@@ -775,6 +784,13 @@  static int tegra_dma_terminate_all(struct dma_chan *dc)
 	else
 		wcount = status;
 
+	/*
+	 * tegra_dma_stop() will drop the RPM's usage refcount, but
+	 * tegra_dma_resume() touches hardware and thus we should keep
+	 * the DMA clock active while it's needed.
+	 */
+	pm_runtime_get(tdc->tdma->dev);
+
 	was_busy = tdc->busy;
 	tegra_dma_stop(tdc);
 
@@ -786,6 +802,8 @@  static int tegra_dma_terminate_all(struct dma_chan *dc)
 	}
 	tegra_dma_resume(tdc);
 
+	pm_runtime_put(tdc->tdma->dev);
+
 skip_dma_stop:
 	tegra_dma_abort_all(tdc);
 
@@ -1280,22 +1298,15 @@  tegra_dma_prep_dma_cyclic(struct dma_chan *dc, dma_addr_t buf_addr,
 static int tegra_dma_alloc_chan_resources(struct dma_chan *dc)
 {
 	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
-	struct tegra_dma *tdma = tdc->tdma;
-	int ret;
 
 	dma_cookie_init(&tdc->dma_chan);
 
-	ret = pm_runtime_get_sync(tdma->dev);
-	if (ret < 0)
-		return ret;
-
 	return 0;
 }
 
 static void tegra_dma_free_chan_resources(struct dma_chan *dc)
 {
 	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
-	struct tegra_dma *tdma = tdc->tdma;
 	struct tegra_dma_desc *dma_desc;
 	struct tegra_dma_sg_req *sg_req;
 	struct list_head dma_desc_list;
@@ -1328,7 +1339,6 @@  static void tegra_dma_free_chan_resources(struct dma_chan *dc)
 		list_del(&sg_req->node);
 		kfree(sg_req);
 	}
-	pm_runtime_put(tdma->dev);
 
 	tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
 }
@@ -1428,6 +1438,11 @@  static int tegra_dma_probe(struct platform_device *pdev)
 
 	spin_lock_init(&tdma->global_lock);
 
+	ret = clk_prepare(tdma->dma_clk);
+	if (ret)
+		return ret;
+
+	pm_runtime_irq_safe(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
 	ret = pm_runtime_get_sync(&pdev->dev);
@@ -1543,6 +1558,7 @@  static int tegra_dma_probe(struct platform_device *pdev)
 
 err_pm_disable:
 	pm_runtime_disable(&pdev->dev);
+	clk_unprepare(tdma->dma_clk);
 
 	return ret;
 }
@@ -1553,6 +1569,7 @@  static int tegra_dma_remove(struct platform_device *pdev)
 
 	dma_async_device_unregister(&tdma->dma_dev);
 	pm_runtime_disable(&pdev->dev);
+	clk_unprepare(tdma->dma_clk);
 
 	return 0;
 }
@@ -1581,7 +1598,7 @@  static int tegra_dma_runtime_suspend(struct device *dev)
 						  TEGRA_APBDMA_CHAN_WCOUNT);
 	}
 
-	clk_disable_unprepare(tdma->dma_clk);
+	clk_disable(tdma->dma_clk);
 
 	return 0;
 }
@@ -1592,7 +1609,7 @@  static int tegra_dma_runtime_resume(struct device *dev)
 	unsigned int i;
 	int ret;
 
-	ret = clk_prepare_enable(tdma->dma_clk);
+	ret = clk_enable(tdma->dma_clk);
 	if (ret < 0) {
 		dev_err(dev, "clk_enable failed: %d\n", ret);
 		return ret;