diff mbox series

[5/5] drm/msm/A6x: Add devfreq support in A6x

Message ID 1535015911-2040-6-git-send-email-smasetty@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series msm/drm: A6x DCVS series | expand

Commit Message

Sharat Masetty Aug. 23, 2018, 9:18 a.m. UTC
Implement routines to estimate GPU busy time and to fetch the
current frequency for the polling interval. This is required by
the devfreq framework, which recommends a frequency change if needed.
The driver code then tries to set this new frequency on the GPU by
sending an Out Of Band (OOB) request.

Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 46 +++++++++++++++++++++++++++++++----
 drivers/gpu/drm/msm/adreno/a6xx_gmu.h |  2 ++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 27 ++++++++++++++++++++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  2 ++
 4 files changed, 72 insertions(+), 5 deletions(-)

Comments

Jordan Crouse Aug. 23, 2018, 4 p.m. UTC | #1
On Thu, Aug 23, 2018 at 02:48:31PM +0530, Sharat Masetty wrote:
> Implement routines to estimate GPU busy time and fetching the
> current frequency for the polling interval. This is required by
> the devfreq framework which recommends a frequency change if needed.
> The driver code then tries to set this new frequency on the GPU by
> sending an Out Of Band(OOB) request.

"sending an Out of Band (OOB) request _to the GMU_". Otherwise it is a little
confusing as to who is doing what.

> 
> Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 46 +++++++++++++++++++++++++++++++----
>  drivers/gpu/drm/msm/adreno/a6xx_gmu.h |  2 ++
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 27 ++++++++++++++++++++
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  2 ++
>  4 files changed, 72 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> index f6634c0..92ff48b 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> @@ -67,8 +67,10 @@ static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
>  		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
>  }
>  
> -static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
> +static int __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
>  {
> +	int ret;

Should be a u32 since we are doing a gmu_read().

> +
>  	gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
>  
>  	gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
> @@ -84,7 +86,41 @@ static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
>  	a6xx_gmu_set_oob(gmu, GMU_OOB_DCVS_SET);
>  	a6xx_gmu_clear_oob(gmu, GMU_OOB_DCVS_SET);
>  
> -	return gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
> +	ret = gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
> +	if (!ret)
> +		gmu->cur_freq = gmu->gpu_freqs[index];
> +

'ret' from the register read won't be an appropriate Unix error code, so it
should be translated - otherwise it will be confusing, because
'a6xx_gmu_set_freq' returns 0 or a valid error code in every other case.

> +	return ret;
> +}
> +
> +int a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
> +{
> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> +	u32 perf_index = 0;
> +
> +	if (freq == gmu->cur_freq)
> +		return 0;
> +
> +	//TODO: Use a hashmap instead? This gets called potentially every ~10 ms

Please don't use C++ style comments.  A TODO is okay, but I would prefer if you
solved this question.  I'm not sure if walking a short list of 10 items is a big
concern if it happens every 10ms or so.

> +	for (perf_index = 0; perf_index < gmu->nr_gpu_freqs; perf_index++)
> +		if (freq == gmu->gpu_freqs[perf_index])
> +			break;

Are you positive we don't need to worry about rounding here - will devfreq
*always* give you an exact frequency value? I know the clock subsystem allows
for rounding.  You might want to double check just to be sure that we don't need
to worry about that here.

In particular, I would be concerned about the userspace governor for devfreq
where the user can set anything they want.  I'm not 100% sure that gets vetted
against the OPP table before we get to this point.

> +	if (perf_index == gmu->nr_gpu_freqs)
> +		return -EINVAL;

Related to the previous comment slightly, if devfreq wants to set a frequency of
a hundred million Hz, is it an error or should we just clamp to the highest
available frequency and call it good?

> +	return  __a6xx_gmu_set_freq(gmu, perf_index);
> +}
> +
> +unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
> +{
> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> +
> +	return  gmu->cur_freq;
>  }
>  
>  static bool a6xx_gmu_check_idle_level(struct a6xx_gmu *gmu)
> @@ -629,8 +665,8 @@ int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu)
>  	if (!ret)
>  		ret = a6xx_hfi_start(gmu, GMU_COLD_BOOT);
>  
> -	/* Set the GPU back to the highest power frequency */
> -	a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
> +	/* Save the current frequency for devfreq */
> +	gmu->cur_freq = gmu->gpu_freqs[gmu->nr_gpu_freqs - 1];

I'm not sure I understand this change - don't we need to set the GPU
frequency immediately out of reset even if DCVS is expected to change it soon?

>  
>  out:
>  	if (ret)
> @@ -671,7 +707,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
>  	ret = a6xx_hfi_start(gmu, status);
>  
>  	/* Set the GPU to the highest power frequency */
> -	a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
> +	__a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
>  
>  out:
>  	/* Make sure to turn off the boot OOB request on error */
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> index f9e4dfe..ce6e5ca 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> @@ -77,6 +77,8 @@ struct a6xx_gmu {
>  	unsigned long gmu_freqs[4];
>  	u32 cx_arc_votes[4];
>  
> +	unsigned long cur_freq;
> +

This could just be 'freq'.

>  	struct a6xx_hfi_queue queues[2];
>  
>  	struct tasklet_struct hfi_tasklet;
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 3429d33a..af90706 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -7,6 +7,8 @@
>  #include "a6xx_gpu.h"
>  #include "a6xx_gmu.xml.h"
>  
> +#include <linux/devfreq.h>
> +
>  static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
>  {
>  	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -682,6 +684,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
>  
>  	gpu->needs_hw_init = true;
>  
> +	msm_gpu_resume_devfreq(gpu);
> +
>  	return ret;
>  }
>  
> @@ -690,6 +694,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
>  	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>  	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>  
> +	devfreq_suspend_device(gpu->devfreq.devfreq);
> +
>  	/*
>  	 * Make sure the GMU is idle before continuing (because some transitions
>  	 * may use VBIF
> @@ -753,6 +759,24 @@ static void a6xx_destroy(struct msm_gpu *gpu)
>  	kfree(a6xx_gpu);
>  }
>  
> +static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
> +{
> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +	u64 busy_cycles;
> +	unsigned long busy_time;
> +
> +	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
> +			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
> +			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
> +
> +	busy_time = ((busy_cycles - gpu->devfreq.busy_cycles) * 10) / 192;
> +
> +	gpu->devfreq.busy_cycles = busy_cycles;
> +
> +	return busy_time;
> +}
> +
>  static const struct adreno_gpu_funcs funcs = {
>  	.base = {
>  		.get_param = adreno_get_param,
> @@ -768,6 +792,9 @@ static void a6xx_destroy(struct msm_gpu *gpu)
>  #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
>  		.show = a6xx_show,
>  #endif
> +		.gpu_busy = a6xx_gpu_busy,
> +		.gpu_get_freq = a6xx_gmu_get_freq,
> +		.gpu_set_freq = a6xx_gmu_set_freq,
>  	},
>  	.get_timestamp = a6xx_get_timestamp,
>  };
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> index 32c2501..f236767 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> @@ -56,5 +56,7 @@ struct a6xx_gpu {
>  
>  int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node, struct platform_device *gpu_pdev);
>  void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
> +int a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq);
> +unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu);
>  
>  #endif /* __A6XX_GPU_H__ */
> -- 
> 1.9.1
>
Sharat Masetty Aug. 24, 2018, 9:54 a.m. UTC | #2
On 8/23/2018 9:30 PM, Jordan Crouse wrote:
> On Thu, Aug 23, 2018 at 02:48:31PM +0530, Sharat Masetty wrote:
>> Implement routines to estimate GPU busy time and fetching the
>> current frequency for the polling interval. This is required by
>> the devfreq framework which recommends a frequency change if needed.
>> The driver code then tries to set this new frequency on the GPU by
>> sending an Out Of Band(OOB) request.
> 
> "sending an Out of Band (OOB) request _to the GMU_". Otherwise it is a little
> confusing as to who is doing what.
> 
>>
>> Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
>> ---
>>   drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 46 +++++++++++++++++++++++++++++++----
>>   drivers/gpu/drm/msm/adreno/a6xx_gmu.h |  2 ++
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 27 ++++++++++++++++++++
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  2 ++
>>   4 files changed, 72 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> index f6634c0..92ff48b 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> @@ -67,8 +67,10 @@ static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
>>   		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
>>   }
>>   
>> -static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
>> +static int __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
>>   {
>> +	int ret;
> 
> Should be a u32 since we are doing a gmu_read().
> 
>> +
>>   	gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
>>   
>>   	gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
>> @@ -84,7 +86,41 @@ static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
>>   	a6xx_gmu_set_oob(gmu, GMU_OOB_DCVS_SET);
>>   	a6xx_gmu_clear_oob(gmu, GMU_OOB_DCVS_SET);
>>   
>> -	return gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
>> +	ret = gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
>> +	if (!ret)
>> +		gmu->cur_freq = gmu->gpu_freqs[index];
>> +
> 
> 'ret' from the register read won't be an appropriate Unix error message so it
> should be translated - otherwise it will be confusing because
> 'a6xx_gmu_set_freq' otherwise returns 0 or valid error messages.
> 
>> +	return ret;
>> +}
>> +
>> +int a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
>> +{
>> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>> +	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
>> +	u32 perf_index = 0;
>> +
>> +	if (freq == gmu->cur_freq)
>> +		return 0;
>> +
>> +	//TODO: Use a hashmap instead? This gets called potentially every ~10 ms
> 
> Please don't use C++ style comments.  A TODO is okay, but I would prefer if you
> solved this question.  I'm not sure if walking a short list of 10 items is a big
> concern if it happens every 10ms or so.
Sure, I will take care of this...
> 
>> +	for (perf_index = 0; perf_index < gmu->nr_gpu_freqs; perf_index++)
>> +		if (freq == gmu->gpu_freqs[perf_index])
>> +			break;
> 
> Are you positive we don't need to worry about rounding here - will devfreq
> *always* give you an exact frequency value? I  know the clock subsystem allows
> for rounding.  You might want to double check just to be sure that we don't need
> to worry about that here.
> 
> In particular, I would be concerned about the userspace governor for devfreq
> where the user can set anything they want.  I'm not 100% sure that gets vetted
> against the OPP table before we get to this point.
> 
>> +	if (perf_index == gmu->nr_gpu_freqs)
>> +		return -EINVAL;
> 
> Related to the previous comment slightly, if devfreq wants to set a frequency of
> a hundred million HZ is it an error or should we just clamp to the highest
> available frequency and call it good?
For this and the comment above, we use the devfreq_recommended_opp() 
function to get a proper OPP from our OPP list in the dt for the GPU 
device.
> 
>> +	return  __a6xx_gmu_set_freq(gmu, perf_index);
>> +}
>> +
>> +unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
>> +{
>> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>> +	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
>> +
>> +	return  gmu->cur_freq;
>>   }
>>   
>>   static bool a6xx_gmu_check_idle_level(struct a6xx_gmu *gmu)
>> @@ -629,8 +665,8 @@ int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu)
>>   	if (!ret)
>>   		ret = a6xx_hfi_start(gmu, GMU_COLD_BOOT);
>>   
>> -	/* Set the GPU back to the highest power frequency */
>> -	a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
>> +	/* Save the current frequency for devfreq */
>> +	gmu->cur_freq = gmu->gpu_freqs[gmu->nr_gpu_freqs - 1];
> 
> I'm not sure I understand this change - don't we need to set the frequency
> GPU immediately out of reset even if DCVS is expected to change it soon?
Oops, this somehow slipped my attention and should not be here. I will 
revert this. Thanks for the catch.
> 
>>   
>>   out:
>>   	if (ret)
>> @@ -671,7 +707,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
>>   	ret = a6xx_hfi_start(gmu, status);
>>   
>>   	/* Set the GPU to the highest power frequency */
>> -	a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
>> +	__a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
>>   
>>   out:
>>   	/* Make sure to turn off the boot OOB request on error */
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
>> index f9e4dfe..ce6e5ca 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
>> @@ -77,6 +77,8 @@ struct a6xx_gmu {
>>   	unsigned long gmu_freqs[4];
>>   	u32 cx_arc_votes[4];
>>   
>> +	unsigned long cur_freq;
>> +
> 
> This could just be 'freq'.
> 
>>   	struct a6xx_hfi_queue queues[2];
>>   
>>   	struct tasklet_struct hfi_tasklet;
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index 3429d33a..af90706 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -7,6 +7,8 @@
>>   #include "a6xx_gpu.h"
>>   #include "a6xx_gmu.xml.h"
>>   
>> +#include <linux/devfreq.h>
>> +
>>   static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
>>   {
>>   	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> @@ -682,6 +684,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
>>   
>>   	gpu->needs_hw_init = true;
>>   
>> +	msm_gpu_resume_devfreq(gpu);
>> +
>>   	return ret;
>>   }
>>   
>> @@ -690,6 +694,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
>>   	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>>   	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>>   
>> +	devfreq_suspend_device(gpu->devfreq.devfreq);
>> +
>>   	/*
>>   	 * Make sure the GMU is idle before continuing (because some transitions
>>   	 * may use VBIF
>> @@ -753,6 +759,24 @@ static void a6xx_destroy(struct msm_gpu *gpu)
>>   	kfree(a6xx_gpu);
>>   }
>>   
>> +static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
>> +{
>> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>> +	u64 busy_cycles;
>> +	unsigned long busy_time;
>> +
>> +	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
>> +			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
>> +			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
>> +
>> +	busy_time = ((busy_cycles - gpu->devfreq.busy_cycles) * 10) / 192;
>> +
>> +	gpu->devfreq.busy_cycles = busy_cycles;
>> +
>> +	return busy_time;
>> +}
>> +
>>   static const struct adreno_gpu_funcs funcs = {
>>   	.base = {
>>   		.get_param = adreno_get_param,
>> @@ -768,6 +792,9 @@ static void a6xx_destroy(struct msm_gpu *gpu)
>>   #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
>>   		.show = a6xx_show,
>>   #endif
>> +		.gpu_busy = a6xx_gpu_busy,
>> +		.gpu_get_freq = a6xx_gmu_get_freq,
>> +		.gpu_set_freq = a6xx_gmu_set_freq,
>>   	},
>>   	.get_timestamp = a6xx_get_timestamp,
>>   };
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> index 32c2501..f236767 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> @@ -56,5 +56,7 @@ struct a6xx_gpu {
>>   
>>   int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node, struct platform_device *gpu_pdev);
>>   void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
>> +int a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq);
>> +unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu);
>>   
>>   #endif /* __A6XX_GPU_H__ */
>> -- 
>> 1.9.1
>>
>
Jordan Crouse Aug. 24, 2018, 2:45 p.m. UTC | #3
On Fri, Aug 24, 2018 at 03:24:04PM +0530, Sharat Masetty wrote:
> 
> 
> On 8/23/2018 9:30 PM, Jordan Crouse wrote:
> >On Thu, Aug 23, 2018 at 02:48:31PM +0530, Sharat Masetty wrote:
> >>Implement routines to estimate GPU busy time and fetching the
> >>current frequency for the polling interval. This is required by
> >>the devfreq framework which recommends a frequency change if needed.
> >>The driver code then tries to set this new frequency on the GPU by
> >>sending an Out Of Band(OOB) request.
> >
> >"sending an Out of Band (OOB) request _to the GMU_". Otherwise it is a little
> >confusing as to who is doing what.
> >
> >>
> >>Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
> >>---
> >>  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 46 +++++++++++++++++++++++++++++++----
> >>  drivers/gpu/drm/msm/adreno/a6xx_gmu.h |  2 ++
> >>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 27 ++++++++++++++++++++
> >>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  2 ++
> >>  4 files changed, 72 insertions(+), 5 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> >>index f6634c0..92ff48b 100644
> >>--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> >>+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> >>@@ -67,8 +67,10 @@ static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
> >>  		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
> >>  }
> >>-static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
> >>+static int __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
> >>  {
> >>+	int ret;
> >
> >Should be a u32 since we are doing a gmu_read().
> >
> >>+
> >>  	gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
> >>  	gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
> >>@@ -84,7 +86,41 @@ static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
> >>  	a6xx_gmu_set_oob(gmu, GMU_OOB_DCVS_SET);
> >>  	a6xx_gmu_clear_oob(gmu, GMU_OOB_DCVS_SET);
> >>-	return gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
> >>+	ret = gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
> >>+	if (!ret)
> >>+		gmu->cur_freq = gmu->gpu_freqs[index];
> >>+
> >
> >'ret' from the register read won't be an appropriate Unix error message so it
> >should be translated - otherwise it will be confusing because
> >'a6xx_gmu_set_freq' otherwise returns 0 or valid error messages.
> >
> >>+	return ret;
> >>+}
> >>+
> >>+int a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
> >>+{
> >>+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> >>+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> >>+	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> >>+	u32 perf_index = 0;
> >>+
> >>+	if (freq == gmu->cur_freq)
> >>+		return 0;
> >>+
> >>+	//TODO: Use a hashmap instead? This gets called potentially every ~10 ms
> >
> >Please don't use C++ style comments.  A TODO is okay, but I would prefer if you
> >solved this question.  I'm not sure if walking a short list of 10 items is a big
> >concern if it happens every 10ms or so.
> Sure, I will take care of this...
> >
> >>+	for (perf_index = 0; perf_index < gmu->nr_gpu_freqs; perf_index++)
> >>+		if (freq == gmu->gpu_freqs[perf_index])
> >>+			break;
> >
> >Are you positive we don't need to worry about rounding here - will devfreq
> >*always* give you an exact frequency value? I  know the clock subsystem allows
> >for rounding.  You might want to double check just to be sure that we don't need
> >to worry about that here.
> >
> >In particular, I would be concerned about the userspace governor for devfreq
> >where the user can set anything they want.  I'm not 100% sure that gets vetted
> >against the OPP table before we get to this point.
> >
> >>+	if (perf_index == gmu->nr_gpu_freqs)
> >>+		return -EINVAL;
> >
> >Related to the previous comment slightly, if devfreq wants to set a frequency of
> >a hundred million HZ is it an error or should we just clamp to the highest
> >available frequency and call it good?
> For this and the comment above, we use the devfreq_recommended_opp()
> function to get a proper OPP from our OPP list in the dt for the GPU
> device.

So if we are sure the incoming frequency is always valid then perf_index will
always match and we know this if statement will never be true. So we should
get rid of it.

If you are paranoid about the list being wrong, you could change the for loop
so that it always defaults to the highest frequency and then remove the if
statement:

-	for (perf_index = 0; perf_index < gmu->nr_gpu_freqs; perf_index++)
+	for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)

Jordan
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index f6634c0..92ff48b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -67,8 +67,10 @@  static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
 		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
 }
 
-static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
+static int __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
 {
+	int ret;
+
 	gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
 
 	gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
@@ -84,7 +86,41 @@  static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
 	a6xx_gmu_set_oob(gmu, GMU_OOB_DCVS_SET);
 	a6xx_gmu_clear_oob(gmu, GMU_OOB_DCVS_SET);
 
-	return gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
+	ret = gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
+	if (!ret)
+		gmu->cur_freq = gmu->gpu_freqs[index];
+
+	return ret;
+}
+
+int a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+	u32 perf_index = 0;
+
+	if (freq == gmu->cur_freq)
+		return 0;
+
+	//TODO: Use a hashmap instead? This gets called potentially every ~10 ms
+	for (perf_index = 0; perf_index < gmu->nr_gpu_freqs; perf_index++)
+		if (freq == gmu->gpu_freqs[perf_index])
+			break;
+
+	if (perf_index == gmu->nr_gpu_freqs)
+		return -EINVAL;
+
+	return  __a6xx_gmu_set_freq(gmu, perf_index);
+}
+
+unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+
+	return  gmu->cur_freq;
 }
 
 static bool a6xx_gmu_check_idle_level(struct a6xx_gmu *gmu)
@@ -629,8 +665,8 @@  int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu)
 	if (!ret)
 		ret = a6xx_hfi_start(gmu, GMU_COLD_BOOT);
 
-	/* Set the GPU back to the highest power frequency */
-	a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
+	/* Save the current frequency for devfreq */
+	gmu->cur_freq = gmu->gpu_freqs[gmu->nr_gpu_freqs - 1];
 
 out:
 	if (ret)
@@ -671,7 +707,7 @@  int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 	ret = a6xx_hfi_start(gmu, status);
 
 	/* Set the GPU to the highest power frequency */
-	a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
+	__a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
 
 out:
 	/* Make sure to turn off the boot OOB request on error */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index f9e4dfe..ce6e5ca 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -77,6 +77,8 @@  struct a6xx_gmu {
 	unsigned long gmu_freqs[4];
 	u32 cx_arc_votes[4];
 
+	unsigned long cur_freq;
+
 	struct a6xx_hfi_queue queues[2];
 
 	struct tasklet_struct hfi_tasklet;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 3429d33a..af90706 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -7,6 +7,8 @@ 
 #include "a6xx_gpu.h"
 #include "a6xx_gmu.xml.h"
 
+#include <linux/devfreq.h>
+
 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -682,6 +684,8 @@  static int a6xx_pm_resume(struct msm_gpu *gpu)
 
 	gpu->needs_hw_init = true;
 
+	msm_gpu_resume_devfreq(gpu);
+
 	return ret;
 }
 
@@ -690,6 +694,8 @@  static int a6xx_pm_suspend(struct msm_gpu *gpu)
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 
+	devfreq_suspend_device(gpu->devfreq.devfreq);
+
 	/*
 	 * Make sure the GMU is idle before continuing (because some transitions
 	 * may use VBIF
@@ -753,6 +759,24 @@  static void a6xx_destroy(struct msm_gpu *gpu)
 	kfree(a6xx_gpu);
 }
 
+static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	u64 busy_cycles;
+	unsigned long busy_time;
+
+	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
+			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
+			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
+
+	busy_time = ((busy_cycles - gpu->devfreq.busy_cycles) * 10) / 192;
+
+	gpu->devfreq.busy_cycles = busy_cycles;
+
+	return busy_time;
+}
+
 static const struct adreno_gpu_funcs funcs = {
 	.base = {
 		.get_param = adreno_get_param,
@@ -768,6 +792,9 @@  static void a6xx_destroy(struct msm_gpu *gpu)
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 		.show = a6xx_show,
 #endif
+		.gpu_busy = a6xx_gpu_busy,
+		.gpu_get_freq = a6xx_gmu_get_freq,
+		.gpu_set_freq = a6xx_gmu_set_freq,
 	},
 	.get_timestamp = a6xx_get_timestamp,
 };
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 32c2501..f236767 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -56,5 +56,7 @@  struct a6xx_gpu {
 
 int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node, struct platform_device *gpu_pdev);
 void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
+int a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq);
+unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu);
 
 #endif /* __A6XX_GPU_H__ */