diff mbox series

[v2,1/3] cppc_cpufreq: Return desired perf in ->get() if feedback counters are 0

Message ID 20240912072231.439332-2-zhanjie9@hisilicon.com (mailing list archive)
State New
Headers show
Series cppc_cpufreq: Rework ->get() error handling when cores are idle | expand

Commit Message

Jie Zhan Sept. 12, 2024, 7:22 a.m. UTC
The CPPC performance feedback counters could return 0 when the target cpu
is in a deep idle state, e.g. powered off, and those counters are not
powered.  In this case, cppc_cpufreq_get_rate() returns 0, and hence,
cpufreq_online() gets a false error and doesn't generate a cpufreq policy,
which happens in cpufreq_add_dev() when a new cpu device is added.

Don't take it as an error and return the frequency corresponding to the
desired perf when the feedback counters are 0.

Fixes: 6a4fec4f6d30 ("cpufreq: cppc: cppc_cpufreq_get_rate() returns zero in all error cases.")
Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
Reviewed-by: Zeng Heng <zengheng4@huawei.com>
---
 drivers/cpufreq/cppc_cpufreq.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

Comments

Ionela Voinescu Sept. 12, 2024, 9:43 a.m. UTC | #1
Hi,

On Thursday 12 Sep 2024 at 15:22:29 (+0800), Jie Zhan wrote:
> The CPPC performance feedback counters could return 0 when the target cpu
> is in a deep idle state, e.g. powered off, and those counters are not
> powered.  In this case, cppc_cpufreq_get_rate() returns 0, and hence,
> cpufreq_online() gets a false error and doesn't generate a cpufreq policy,
> which happens in cpufreq_add_dev() when a new cpu device is added.
> 
> Don't take it as an error and return the frequency corresponding to the
> desired perf when the feedback counters are 0.
> 
> Fixes: 6a4fec4f6d30 ("cpufreq: cppc: cppc_cpufreq_get_rate() returns zero in all error cases.")
> Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
> Reviewed-by: Zeng Heng <zengheng4@huawei.com>
> ---
>  drivers/cpufreq/cppc_cpufreq.c | 19 +++++++++++++++++--
>  1 file changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index bafa32dd375d..6aa3af56924b 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -748,18 +748,33 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>  
>  	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
>  	if (ret)
> -		return 0;
> +		goto out_err;
>  
>  	udelay(2); /* 2usec delay between sampling */
>  
>  	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
>  	if (ret)
> -		return 0;
> +		goto out_err;
>  
>  	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
>  					       &fb_ctrs_t1);
>  
>  	return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> +
> +out_err:
> +	/*
> +	 * Feedback counters could be 0 when cores are powered down.
> +	 * Take desired perf for reflecting frequency in this case.
> +	 */
> +	if (ret == -EFAULT) {
> +		ret = cppc_get_desired_perf(cpu, &delivered_perf);
> +		if (ret)
> +			return 0;
> +
> +		return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> +	}
> +
> +	return 0;
>  }

A possible (slimmer) alternative implementation for you to consider
(this merges patches 1 & 2):

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bafa32dd375d..c16be9651a6f 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -118,6 +118,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)

        perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
                                     &fb_ctrs);
+       if (!perf)
+               perf = cpu_data->perf_ctrls.desired_perf;
+
        cppc_fi->prev_perf_fb_ctrs = fb_ctrs;

        perf <<= SCHED_CAPACITY_SHIFT;
@@ -726,7 +729,7 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,

        /* Check to avoid divide-by zero and invalid delivered_perf */
        if (!delta_reference || !delta_delivered)
-               return cpu_data->perf_ctrls.desired_perf;
+               return 0;

        return (reference_perf * delta_delivered) / delta_reference;
 }
@@ -736,7 +739,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
        struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        struct cppc_cpudata *cpu_data;
-       u64 delivered_perf;
+       u64 delivered_perf = 0;
        int ret;

        if (!policy)
@@ -747,19 +750,22 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
        cpufreq_cpu_put(policy);

        ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
-       if (ret)
-               return 0;
-
-       udelay(2); /* 2usec delay between sampling */
-
-       ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
-       if (ret)
-               return 0;
+       if (!ret) {
+               udelay(2); /* 2usec delay between sampling */
+               ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
+       }
+       if (!ret)
+               delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
+                                                      &fb_ctrs_t1);
+       if ((ret == -EFAULT) || !delivered_perf) {
+               if (cppc_get_desired_perf(cpu, &delivered_perf))
+                       delivered_perf = cpu_data->perf_ctrls.desired_perf;
+       }

-       delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
-                                              &fb_ctrs_t1);
+       if (delivered_perf)
+               return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);

-       return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
+       return 0;
 }

disclaimer: not fully checked so likely not "production ready" code :)

Hope it helps,
Ionela.

>  
>  static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
> -- 
> 2.33.0
>
Jie Zhan Sept. 13, 2024, 12:05 p.m. UTC | #2
Hi Ionela,

On 12/09/2024 17:43, Ionela Voinescu wrote:

...

> 
> A possible (slimmer) alternative implementation for you to consider
> (this merges patches 1 & 2):
> 
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index bafa32dd375d..c16be9651a6f 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -118,6 +118,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)
> 
>         perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
>                                      &fb_ctrs);
> +       if (!perf)
> +               perf = cpu_data->perf_ctrls.desired_perf;
> +

I think it's better to just return here.
If feedback counters are successfully read but unchanged, the following
calculation and update in cppc_scale_freq_workfn() is meaningless because it
won't change anything.

>         cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
> 
>         perf <<= SCHED_CAPACITY_SHIFT;
> @@ -726,7 +729,7 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
> 
>         /* Check to avoid divide-by zero and invalid delivered_perf */
>         if (!delta_reference || !delta_delivered)
> -               return cpu_data->perf_ctrls.desired_perf;
> +               return 0;

This makes sense to me.
Here is probably why Patch 2 looks bulky.

> 
>         return (reference_perf * delta_delivered) / delta_reference;
>  }
> @@ -736,7 +739,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>         struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
>         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>         struct cppc_cpudata *cpu_data;
> -       u64 delivered_perf;
> +       u64 delivered_perf = 0;
>         int ret;
> 
>         if (!policy)
> @@ -747,19 +750,22 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>         cpufreq_cpu_put(policy);
> 
>         ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
> -       if (ret)
> -               return 0;
> -
> -       udelay(2); /* 2usec delay between sampling */
> -
> -       ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
> -       if (ret)
> -               return 0;
> +       if (!ret) {
> +               udelay(2); /* 2usec delay between sampling */
> +               ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
> +       }
> +       if (!ret)
> +               delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
> +                                                      &fb_ctrs_t1);

TBH, 'if (!ret)' style looks very strange to me.
We haven't done so anywhere in cppc_cpufreq, so let's keep consistency and make
it easier for people to read and maintain?

> +       if ((ret == -EFAULT) || !delivered_perf) {
> +               if (cppc_get_desired_perf(cpu, &delivered_perf))
> +                       delivered_perf = cpu_data->perf_ctrls.desired_perf;

will take this.

> +       }
> 
> -       delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
> -                                              &fb_ctrs_t1);
> +       if (delivered_perf)
> +               return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> 
> -       return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> +       return 0;
>  }
> 
> disclaimer: not fully checked so likely not "production ready" code :)
> 
> Hope it helps,
> Ionela.
> 
>>  
>>  static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
>> -- 
>> 2.33.0
>>
> 

How about this? merged patch 1 & 2 as well.

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bafa32dd375d..411303f2e8cb 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -118,6 +118,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)

          perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
                                       &fb_ctrs);
+       if (!perf)
+               return;
+
          cppc_fi->prev_perf_fb_ctrs = fb_ctrs;

          perf <<= SCHED_CAPACITY_SHIFT;
@@ -726,7 +729,7 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,

          /* Check to avoid divide-by zero and invalid delivered_perf */
          if (!delta_reference || !delta_delivered)
-               return cpu_data->perf_ctrls.desired_perf;
+               return 0;

          return (reference_perf * delta_delivered) / delta_reference;
   }
@@ -748,18 +751,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)

          ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
          if (ret)
-               return 0;
+               goto out_err;

          udelay(2); /* 2usec delay between sampling */

          ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
          if (ret)
-               return 0;
+               goto out_err;

          delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
                                                 &fb_ctrs_t1);

          return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
+
+out_err:
+       /*
+        * Feedback counters could be 0 when cores are powered down.
+        * Take desired perf for reflecting frequency in this case.
+        */
+       if (ret == -EFAULT) {
+               if (cppc_get_desired_perf(cpu, &delivered_perf))
+                       delivered_perf = cpu_data->perf_ctrls.desired_perf;
+
+               return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
+       }
+
+       return 0;
   }

   static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
---

Thanks indeed!
Jie
Ionela Voinescu Sept. 17, 2024, 10:36 a.m. UTC | #3
Hi,

On Friday 13 Sep 2024 at 20:05:50 (+0800), Jie Zhan wrote:
> 
> Hi Ionela,
> 
> On 12/09/2024 17:43, Ionela Voinescu wrote:
> 
> ...
> 
> > 
> > A possible (slimmer) alternative implementation for you to consider
> > (this merges patches 1 & 2):
> > 
> > diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> > index bafa32dd375d..c16be9651a6f 100644
> > --- a/drivers/cpufreq/cppc_cpufreq.c
> > +++ b/drivers/cpufreq/cppc_cpufreq.c
> > @@ -118,6 +118,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)
> > 
> >         perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
> >                                      &fb_ctrs);
> > +       if (!perf)
> > +               perf = cpu_data->perf_ctrls.desired_perf;
> > +
> 
> I think it's better to just return here.
> If feedback counters are successfully read but unchanged, the following
> calculation and update in cppc_scale_freq_workfn() is meaningless because it
> won't change anything.

Agreed!

> 
> >         cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
> > 
> >         perf <<= SCHED_CAPACITY_SHIFT;
> > @@ -726,7 +729,7 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
> > 
> >         /* Check to avoid divide-by zero and invalid delivered_perf */
> >         if (!delta_reference || !delta_delivered)
> > -               return cpu_data->perf_ctrls.desired_perf;
> > +               return 0;
> 
> This makes sense to me.
> Here is probably why Patch 2 looks bulky.
> 
> > 
> >         return (reference_perf * delta_delivered) / delta_reference;
> >  }
> > @@ -736,7 +739,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
> >         struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
> >         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> >         struct cppc_cpudata *cpu_data;
> > -       u64 delivered_perf;
> > +       u64 delivered_perf = 0;
> >         int ret;
> > 
> >         if (!policy)
> > @@ -747,19 +750,22 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
> >         cpufreq_cpu_put(policy);
> > 
> >         ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
> > -       if (ret)
> > -               return 0;
> > -
> > -       udelay(2); /* 2usec delay between sampling */
> > -
> > -       ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
> > -       if (ret)
> > -               return 0;
> > +       if (!ret) {
> > +               udelay(2); /* 2usec delay between sampling */
> > +               ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
> > +       }
> > +       if (!ret)
> > +               delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
> > +                                                      &fb_ctrs_t1);
> 
> TBH, 'if (!ret)' style looks very strange to me.
> We haven't done so anywhere in cppc_cpufreq, so let's keep consistency and make
> it easier for people to read and maintain?

I agree it's a bit of a difficult read, that's why I only sent my code
as a suggestion. I did like the benefit of not having to have two
different calls to cppc_perf_to_khz() and making the code below common
for the error and non-error paths. But it's up to you. 
> 
> > +       if ((ret == -EFAULT) || !delivered_perf) {
> > +               if (cppc_get_desired_perf(cpu, &delivered_perf))
> > +                       delivered_perf = cpu_data->perf_ctrls.desired_perf;
> 
> will take this.
> 
> > +       }
> > 
> > -       delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
> > -                                              &fb_ctrs_t1);
> > +       if (delivered_perf)
> > +               return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> > 
> > -       return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> > +       return 0;
> >  }
> > 
> > disclaimer: not fully checked so likely not "production ready" code :)
> > 
> > Hope it helps,
> > Ionela.
> > 
> >>  
> >>  static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
> >> -- 
> >> 2.33.0
> >>
> > 
> 
> How about this? merged patch 1 & 2 as well.
> 
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index bafa32dd375d..411303f2e8cb 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -118,6 +118,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)
> 
>           perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
>                                        &fb_ctrs);
> +       if (!perf)
> +               return;
> +
>           cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
> 
>           perf <<= SCHED_CAPACITY_SHIFT;
> @@ -726,7 +729,7 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
> 
>           /* Check to avoid divide-by zero and invalid delivered_perf */
>           if (!delta_reference || !delta_delivered)
> -               return cpu_data->perf_ctrls.desired_perf;
> +               return 0;
> 
>           return (reference_perf * delta_delivered) / delta_reference;
>    }
> @@ -748,18 +751,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
> 
>           ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
>           if (ret)
> -               return 0;
> +               goto out_err;
> 
>           udelay(2); /* 2usec delay between sampling */
> 
>           ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
>           if (ret)
> -               return 0;
> +               goto out_err;
> 
>           delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
>                                                  &fb_ctrs_t1);

You need a check here for !delivered_perf (when at least one of the
deltas is 0) in which case it would be good to take the same error path
below. Something like:

            if(delivered_perf)
	            return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
	    else
		ret = -EFAULT;

That's why I did the tricky if/else dance above as we need to take the
error path below for multiple cases.

Thanks,
Ionela.

> 
>           return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> +
> +out_err:
> +       /*
> +        * Feedback counters could be 0 when cores are powered down.
> +        * Take desired perf for reflecting frequency in this case.
> +        */
> +       if (ret == -EFAULT) {
> +               if (cppc_get_desired_perf(cpu, &delivered_perf))
> +                       delivered_perf = cpu_data->perf_ctrls.desired_perf;
> +
> +               return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> +       }
> +
> +       return 0;
>    }
> 
>    static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
> ---
> 
> Thanks indeed!
> Jie
Jie Zhan Sept. 18, 2024, 2:05 a.m. UTC | #4
On 17/09/2024 18:36, Ionela Voinescu wrote:

...

>>> @@ -747,19 +750,22 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>>>         cpufreq_cpu_put(policy);
>>>
>>>         ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
>>> -       if (ret)
>>> -               return 0;
>>> -
>>> -       udelay(2); /* 2usec delay between sampling */
>>> -
>>> -       ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
>>> -       if (ret)
>>> -               return 0;
>>> +       if (!ret) {
>>> +               udelay(2); /* 2usec delay between sampling */
>>> +               ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
>>> +       }
>>> +       if (!ret)
>>> +               delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
>>> +                                                      &fb_ctrs_t1);
>>
>> TBH, 'if (!ret)' style looks very strange to me.
>> We haven't done so anywhere in cppc_cpufreq, so let's keep consistency and make
>> it easier for people to read and maintain?
> 
> I agree it's a bit of a difficult read, that's why I only sent my code
> as a suggestion. I did like the benefit of not having to have two
> different calls to cppc_perf_to_khz() and making the code below common
> for the error and non-error paths. But it's up to you. 

Yeah understood. I did try minimizing duplicate code, but ended up with either
duplicate 'get desired perf' stuff or duplicate cppc_perf_to_khz().

...
>>
>>           delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
>>                                                  &fb_ctrs_t1);
> 
> You need a check here for !delivered_perf (when at least one of the
> deltas is 0) in which case it would be good to take the same error path
> below. Something like:
> 
>             if(delivered_perf)
> 	            return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
> 	    else
> 		ret = -EFAULT;
> 
> That's why I did the tricky if/else dance above as we need to take the
> error path below for multiple cases.
> 
> Thanks,
> Ionela.
> 

Sure, thanks for reminding this.

...

How does this look? I think this should have the least duplicate code except for
two cppc_perf_to_khz() calls, while keeping the logic easy to follow.

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bafa32dd375d..6070444ed098 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -118,6 +118,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)
 
        perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
                                     &fb_ctrs);
+       if (!perf)
+               return;
+
        cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
 
        perf <<= SCHED_CAPACITY_SHIFT;
@@ -726,11 +729,27 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
 
        /* Check to avoid divide-by zero and invalid delivered_perf */
        if (!delta_reference || !delta_delivered)
-               return cpu_data->perf_ctrls.desired_perf;
+               return 0;
 
        return (reference_perf * delta_delivered) / delta_reference;
 }
 
+static int cppc_get_perf_ctrs_sample(int cpu,
+                                    struct cppc_perf_fb_ctrs *fb_ctrs_t0,
+                                    struct cppc_perf_fb_ctrs *fb_ctrs_t1)
+{
+       int ret;
+
+       ret = cppc_get_perf_ctrs(cpu, fb_ctrs_t0);
+       if (ret)
+               return ret;
+
+       udelay(2); /* 2usec delay between sampling */
+
+       ret = cppc_get_perf_ctrs(cpu, fb_ctrs_t1);
+       return ret;
+}
+
 static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 {
        struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
@@ -746,20 +765,30 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 
        cpufreq_cpu_put(policy);
 
-       ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
-       if (ret)
-               return 0;
-
-       udelay(2); /* 2usec delay between sampling */
-
-       ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
-       if (ret)
-               return 0;
+       ret = cppc_get_perf_ctrs_sample(cpu, &fb_ctrs_t0, &fb_ctrs_t1);
+       if (ret) {
+               if (ret == -EFAULT)
+                       goto out_invalid_counters;
+               else
+                       return 0;
+       }
 
        delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
                                               &fb_ctrs_t1);
+       if (!delivered_perf)
+               goto out_invalid_counters;
 
        return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
+
+out_invalid_counters:
+       /*
+        * Feedback counters could be unchanged or 0 when a cpu enters a
+        * low-power idle state, e.g. clock-gated or power-gated.
+        * Get the lastest or cached desired perf for reflecting frequency.
+        */
+       if (cppc_get_desired_perf(cpu, &delivered_perf))
+               delivered_perf = cpu_data->perf_ctrls.desired_perf;
+       return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
 }
 
 static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)


Thanks!
Jie
diff mbox series

Patch

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bafa32dd375d..6aa3af56924b 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -748,18 +748,33 @@  static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 
 	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
 	if (ret)
-		return 0;
+		goto out_err;
 
 	udelay(2); /* 2usec delay between sampling */
 
 	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
 	if (ret)
-		return 0;
+		goto out_err;
 
 	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
 					       &fb_ctrs_t1);
 
 	return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
+
+out_err:
+	/*
+	 * Feedback counters could be 0 when cores are powered down.
+	 * Take desired perf for reflecting frequency in this case.
+	 */
+	if (ret == -EFAULT) {
+		ret = cppc_get_desired_perf(cpu, &delivered_perf);
+		if (ret)
+			return 0;
+
+		return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
+	}
+
+	return 0;
 }
 
 static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)