diff mbox series

[09/11] cpufreq: amd-pstate: implement heterogeneous core topology for highest performance initialization

Message ID 731a28ea8dda4ca1db64f673c87770de4646290b.1715065568.git.perry.yuan@amd.com (mailing list archive)
State Superseded, archived
Headers show
Series AMD Pstate Driver Fixes and Improvements | expand

Commit Message

Yuan, Perry May 7, 2024, 7:15 a.m. UTC
Introduces an optimization to the AMD-Pstate driver by implementing
a heterogeneous core topology for the initialization of the highest
performance value while driver loading.
There are two type cores designed including performance core and
efficiency Core. each core type has different highest performance value
and highest frequency initialized by power firmware, `amd_pstate` driver
need to identify the core types and set correct highest perf value.

X86_FEATURE_HETERO_CORE_TOPOLOGY is used to identify whether the
processor support heterogeneous core type by reading CPUID leaf
Fn_0x80000026_EAX and bit 30. if the bit is set as one, then amd_pstate
driver will check EBX 30:28 bits to get the core type.

Value Description:
0h Performance Core.
1h Efficiency Core.

https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf
PDF p274
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
---
 arch/x86/include/asm/processor.h |  2 ++
 arch/x86/kernel/cpu/amd.c        | 19 ++++++++++
 drivers/cpufreq/amd-pstate.c     | 62 ++++++++++++++++++++++++--------
 include/linux/amd-pstate.h       |  8 +++++
 4 files changed, 77 insertions(+), 14 deletions(-)

Comments

Mario Limonciello May 7, 2024, 3:19 p.m. UTC | #1
On 5/7/2024 02:15, Perry Yuan wrote:
> Introduces an optimization to the AMD-Pstate driver by implementing
> a heterogeneous core topology for the initialization of the highest
> performance value while driver loading.
> There are two type cores designed including performance core and
> efficiency Core. each core type has different highest performance value

Capitalize the "E" in each.

> and highest frequency initialized by power firmware, `amd_pstate` driver

Three things:

1. Rather than "power firmware" you should just say "platform".
2. I would use "configured" instead of "initialized"
3. This is a run on sentence.

Here's my proposed change.

Each core type has different highest performance and frequency values 
configured by the platform.  The `amd_pstate` driver needs to identify
the type of core to correctly set an appropriate highest perf value.

> need to identify the core types and set correct highest perf value.
> 
> X86_FEATURE_HETERO_CORE_TOPOLOGY is used to identify whether the
> processor support heterogeneous core type by reading CPUID leaf
> Fn_0x80000026_EAX and bit 30. if the bit is set as one, then amd_pstate
> driver will check EBX 30:28 bits to get the core type.
> 
> Value Description:
> 0h Performance Core.
> 1h Efficiency Core.
> 
> https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf

Use the word "Link:" to prefix this link.

> PDF p274
> Signed-off-by: Perry Yuan <perry.yuan@amd.com>
> ---
>   arch/x86/include/asm/processor.h |  2 ++
>   arch/x86/kernel/cpu/amd.c        | 19 ++++++++++
>   drivers/cpufreq/amd-pstate.c     | 62 ++++++++++++++++++++++++--------
>   include/linux/amd-pstate.h       |  8 +++++
>   4 files changed, 77 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 811548f131f4..30d1900bb7e0 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -683,10 +683,12 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
>   extern u32 amd_get_highest_perf(void);
>   extern void amd_clear_divider(void);
>   extern void amd_check_microcode(void);
> +extern int amd_get_this_core_type(void);
>   #else
>   static inline u32 amd_get_highest_perf(void)		{ return 0; }
>   static inline void amd_clear_divider(void)		{ }
>   static inline void amd_check_microcode(void)		{ }
> +static inline int amd_get_this_core_type(void)		{ }
>   #endif
>   
>   extern unsigned long arch_align_stack(unsigned long sp);
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index 307302af0aee..67966bdcde65 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -1219,3 +1219,22 @@ void noinstr amd_clear_divider(void)
>   		     :: "a" (0), "d" (0), "r" (1));
>   }
>   EXPORT_SYMBOL_GPL(amd_clear_divider);
> +
> +#define X86_CPU_TYPE_ID_SHIFT	28
> +
> +/**
> + * amd_get_this_core_type - Get the type of this heterogeneous CPU
> + *
> + * Returns the CPU type [31:28] (i.e., performance or efficient) of
> + * a CPU in the processor.
> + * If the processor has no core type support, returns -1.
> + */
> +
> +int amd_get_this_core_type(void)
> +{
> +	if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
> +		return -1;
> +
> +	return cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT;
> +}
> +EXPORT_SYMBOL_GPL(amd_get_this_core_type);
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 7145248b38ec..7fe8a8fc6227 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -50,7 +50,9 @@
>   
>   #define AMD_PSTATE_TRANSITION_LATENCY	20000
>   #define AMD_PSTATE_TRANSITION_DELAY	1000
> -#define AMD_PSTATE_PREFCORE_THRESHOLD	166
> +#define CPPC_HIGHEST_PERF_EFFICIENT		132
> +#define CPPC_HIGHEST_PERF_PERFORMANCE		196
> +#define CPPC_HIGHEST_PERF_DEFAULT		166
>   
>   /*
>    * TODO: We need more time to fine tune processors with shared memory solution
> @@ -326,6 +328,49 @@ static inline int amd_pstate_enable(bool enable)
>   	return static_call(amd_pstate_enable)(enable);
>   }
>   
> +static void get_this_core_type(void *data)
> +{
> +	int *cpu_type = data;
> +
> +	*cpu_type = amd_get_this_core_type();
> +}

Does this really need a static function calling a static function just 
to set a variable?

I would think that you can simplify it something like this:

int amd_get_core_type(void *data)
{
	int *type = data;

	if (!type)
		return -EINVAL;
	if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY)) {
		*type = -1;
		return -ENODEV;
	}
	*type = cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT;
}

And then amd_pstate_get_cpu_type() can call:

smp_call_function_single(cpu, amd_get_core_type, &cpu_type, 1);

> +
> +static int amd_pstate_get_cpu_type(int cpu)
> +{
> +	int cpu_type = 0;
> +
> +	smp_call_function_single(cpu, get_this_core_type, &cpu_type, 1);
> +
> +	return cpu_type;
> +}
> +
> +static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
> +{
> +	u32 highest_perf;
> +	int core_type;
> +
> +	core_type = amd_pstate_get_cpu_type(cpudata->cpu);
> +	pr_debug("core_type %d found\n", core_type);
> +
> +	switch (core_type) {
> +	case CPU_CORE_TYPE_NO_HETERO_SUP:
> +		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> +		break;
> +	case CPU_CORE_TYPE_PERFORMANCE:
> +		highest_perf = CPPC_HIGHEST_PERF_PERFORMANCE;
> +		break;
> +	case CPU_CORE_TYPE_EFFICIENCY:
> +		highest_perf = CPPC_HIGHEST_PERF_EFFICIENT;
> +		break;
> +	default:
> +		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> +		WARN_ONCE(true, "WARNING: Undefined core type found");
> +		break;
> +	}
> +
> +    return highest_perf;
> +}
> +
>   static int pstate_init_perf(struct amd_cpudata *cpudata)
>   {
>   	u64 cap1;
> @@ -336,15 +381,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
>   	if (ret)
>   		return ret;
>   
> -	/* For platforms that do not support the preferred core feature, the
> -	 * highest_pef may be configured with 166 or 255, to avoid max frequency
> -	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
> -	 * the default max perf.
> -	 */
> -	if (cpudata->hw_prefcore)
> -		highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
> -	else
> -		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
> +	highest_perf = amd_pstate_highest_perf_set(cpudata);

I think it would be a good idea to follow up later on validate the 
selections of this logic from amd-pstate-ut.c.

>   
>   	WRITE_ONCE(cpudata->highest_perf, highest_perf);
>   	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
> @@ -365,10 +402,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
>   	if (ret)
>   		return ret;
>   
> -	if (cpudata->hw_prefcore)
> -		highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
> -	else
> -		highest_perf = cppc_perf.highest_perf;
> +	highest_perf = amd_pstate_highest_perf_set(cpudata);
>   
>   	WRITE_ONCE(cpudata->highest_perf, highest_perf);
>   	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index d58fc022ec46..869d916003f1 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -134,4 +134,12 @@ struct quirk_entry {
>   	u32 lowest_freq;
>   };
>   
> +/* defined by CPUID_Fn80000026_EBX BIT [31:28] */
> +enum amd_core_type {
> +	CPU_CORE_TYPE_NO_HETERO_SUP = -1,
> +	CPU_CORE_TYPE_PERFORMANCE = 0,
> +	CPU_CORE_TYPE_EFFICIENCY = 1,
> +	CPU_CORE_TYPE_UNDEFINED = 2,
> +};
> +
>   #endif /* _LINUX_AMD_PSTATE_H */
kernel test robot May 7, 2024, 6:14 p.m. UTC | #2
Hi Perry,

kernel test robot noticed the following build warnings:

[auto build test WARNING on rafael-pm/linux-next]
[also build test WARNING on rafael-pm/bleeding-edge next-20240507]
[cannot apply to tip/x86/core linus/master v6.9-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Perry-Yuan/cpufreq-amd-pstate-optimiza-the-initial-frequency-values-verification/20240507-151930
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/731a28ea8dda4ca1db64f673c87770de4646290b.1715065568.git.perry.yuan%40amd.com
patch subject: [PATCH 09/11] cpufreq: amd-pstate: implement heterogeneous core topology for highest performance initialization
config: x86_64-buildonly-randconfig-001-20240507 (https://download.01.org/0day-ci/archive/20240508/202405080122.4pEEcR2C-lkp@intel.com/config)
compiler: clang version 18.1.4 (https://github.com/llvm/llvm-project e6c3289804a67ea0bb6a86fadbe454dd93b8d855)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240508/202405080122.4pEEcR2C-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405080122.4pEEcR2C-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from arch/x86/kernel/asm-offsets.c:9:
   In file included from include/linux/crypto.h:15:
   In file included from include/linux/completion.h:12:
   In file included from include/linux/swait.h:7:
   In file included from include/linux/spinlock.h:60:
   In file included from include/linux/thread_info.h:60:
   In file included from arch/x86/include/asm/thread_info.h:59:
   In file included from arch/x86/include/asm/cpufeature.h:5:
>> arch/x86/include/asm/processor.h:690:51: warning: non-void function does not return a value [-Wreturn-type]
     690 | static inline int amd_get_this_core_type(void)          { }
         |                                                           ^
   1 warning generated.
--
   In file included from drivers/usb/dwc2/hcd_ddma.c:12:
   In file included from include/linux/module.h:13:
   In file included from include/linux/stat.h:19:
   In file included from include/linux/time.h:60:
   In file included from include/linux/time32.h:13:
   In file included from include/linux/timex.h:67:
   In file included from arch/x86/include/asm/timex.h:5:
>> arch/x86/include/asm/processor.h:690:51: warning: non-void function does not return a value [-Wreturn-type]
     690 | static inline int amd_get_this_core_type(void)          { }
         |                                                           ^
   drivers/usb/dwc2/hcd_ddma.c:555:16: warning: variable 'n_desc' set but not used [-Wunused-but-set-variable]
     555 |         u16 idx, inc, n_desc = 0, ntd_max = 0;
         |                       ^
   2 warnings generated.
--
   In file included from arch/x86/kernel/asm-offsets.c:9:
   In file included from include/linux/crypto.h:15:
   In file included from include/linux/completion.h:12:
   In file included from include/linux/swait.h:7:
   In file included from include/linux/spinlock.h:60:
   In file included from include/linux/thread_info.h:60:
   In file included from arch/x86/include/asm/thread_info.h:59:
   In file included from arch/x86/include/asm/cpufeature.h:5:
>> arch/x86/include/asm/processor.h:690:51: warning: non-void function does not return a value [-Wreturn-type]
     690 | static inline int amd_get_this_core_type(void)          { }
         |                                                           ^
   1 warning generated.


vim +690 arch/x86/include/asm/processor.h

   680	
   681	#ifdef CONFIG_CPU_SUP_AMD
   682	extern u32 amd_get_highest_perf(void);
   683	extern void amd_clear_divider(void);
   684	extern void amd_check_microcode(void);
   685	extern int amd_get_this_core_type(void);
   686	#else
   687	static inline u32 amd_get_highest_perf(void)		{ return 0; }
   688	static inline void amd_clear_divider(void)		{ }
   689	static inline void amd_check_microcode(void)		{ }
 > 690	static inline int amd_get_this_core_type(void)		{ }
   691	#endif
   692
kernel test robot May 7, 2024, 7:40 p.m. UTC | #3
Hi Perry,

kernel test robot noticed the following build warnings:

[auto build test WARNING on rafael-pm/linux-next]
[also build test WARNING on rafael-pm/bleeding-edge next-20240507]
[cannot apply to tip/x86/core linus/master v6.9-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Perry-Yuan/cpufreq-amd-pstate-optimiza-the-initial-frequency-values-verification/20240507-151930
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/731a28ea8dda4ca1db64f673c87770de4646290b.1715065568.git.perry.yuan%40amd.com
patch subject: [PATCH 09/11] cpufreq: amd-pstate: implement heterogeneous core topology for highest performance initialization
config: i386-buildonly-randconfig-005-20240507 (https://download.01.org/0day-ci/archive/20240508/202405080340.n0nVlmfY-lkp@intel.com/config)
compiler: gcc-11 (Ubuntu 11.4.0-4ubuntu1) 11.4.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240508/202405080340.n0nVlmfY-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405080340.n0nVlmfY-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/cpufeature.h:5,
                    from arch/x86/include/asm/thread_info.h:59,
                    from include/linux/thread_info.h:60,
                    from include/linux/spinlock.h:60,
                    from include/linux/swait.h:7,
                    from include/linux/completion.h:12,
                    from include/linux/crypto.h:15,
                    from arch/x86/kernel/asm-offsets.c:9:
   arch/x86/include/asm/processor.h: In function 'amd_get_this_core_type':
>> arch/x86/include/asm/processor.h:690:1: warning: no return statement in function returning non-void [-Wreturn-type]
     690 | static inline int amd_get_this_core_type(void)          { }
         | ^~~~~~
--
   In file included from arch/x86/include/asm/cpufeature.h:5,
                    from arch/x86/include/asm/thread_info.h:59,
                    from include/linux/thread_info.h:60,
                    from include/linux/spinlock.h:60,
                    from include/linux/swait.h:7,
                    from include/linux/completion.h:12,
                    from include/linux/crypto.h:15,
                    from arch/x86/kernel/asm-offsets.c:9:
   arch/x86/include/asm/processor.h: In function 'amd_get_this_core_type':
>> arch/x86/include/asm/processor.h:690:1: warning: no return statement in function returning non-void [-Wreturn-type]
     690 | static inline int amd_get_this_core_type(void)          { }
         | ^~~~~~


vim +690 arch/x86/include/asm/processor.h

   680	
   681	#ifdef CONFIG_CPU_SUP_AMD
   682	extern u32 amd_get_highest_perf(void);
   683	extern void amd_clear_divider(void);
   684	extern void amd_check_microcode(void);
   685	extern int amd_get_this_core_type(void);
   686	#else
   687	static inline u32 amd_get_highest_perf(void)		{ return 0; }
   688	static inline void amd_clear_divider(void)		{ }
   689	static inline void amd_check_microcode(void)		{ }
 > 690	static inline int amd_get_this_core_type(void)		{ }
   691	#endif
   692
Yuan, Perry May 9, 2024, 4:36 p.m. UTC | #4
[AMD Official Use Only - General]

 Hi Mario

> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello@amd.com>
> Sent: Tuesday, May 7, 2024 11:19 PM
> To: Yuan, Perry <Perry.Yuan@amd.com>; rafael.j.wysocki@intel.com;
> viresh.kumar@linaro.org; Huang, Ray <Ray.Huang@amd.com>; Shenoy,
> Gautham Ranjal <gautham.shenoy@amd.com>; Petkov, Borislav
> <Borislav.Petkov@amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Huang, Shimmer
> <Shimmer.Huang@amd.com>; Du, Xiaojian <Xiaojian.Du@amd.com>; Meng,
> Li (Jassmine) <Li.Meng@amd.com>; linux-pm@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH 09/11] cpufreq: amd-pstate: implement heterogeneous
> core topology for highest performance initialization
>
> On 5/7/2024 02:15, Perry Yuan wrote:
> > Introduces an optimization to the AMD-Pstate driver by implementing a
> > heterogeneous core topology for the initialization of the highest
> > performance value while driver loading.
> > There are two type cores designed including performance core and
> > efficiency Core. each core type has different highest performance
> > value
>
> Capitalize the "E" in each.
>
> > and highest frequency initialized by power firmware, `amd_pstate`
> > driver
>
> Three things:
>
> 1. Rather than "power firmware" you should just say "platform".
> 2. I would use "configured" instead of "initialized"
> 3. This is a run on sentence.
>
> Here's my proposed change.
>
> Each core type has different highest performance and frequency values
> configured by the platform.  The `amd_pstate` driver needs to identify the
> type of core to correctly set an appropriate highest perf value.
>
> > need to identify the core types and set correct highest perf value.
> >
> > X86_FEATURE_HETERO_CORE_TOPOLOGY is used to identify whether the
> > processor support heterogeneous core type by reading CPUID leaf
> > Fn_0x80000026_EAX and bit 30. if the bit is set as one, then
> > amd_pstate driver will check EBX 30:28 bits to get the core type.
> >
> > Value Description:
> > 0h Performance Core.
> > 1h Efficiency Core.
> >
> > https://www.amd.com/content/dam/amd/en/documents/processor-
> tech-docs/p
> > rogrammer-references/24593.pdf
>
> Use the word "Link:" to prefix this link.
>
> > PDF p274
> > Signed-off-by: Perry Yuan <perry.yuan@amd.com>

Above comments have be addressed in V2.

> > ---
> >   arch/x86/include/asm/processor.h |  2 ++
> >   arch/x86/kernel/cpu/amd.c        | 19 ++++++++++
> >   drivers/cpufreq/amd-pstate.c     | 62 ++++++++++++++++++++++++-------
> -
> >   include/linux/amd-pstate.h       |  8 +++++
> >   4 files changed, 77 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/processor.h
> > b/arch/x86/include/asm/processor.h
> > index 811548f131f4..30d1900bb7e0 100644
> > --- a/arch/x86/include/asm/processor.h
> > +++ b/arch/x86/include/asm/processor.h
> > @@ -683,10 +683,12 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
> >   extern u32 amd_get_highest_perf(void);
> >   extern void amd_clear_divider(void);
> >   extern void amd_check_microcode(void);
> > +extern int amd_get_this_core_type(void);
> >   #else
> >   static inline u32 amd_get_highest_perf(void)              { return 0; }
> >   static inline void amd_clear_divider(void)                { }
> >   static inline void amd_check_microcode(void)              { }
> > +static inline int amd_get_this_core_type(void)             { }
> >   #endif
> >
> >   extern unsigned long arch_align_stack(unsigned long sp); diff --git
> > a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index
> > 307302af0aee..67966bdcde65 100644
> > --- a/arch/x86/kernel/cpu/amd.c
> > +++ b/arch/x86/kernel/cpu/amd.c
> > @@ -1219,3 +1219,22 @@ void noinstr amd_clear_divider(void)
> >                  :: "a" (0), "d" (0), "r" (1));
> >   }
> >   EXPORT_SYMBOL_GPL(amd_clear_divider);
> > +
> > +#define X86_CPU_TYPE_ID_SHIFT      28
> > +
> > +/**
> > + * amd_get_this_core_type - Get the type of this heterogeneous CPU
> > + *
> > + * Returns the CPU type [31:28] (i.e., performance or efficient) of
> > + * a CPU in the processor.
> > + * If the processor has no core type support, returns -1.
> > + */
> > +
> > +int amd_get_this_core_type(void)
> > +{
> > +   if
> (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
> > +           return -1;
> > +
> > +   return cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT; }
> > +EXPORT_SYMBOL_GPL(amd_get_this_core_type);
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 7145248b38ec..7fe8a8fc6227 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -50,7 +50,9 @@
> >
> >   #define AMD_PSTATE_TRANSITION_LATENCY     20000
> >   #define AMD_PSTATE_TRANSITION_DELAY       1000
> > -#define AMD_PSTATE_PREFCORE_THRESHOLD      166
> > +#define CPPC_HIGHEST_PERF_EFFICIENT                132
> > +#define CPPC_HIGHEST_PERF_PERFORMANCE              196
> > +#define CPPC_HIGHEST_PERF_DEFAULT          166
> >
> >   /*
> >    * TODO: We need more time to fine tune processors with shared
> > memory solution @@ -326,6 +328,49 @@ static inline int
> amd_pstate_enable(bool enable)
> >     return static_call(amd_pstate_enable)(enable);
> >   }
> >
> > +static void get_this_core_type(void *data) {
> > +   int *cpu_type = data;
> > +
> > +   *cpu_type = amd_get_this_core_type(); }
>
> Does this really need a static function calling a static function just to set a
> variable?
>
> I would think that you can simplify it something like this:
>
> int amd_get_core_type(void *data)
> {
>       int *type = data;
>
>       if (!type)
>               return -EINVAL;
>       if
> (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY)) {
>               *type = -1;
>               return -ENODEV;
>       }
>       *type = cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT; }
>
> And then amd_pstate_get_cpu_type() can call:
>
> smp_call_function_single(cpu, amd_get_core_type, &cpu_type, 1);

amd_get_this_core_type() cannot be merged, it will be called by some other components, for example qemu.
It is better to user separate function to make codes clear and simple to maintain.

Perry.

>
> > +
> > +static int amd_pstate_get_cpu_type(int cpu) {
> > +   int cpu_type = 0;
> > +
> > +   smp_call_function_single(cpu, get_this_core_type, &cpu_type, 1);
> > +
> > +   return cpu_type;
> > +}
> > +
> > +static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata) {
> > +   u32 highest_perf;
> > +   int core_type;
> > +
> > +   core_type = amd_pstate_get_cpu_type(cpudata->cpu);
> > +   pr_debug("core_type %d found\n", core_type);
> > +
> > +   switch (core_type) {
> > +   case CPU_CORE_TYPE_NO_HETERO_SUP:
> > +           highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> > +           break;
> > +   case CPU_CORE_TYPE_PERFORMANCE:
> > +           highest_perf = CPPC_HIGHEST_PERF_PERFORMANCE;
> > +           break;
> > +   case CPU_CORE_TYPE_EFFICIENCY:
> > +           highest_perf = CPPC_HIGHEST_PERF_EFFICIENT;
> > +           break;
> > +   default:
> > +           highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> > +           WARN_ONCE(true, "WARNING: Undefined core type
> found");
> > +           break;
> > +   }
> > +
> > +    return highest_perf;
> > +}
> > +
> >   static int pstate_init_perf(struct amd_cpudata *cpudata)
> >   {
> >     u64 cap1;
> > @@ -336,15 +381,7 @@ static int pstate_init_perf(struct amd_cpudata
> *cpudata)
> >     if (ret)
> >             return ret;
> >
> > -   /* For platforms that do not support the preferred core feature, the
> > -    * highest_pef may be configured with 166 or 255, to avoid max
> frequency
> > -    * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1)
> value as
> > -    * the default max perf.
> > -    */
> > -   if (cpudata->hw_prefcore)
> > -           highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
> > -   else
> > -           highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
> > +   highest_perf = amd_pstate_highest_perf_set(cpudata);
>
> I think it would be a good idea to follow up later on validate the selections of
> this logic from amd-pstate-ut.c.

Yes, you are right.
pstate ut codes will be modified later

perry.

>
> >
> >     WRITE_ONCE(cpudata->highest_perf, highest_perf);
> >     WRITE_ONCE(cpudata->max_limit_perf, highest_perf); @@ -365,10
> > +402,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
> >     if (ret)
> >             return ret;
> >
> > -   if (cpudata->hw_prefcore)
> > -           highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
> > -   else
> > -           highest_perf = cppc_perf.highest_perf;
> > +   highest_perf = amd_pstate_highest_perf_set(cpudata);
> >
> >     WRITE_ONCE(cpudata->highest_perf, highest_perf);
> >     WRITE_ONCE(cpudata->max_limit_perf, highest_perf); diff --git
> > a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index
> > d58fc022ec46..869d916003f1 100644
> > --- a/include/linux/amd-pstate.h
> > +++ b/include/linux/amd-pstate.h
> > @@ -134,4 +134,12 @@ struct quirk_entry {
> >     u32 lowest_freq;
> >   };
> >
> > +/* defined by CPUID_Fn80000026_EBX BIT [31:28] */ enum
> amd_core_type
> > +{
> > +   CPU_CORE_TYPE_NO_HETERO_SUP = -1,
> > +   CPU_CORE_TYPE_PERFORMANCE = 0,
> > +   CPU_CORE_TYPE_EFFICIENCY = 1,
> > +   CPU_CORE_TYPE_UNDEFINED = 2,
> > +};
> > +
> >   #endif /* _LINUX_AMD_PSTATE_H */
diff mbox series

Patch

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 811548f131f4..30d1900bb7e0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -683,10 +683,12 @@  static inline u32 per_cpu_l2c_id(unsigned int cpu)
 extern u32 amd_get_highest_perf(void);
 extern void amd_clear_divider(void);
 extern void amd_check_microcode(void);
+extern int amd_get_this_core_type(void);
 #else
 static inline u32 amd_get_highest_perf(void)		{ return 0; }
 static inline void amd_clear_divider(void)		{ }
 static inline void amd_check_microcode(void)		{ }
+static inline int amd_get_this_core_type(void)		{ }
 #endif
 
 extern unsigned long arch_align_stack(unsigned long sp);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 307302af0aee..67966bdcde65 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1219,3 +1219,22 @@  void noinstr amd_clear_divider(void)
 		     :: "a" (0), "d" (0), "r" (1));
 }
 EXPORT_SYMBOL_GPL(amd_clear_divider);
+
+#define X86_CPU_TYPE_ID_SHIFT	28
+
+/**
+ * amd_get_this_core_type - Get the type of this heterogeneous CPU
+ *
+ * Returns the CPU type [31:28] (i.e., performance or efficient) of
+ * a CPU in the processor.
+ * If the processor has no core type support, returns -1.
+ */
+
+int amd_get_this_core_type(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
+		return -1;
+
+	return cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT;
+}
+EXPORT_SYMBOL_GPL(amd_get_this_core_type);
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 7145248b38ec..7fe8a8fc6227 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -50,7 +50,9 @@ 
 
 #define AMD_PSTATE_TRANSITION_LATENCY	20000
 #define AMD_PSTATE_TRANSITION_DELAY	1000
-#define AMD_PSTATE_PREFCORE_THRESHOLD	166
+#define CPPC_HIGHEST_PERF_EFFICIENT		132
+#define CPPC_HIGHEST_PERF_PERFORMANCE		196
+#define CPPC_HIGHEST_PERF_DEFAULT		166
 
 /*
  * TODO: We need more time to fine tune processors with shared memory solution
@@ -326,6 +328,49 @@  static inline int amd_pstate_enable(bool enable)
 	return static_call(amd_pstate_enable)(enable);
 }
 
+static void get_this_core_type(void *data)
+{
+	int *cpu_type = data;
+
+	*cpu_type = amd_get_this_core_type();
+}
+
+static int amd_pstate_get_cpu_type(int cpu)
+{
+	int cpu_type = 0;
+
+	smp_call_function_single(cpu, get_this_core_type, &cpu_type, 1);
+
+	return cpu_type;
+}
+
+static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
+{
+	u32 highest_perf;
+	int core_type;
+
+	core_type = amd_pstate_get_cpu_type(cpudata->cpu);
+	pr_debug("core_type %d found\n", core_type);
+
+	switch (core_type) {
+	case CPU_CORE_TYPE_NO_HETERO_SUP:
+		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
+		break;
+	case CPU_CORE_TYPE_PERFORMANCE:
+		highest_perf = CPPC_HIGHEST_PERF_PERFORMANCE;
+		break;
+	case CPU_CORE_TYPE_EFFICIENCY:
+		highest_perf = CPPC_HIGHEST_PERF_EFFICIENT;
+		break;
+	default:
+		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
+		WARN_ONCE(true, "WARNING: Undefined core type found");
+		break;
+	}
+
+    return highest_perf;
+}
+
 static int pstate_init_perf(struct amd_cpudata *cpudata)
 {
 	u64 cap1;
@@ -336,15 +381,7 @@  static int pstate_init_perf(struct amd_cpudata *cpudata)
 	if (ret)
 		return ret;
 
-	/* For platforms that do not support the preferred core feature, the
-	 * highest_pef may be configured with 166 or 255, to avoid max frequency
-	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
-	 * the default max perf.
-	 */
-	if (cpudata->hw_prefcore)
-		highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
-	else
-		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
+	highest_perf = amd_pstate_highest_perf_set(cpudata);
 
 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
 	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
@@ -365,10 +402,7 @@  static int cppc_init_perf(struct amd_cpudata *cpudata)
 	if (ret)
 		return ret;
 
-	if (cpudata->hw_prefcore)
-		highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
-	else
-		highest_perf = cppc_perf.highest_perf;
+	highest_perf = amd_pstate_highest_perf_set(cpudata);
 
 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
 	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index d58fc022ec46..869d916003f1 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -134,4 +134,12 @@  struct quirk_entry {
 	u32 lowest_freq;
 };
 
+/* defined by CPUID_Fn80000026_EBX BIT [31:28] */
+enum amd_core_type {
+	CPU_CORE_TYPE_NO_HETERO_SUP = -1,
+	CPU_CORE_TYPE_PERFORMANCE = 0,
+	CPU_CORE_TYPE_EFFICIENCY = 1,
+	CPU_CORE_TYPE_UNDEFINED = 2,
+};
+
 #endif /* _LINUX_AMD_PSTATE_H */