diff mbox series

[v3,09/10] cpufreq: amd-pstate: implement heterogeneous core topology for highest performance initialization

Message ID 97b65a2294154dd469377a7a76ee738de7bf7aef.1718095377.git.perry.yuan@amd.com (mailing list archive)
State Changes Requested
Delegated to: Mario Limonciello
Headers show
Series AMD Pstate Driver Fixes and Improvements | expand

Commit Message

Yuan, Perry June 11, 2024, 8:52 a.m. UTC
Introduces an optimization to the AMD-Pstate driver by implementing
a heterogeneous core topology for the initialization of the highest
performance value while driver loading.
There are two type cores designed including performance core and
Efficiency Core.
Each core type has different highest performance and frequency values
configured by the platform.  The `amd_pstate` driver needs to identify
the type of core to correctly set an appropriate highest perf value.

X86_FEATURE_HETERO_CORE_TOPOLOGY is used to identify whether the
processor support heterogeneous core type by reading CPUID leaf
Fn_0x80000026_EAX and bit 30. if the bit is set as one, then amd_pstate
driver will check EBX 30:28 bits to get the core type.

PDF p274

Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
---
 arch/x86/include/asm/processor.h |  2 +
 arch/x86/kernel/cpu/amd.c        | 19 +++++++++
 drivers/cpufreq/amd-pstate.c     | 67 ++++++++++++++++++++++++--------
 3 files changed, 72 insertions(+), 16 deletions(-)

Comments

Mario Limonciello June 11, 2024, 7:29 p.m. UTC | #1
On 6/11/2024 03:52, Perry Yuan wrote:
> Introduces an optimization to the AMD-Pstate driver by implementing
> a heterogeneous core topology for the initialization of the highest
> performance value while driver loading.
> There are two type cores designed including performance core and
> Efficiency Core.

I would say:

The two core types supported are "performance" and "efficiency".

> Each core type has different highest performance and frequency values
> configured by the platform.  The `amd_pstate` driver needs to identify
> the type of core to correctly set an appropriate highest perf value.
> 
> X86_FEATURE_HETERO_CORE_TOPOLOGY is used to identify whether the
> processor support heterogeneous core type by reading CPUID leaf
> Fn_0x80000026_EAX and bit 30. if the bit is set as one, then amd_pstate
> driver will check EBX 30:28 bits to get the core type.
> 
> PDF p274
> 
> Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf

As Boris suggested in the other patch, this URL won't be stable if the 
CRM system changes again.

I'd use his suggestion of the title, document number and page instead.

> Signed-off-by: Perry Yuan <perry.yuan@amd.com>


I checked and it fails to apply at this patch at the moment to:

https://git.kernel.org/pub/scm/linux/kernel/git/superm1/linux.git/log/?h=bleeding-edge

error: patch failed: arch/x86/include/asm/cpufeatures.h:470
error: arch/x86/include/asm/cpufeatures.h: patch does not apply
Patch failed at 0008 x86/cpufeatures: Add feature bits for AMD 
heterogeneous processor

Is it dependent upon refs in x86-tip?

> ---
>   arch/x86/include/asm/processor.h |  2 +
>   arch/x86/kernel/cpu/amd.c        | 19 +++++++++
>   drivers/cpufreq/amd-pstate.c     | 67 ++++++++++++++++++++++++--------
>   3 files changed, 72 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index cb4f6c513c48..223aa58e2d5c 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -694,10 +694,12 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
>   extern u32 amd_get_highest_perf(void);
>   extern void amd_clear_divider(void);
>   extern void amd_check_microcode(void);
> +extern int amd_get_this_core_type(void);
>   #else
>   static inline u32 amd_get_highest_perf(void)		{ return 0; }
>   static inline void amd_clear_divider(void)		{ }
>   static inline void amd_check_microcode(void)		{ }
> +static inline int amd_get_this_core_type(void)		{ return -1; }
>   #endif
>   
>   extern unsigned long arch_align_stack(unsigned long sp);
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index 44df3f11e731..62a4ef21ef79 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -1231,3 +1231,22 @@ void noinstr amd_clear_divider(void)
>   		     :: "a" (0), "d" (0), "r" (1));
>   }
>   EXPORT_SYMBOL_GPL(amd_clear_divider);
> +
> +#define X86_CPU_TYPE_ID_SHIFT	28
> +
> +/**
> + * amd_get_this_core_type - Get the type of this heterogeneous CPU
> + *
> + * Returns the CPU type [31:28] (i.e., performance or efficient) of
> + * a CPU in the processor.
> + * If the processor has no core type support, returns -1.

Is there any reason to use -1 specifically?  It seems like it should be
CPU_CORE_TYPE_NO_HETERO_SUP.

> + */
> +
> +int amd_get_this_core_type(void)

Shouldn't the return be "enum amd_core_type"?

> +{
> +	if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
> +		return -1;

return CPU_CORE_TYPE_NO_HETERO_SUP;

> +
> +	return cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT;
> +}
> +EXPORT_SYMBOL_GPL(amd_get_this_core_type);
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index cb59de71b6ee..fa486dfaa7e8 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -51,8 +51,9 @@
>   
>   #define AMD_PSTATE_TRANSITION_LATENCY	20000
>   #define AMD_PSTATE_TRANSITION_DELAY	1000
> -#define CPPC_HIGHEST_PERF_PERFORMANCE	196
> -#define CPPC_HIGHEST_PERF_DEFAULT	166
> +#define CPPC_HIGHEST_PERF_EFFICIENT		132
> +#define CPPC_HIGHEST_PERF_PERFORMANCE		196

Why did you bump this to two tabs?  It can stay at one tab no?

> +#define CPPC_HIGHEST_PERF_DEFAULT		166
>   
>   #define AMD_CPPC_EPP_PERFORMANCE		0x00
>   #define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
> @@ -85,6 +86,14 @@ struct quirk_entry {
>   	u32 lowest_freq;
>   };
>   
> +/* defined by CPUID_Fn80000026_EBX BIT [31:28] */
> +enum amd_core_type {
> +	CPU_CORE_TYPE_NO_HETERO_SUP = -1,
> +	CPU_CORE_TYPE_PERFORMANCE = 0,
> +	CPU_CORE_TYPE_EFFICIENCY = 1,
> +	CPU_CORE_TYPE_UNDEFINED = 2,
> +};
> +
>   /*
>    * TODO: We need more time to fine tune processors with shared memory solution
>    * with community together.
> @@ -359,9 +368,27 @@ static inline int amd_pstate_enable(bool enable)
>   	return static_call(amd_pstate_enable)(enable);
>   }
>   
> +static void get_this_core_type(void *data)
> +{
> +	int *cpu_type = data;
> +
> +	*cpu_type = amd_get_this_core_type();
> +}
> +
> +static int amd_pstate_get_cpu_type(int cpu)

Shouldn't the return be "enum amd_core_type"?

> +{
> +	int cpu_type = 0;

I think it's confusing to initialize this to CPU_CORE_TYPE_PERFORMANCE.
It should be either not initialized or initialized to 
CPU_CORE_TYPE_NO_HETERO_SUP.

> +
> +	smp_call_function_single(cpu, get_this_core_type, &cpu_type, 1);
> +
> +	return cpu_type;
> +}
> +
>   static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
>   {
>   	struct cpuinfo_x86 *c = &cpu_data(0);
> +	u32 highest_perf;
> +	int core_type;
>   
>   	/*
>   	 * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
> @@ -371,7 +398,26 @@ static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
>   	if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
>   		return CPPC_HIGHEST_PERF_PERFORMANCE;
>   
> -	return CPPC_HIGHEST_PERF_DEFAULT;
> +	core_type = amd_pstate_get_cpu_type(cpudata->cpu);
> +	pr_debug("core_type %d found\n", core_type);
> +
> +	switch (core_type) {
> +	case CPU_CORE_TYPE_NO_HETERO_SUP:
> +		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> +		break;
> +	case CPU_CORE_TYPE_PERFORMANCE:
> +		highest_perf = CPPC_HIGHEST_PERF_PERFORMANCE;
> +		break;
> +	case CPU_CORE_TYPE_EFFICIENCY:
> +		highest_perf = CPPC_HIGHEST_PERF_EFFICIENT;
> +		break;
> +	default:
> +		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> +		WARN_ONCE(true, "WARNING: Undefined core type found");
> +		break;
> +	}
> +
> +    return highest_perf;
>   }
>   
>   static int pstate_init_perf(struct amd_cpudata *cpudata)
> @@ -384,15 +430,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
>   	if (ret)
>   		return ret;
>   
> -	/* For platforms that do not support the preferred core feature, the
> -	 * highest_pef may be configured with 166 or 255, to avoid max frequency
> -	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
> -	 * the default max perf.
> -	 */
> -	if (cpudata->hw_prefcore)
> -		highest_perf = amd_pstate_highest_perf_set(cpudata);
> -	else
> -		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
> +	highest_perf = amd_pstate_highest_perf_set(cpudata);
>   
>   	WRITE_ONCE(cpudata->highest_perf, highest_perf);
>   	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
> @@ -413,10 +451,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
>   	if (ret)
>   		return ret;
>   
> -	if (cpudata->hw_prefcore)
> -		highest_perf = amd_pstate_highest_perf_set(cpudata);
> -	else
> -		highest_perf = cppc_perf.highest_perf;
> +	highest_perf = amd_pstate_highest_perf_set(cpudata);
>   
>   	WRITE_ONCE(cpudata->highest_perf, highest_perf);
>   	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
Gautham R. Shenoy June 13, 2024, 7:13 a.m. UTC | #2
Perry Yuan <perry.yuan@amd.com> writes:

> Introduces an optimization to the AMD-Pstate driver by implementing
> a heterogeneous core topology for the initialization of the highest
> performance value while driver loading.
> There are two type cores designed including performance core and
> Efficiency Core.
> Each core type has different highest performance and frequency values
> configured by the platform.  The `amd_pstate` driver needs to identify
> the type of core to correctly set an appropriate highest perf value.
>
> X86_FEATURE_HETERO_CORE_TOPOLOGY is used to identify whether the
> processor support heterogeneous core type by reading CPUID leaf
> Fn_0x80000026_EAX and bit 30. if the bit is set as one, then amd_pstate
> driver will check EBX 30:28 bits to get the core type.
>
> PDF p274
>
> Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf
> Signed-off-by: Perry Yuan <perry.yuan@amd.com>
> ---
>  arch/x86/include/asm/processor.h |  2 +
>  arch/x86/kernel/cpu/amd.c        | 19 +++++++++
>  drivers/cpufreq/amd-pstate.c     | 67 ++++++++++++++++++++++++--------
>  3 files changed, 72 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index cb4f6c513c48..223aa58e2d5c 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -694,10 +694,12 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
>  extern u32 amd_get_highest_perf(void);
>  extern void amd_clear_divider(void);
>  extern void amd_check_microcode(void);
> +extern int amd_get_this_core_type(void);
>  #else
>  static inline u32 amd_get_highest_perf(void)		{ return 0; }
>  static inline void amd_clear_divider(void)		{ }
>  static inline void amd_check_microcode(void)		{ }
> +static inline int amd_get_this_core_type(void)		{ return -1; }
>  #endif
>  
>  extern unsigned long arch_align_stack(unsigned long sp);
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index 44df3f11e731..62a4ef21ef79 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -1231,3 +1231,22 @@ void noinstr amd_clear_divider(void)
>  		     :: "a" (0), "d" (0), "r" (1));
>  }
>  EXPORT_SYMBOL_GPL(amd_clear_divider);
> +
> +#define X86_CPU_TYPE_ID_SHIFT	28
> +
> +/**
> + * amd_get_this_core_type - Get the type of this heterogeneous CPU
> + *
> + * Returns the CPU type [31:28] (i.e., performance or efficient) of
> + * a CPU in the processor.
> + * If the processor has no core type support, returns -1.
> + */
> +
> +int amd_get_this_core_type(void)
> +{
> +	if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
> +		return -1;
> +
> +	return cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT;
> +}

The 0x80000026 parser doesn't have support for discovering and recording
the heterogenous stuff _yet_. Eventually I suppose it will get that and
until then amd-pstate will have to discover the core type by itself.

Even this, I find the bitfield way of doing things more cleaner instead
of using shifts and masks. Something like...


int amd_get_this_core_type(void)
{
        struct {
                u32  num_processors             :16,
                     power_efficiency_ranking   :8,
                     native_model_id            :4,
                     core_type                  :4;
        } props;


	if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
		return -1;

        cpuid_leaf_reg(0x80000026, CPUID_EBX, &props);

        return props->core_type;
        
}

> +EXPORT_SYMBOL_GPL(amd_get_this_core_type);
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index cb59de71b6ee..fa486dfaa7e8 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -51,8 +51,9 @@
>  
>  #define AMD_PSTATE_TRANSITION_LATENCY	20000
>  #define AMD_PSTATE_TRANSITION_DELAY	1000
> -#define CPPC_HIGHEST_PERF_PERFORMANCE	196
> -#define CPPC_HIGHEST_PERF_DEFAULT	166
> +#define CPPC_HIGHEST_PERF_EFFICIENT		132
> +#define CPPC_HIGHEST_PERF_PERFORMANCE		196
> +#define CPPC_HIGHEST_PERF_DEFAULT		166
>  
>  #define AMD_CPPC_EPP_PERFORMANCE		0x00
>  #define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
> @@ -85,6 +86,14 @@ struct quirk_entry {
>  	u32 lowest_freq;
>  };
>  
> +/* defined by CPUID_Fn80000026_EBX BIT [31:28] */
> +enum amd_core_type {
> +	CPU_CORE_TYPE_NO_HETERO_SUP = -1,
> +	CPU_CORE_TYPE_PERFORMANCE = 0,
> +	CPU_CORE_TYPE_EFFICIENCY = 1,
> +	CPU_CORE_TYPE_UNDEFINED = 2,
> +};
> +
>  /*
>   * TODO: We need more time to fine tune processors with shared memory solution
>   * with community together.
> @@ -359,9 +368,27 @@ static inline int amd_pstate_enable(bool enable)
>  	return static_call(amd_pstate_enable)(enable);
>  }
>  
> +static void get_this_core_type(void *data)
> +{
> +	int *cpu_type = data;
> +
> +	*cpu_type = amd_get_this_core_type();
> +}
> +
> +static int amd_pstate_get_cpu_type(int cpu)
> +{
> +	int cpu_type = 0;
> +
> +	smp_call_function_single(cpu, get_this_core_type, &cpu_type, 1);
> +
> +	return cpu_type;
> +}
> +
>  static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
>  {
>  	struct cpuinfo_x86 *c = &cpu_data(0);
> +	u32 highest_perf;
> +	int core_type;
>  
>  	/*
>  	 * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
> @@ -371,7 +398,26 @@ static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
>  	if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
>  		return CPPC_HIGHEST_PERF_PERFORMANCE;
>  
> -	return CPPC_HIGHEST_PERF_DEFAULT;
> +	core_type = amd_pstate_get_cpu_type(cpudata->cpu);
> +	pr_debug("core_type %d found\n", core_type);
> +
> +	switch (core_type) {
> +	case CPU_CORE_TYPE_NO_HETERO_SUP:
> +		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> +		break;
> +	case CPU_CORE_TYPE_PERFORMANCE:
> +		highest_perf = CPPC_HIGHEST_PERF_PERFORMANCE;
> +		break;
> +	case CPU_CORE_TYPE_EFFICIENCY:
> +		highest_perf = CPPC_HIGHEST_PERF_EFFICIENT;
> +		break;
> +	default:
> +		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> +		WARN_ONCE(true, "WARNING: Undefined core type found");
> +		break;
> +	}
> +
> +    return highest_perf;
>  }
>  
>  static int pstate_init_perf(struct amd_cpudata *cpudata)
> @@ -384,15 +430,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
>  	if (ret)
>  		return ret;
>  
> -	/* For platforms that do not support the preferred core feature, the
> -	 * highest_pef may be configured with 166 or 255, to avoid max frequency
> -	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
> -	 * the default max perf.
> -	 */
> -	if (cpudata->hw_prefcore)
> -		highest_perf = amd_pstate_highest_perf_set(cpudata);
> -	else
> -		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
> +	highest_perf = amd_pstate_highest_perf_set(cpudata);


Without this patch, on systems without hw_prefcore, highest_perf would
be derived from MSR_AMD_CPPC_CAP1. The value of the highest_perf could
have been 255. 

This patch changes the behaviour and sets the default value to
CPPC_HIGHEST_PERF_DEFAULT which is only 196. Am I missing something ?


>  
>  	WRITE_ONCE(cpudata->highest_perf, highest_perf);
>  	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
> @@ -413,10 +451,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
>  	if (ret)
>  		return ret;
>  
> -	if (cpudata->hw_prefcore)
> -		highest_perf = amd_pstate_highest_perf_set(cpudata);
> -	else
> -		highest_perf = cppc_perf.highest_perf;
> +	highest_perf = amd_pstate_highest_perf_set(cpudata);


Same comment as above here.


>  
>  	WRITE_ONCE(cpudata->highest_perf, highest_perf);
>  	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
> -- 
> 2.34.1
Yuan, Perry June 13, 2024, 8:25 a.m. UTC | #3
[AMD Official Use Only - AMD Internal Distribution Only]

Hi Gautham

> -----Original Message-----
> From: Shenoy, Gautham Ranjal <gautham.shenoy@amd.com>
> Sent: Thursday, June 13, 2024 3:14 PM
> To: Yuan, Perry <Perry.Yuan@amd.com>; Limonciello, Mario
> <Mario.Limonciello@amd.com>
> Cc: rafael.j.wysocki@intel.com; viresh.kumar@linaro.org; Huang, Ray
> <Ray.Huang@amd.com>; Petkov, Borislav <Borislav.Petkov@amd.com>;
> Deucher, Alexander <Alexander.Deucher@amd.com>; Huang, Shimmer
> <Shimmer.Huang@amd.com>; Du, Xiaojian <Xiaojian.Du@amd.com>; Meng,
> Li (Jassmine) <Li.Meng@amd.com>; linux-pm@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v3 09/10] cpufreq: amd-pstate: implement
> heterogeneous core topology for highest performance initialization
>
> Perry Yuan <perry.yuan@amd.com> writes:
>
> > Introduces an optimization to the AMD-Pstate driver by implementing a
> > heterogeneous core topology for the initialization of the highest
> > performance value while driver loading.
> > There are two type cores designed including performance core and
> > Efficiency Core.
> > Each core type has different highest performance and frequency values
> > configured by the platform.  The `amd_pstate` driver needs to identify
> > the type of core to correctly set an appropriate highest perf value.
> >
> > X86_FEATURE_HETERO_CORE_TOPOLOGY is used to identify whether the
> > processor support heterogeneous core type by reading CPUID leaf
> > Fn_0x80000026_EAX and bit 30. if the bit is set as one, then
> > amd_pstate driver will check EBX 30:28 bits to get the core type.
> >
> > PDF p274
> >
> > Link:
> > https://www.amd.com/content/dam/amd/en/documents/processor-tech-
> docs/p
> > rogrammer-references/24593.pdf
> > Signed-off-by: Perry Yuan <perry.yuan@amd.com>
> > ---
> >  arch/x86/include/asm/processor.h |  2 +
> >  arch/x86/kernel/cpu/amd.c        | 19 +++++++++
> >  drivers/cpufreq/amd-pstate.c     | 67 ++++++++++++++++++++++++--------
> >  3 files changed, 72 insertions(+), 16 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/processor.h
> > b/arch/x86/include/asm/processor.h
> > index cb4f6c513c48..223aa58e2d5c 100644
> > --- a/arch/x86/include/asm/processor.h
> > +++ b/arch/x86/include/asm/processor.h
> > @@ -694,10 +694,12 @@ static inline u32 per_cpu_l2c_id(unsigned int
> > cpu)  extern u32 amd_get_highest_perf(void);  extern void
> > amd_clear_divider(void);  extern void amd_check_microcode(void);
> > +extern int amd_get_this_core_type(void);
> >  #else
> >  static inline u32 amd_get_highest_perf(void)               { return 0; }
> >  static inline void amd_clear_divider(void)         { }
> >  static inline void amd_check_microcode(void)               { }
> > +static inline int amd_get_this_core_type(void)             { return -1; }
> >  #endif
> >
> >  extern unsigned long arch_align_stack(unsigned long sp); diff --git
> > a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index
> > 44df3f11e731..62a4ef21ef79 100644
> > --- a/arch/x86/kernel/cpu/amd.c
> > +++ b/arch/x86/kernel/cpu/amd.c
> > @@ -1231,3 +1231,22 @@ void noinstr amd_clear_divider(void)
> >                  :: "a" (0), "d" (0), "r" (1));  }
> > EXPORT_SYMBOL_GPL(amd_clear_divider);
> > +
> > +#define X86_CPU_TYPE_ID_SHIFT      28
> > +
> > +/**
> > + * amd_get_this_core_type - Get the type of this heterogeneous CPU
> > + *
> > + * Returns the CPU type [31:28] (i.e., performance or efficient) of
> > + * a CPU in the processor.
> > + * If the processor has no core type support, returns -1.
> > + */
> > +
> > +int amd_get_this_core_type(void)
> > +{
> > +   if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
> > +           return -1;
> > +
> > +   return cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT; }
>
> The 0x80000026 parser doesn't have support for discovering and recording
> the heterogenous stuff _yet_. Eventually I suppose it will get that and until
> then amd-pstate will have to discover the core type by itself.

The PPR doc need to be updated to use a new one,
https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/56713-B1_3_05.zip
P119

I have added the cpu feature flag in [PATCH v3 08/10],  it will be fine to identify the core type for pstate driver with one line code.

>
> Even this, I find the bitfield way of doing things more cleaner instead of using
> shifts and masks. Something like...
>
>
> int amd_get_this_core_type(void)
> {
>         struct {
>                 u32  num_processors             :16,
>                      power_efficiency_ranking   :8,
>                      native_model_id            :4,
>                      core_type                  :4;
>         } props;
>
>
>       if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
>               return -1;
>
>         cpuid_leaf_reg(0x80000026, CPUID_EBX, &props);
>
>         return props->core_type;
>
> }
>
> > +EXPORT_SYMBOL_GPL(amd_get_this_core_type);
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index cb59de71b6ee..fa486dfaa7e8 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -51,8 +51,9 @@
> >
> >  #define AMD_PSTATE_TRANSITION_LATENCY      20000
> >  #define AMD_PSTATE_TRANSITION_DELAY        1000
> > -#define CPPC_HIGHEST_PERF_PERFORMANCE      196
> > -#define CPPC_HIGHEST_PERF_DEFAULT  166
> > +#define CPPC_HIGHEST_PERF_EFFICIENT                132
> > +#define CPPC_HIGHEST_PERF_PERFORMANCE              196
> > +#define CPPC_HIGHEST_PERF_DEFAULT          166
> >
> >  #define AMD_CPPC_EPP_PERFORMANCE           0x00
> >  #define AMD_CPPC_EPP_BALANCE_PERFORMANCE   0x80
> > @@ -85,6 +86,14 @@ struct quirk_entry {
> >     u32 lowest_freq;
> >  };
> >
> > +/* defined by CPUID_Fn80000026_EBX BIT [31:28] */ enum
> amd_core_type
> > +{
> > +   CPU_CORE_TYPE_NO_HETERO_SUP = -1,
> > +   CPU_CORE_TYPE_PERFORMANCE = 0,
> > +   CPU_CORE_TYPE_EFFICIENCY = 1,
> > +   CPU_CORE_TYPE_UNDEFINED = 2,
> > +};
> > +
> >  /*
> >   * TODO: We need more time to fine tune processors with shared memory
> solution
> >   * with community together.
> > @@ -359,9 +368,27 @@ static inline int amd_pstate_enable(bool enable)
> >     return static_call(amd_pstate_enable)(enable);
> >  }
> >
> > +static void get_this_core_type(void *data) {
> > +   int *cpu_type = data;
> > +
> > +   *cpu_type = amd_get_this_core_type(); }
> > +
> > +static int amd_pstate_get_cpu_type(int cpu) {
> > +   int cpu_type = 0;
> > +
> > +   smp_call_function_single(cpu, get_this_core_type, &cpu_type, 1);
> > +
> > +   return cpu_type;
> > +}
> > +
> >  static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
> > {
> >     struct cpuinfo_x86 *c = &cpu_data(0);
> > +   u32 highest_perf;
> > +   int core_type;
> >
> >     /*
> >      * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
> > @@ -371,7 +398,26 @@ static u32 amd_pstate_highest_perf_set(struct
> amd_cpudata *cpudata)
> >     if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <=
> 0x7f))
> >             return CPPC_HIGHEST_PERF_PERFORMANCE;
> >
> > -   return CPPC_HIGHEST_PERF_DEFAULT;
> > +   core_type = amd_pstate_get_cpu_type(cpudata->cpu);
> > +   pr_debug("core_type %d found\n", core_type);
> > +
> > +   switch (core_type) {
> > +   case CPU_CORE_TYPE_NO_HETERO_SUP:
> > +           highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> > +           break;
> > +   case CPU_CORE_TYPE_PERFORMANCE:
> > +           highest_perf = CPPC_HIGHEST_PERF_PERFORMANCE;
> > +           break;
> > +   case CPU_CORE_TYPE_EFFICIENCY:
> > +           highest_perf = CPPC_HIGHEST_PERF_EFFICIENT;
> > +           break;
> > +   default:
> > +           highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> > +           WARN_ONCE(true, "WARNING: Undefined core type found");
> > +           break;
> > +   }
> > +
> > +    return highest_perf;
> >  }
> >
> >  static int pstate_init_perf(struct amd_cpudata *cpudata) @@ -384,15
> > +430,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
> >     if (ret)
> >             return ret;
> >
> > -   /* For platforms that do not support the preferred core feature, the
> > -    * highest_pef may be configured with 166 or 255, to avoid max
> frequency
> > -    * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1)
> value as
> > -    * the default max perf.
> > -    */
> > -   if (cpudata->hw_prefcore)
> > -           highest_perf = amd_pstate_highest_perf_set(cpudata);
> > -   else
> > -           highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
> > +   highest_perf = amd_pstate_highest_perf_set(cpudata);
>
>
> Without this patch, on systems without hw_prefcore, highest_perf would be
> derived from MSR_AMD_CPPC_CAP1. The value of the highest_perf could
> have been 255.

Thanks to your reminder, highest perf of  systems w/o hw_prefcore are 166 or 255 because of the long history.
It  still needs to use  hw_prefcore to identify the CPU otherwise the highest perf will be incorrect on some systems.
Let me fix it in v4.

>
> This patch changes the behaviour and sets the default value to
> CPPC_HIGHEST_PERF_DEFAULT which is only 196. Am I missing something ?

Some new clients system update the default highest perf value to 196 from 166 from power firmware, so the highest perf need to match the CPU
and update the value in pstate driver as well.

>
>
> >
> >     WRITE_ONCE(cpudata->highest_perf, highest_perf);
> >     WRITE_ONCE(cpudata->max_limit_perf, highest_perf); @@ -413,10
> +451,7
> > @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
> >     if (ret)
> >             return ret;
> >
> > -   if (cpudata->hw_prefcore)
> > -           highest_perf = amd_pstate_highest_perf_set(cpudata);
> > -   else
> > -           highest_perf = cppc_perf.highest_perf;
> > +   highest_perf = amd_pstate_highest_perf_set(cpudata);
>
>
> Same comment as above here.

Will get this fixed in v4.

Thanks for your feedback.

>
>
> >
> >     WRITE_ONCE(cpudata->highest_perf, highest_perf);
> >     WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
> > --
> > 2.34.1
diff mbox series

Patch

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cb4f6c513c48..223aa58e2d5c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -694,10 +694,12 @@  static inline u32 per_cpu_l2c_id(unsigned int cpu)
 extern u32 amd_get_highest_perf(void);
 extern void amd_clear_divider(void);
 extern void amd_check_microcode(void);
+extern int amd_get_this_core_type(void);
 #else
 static inline u32 amd_get_highest_perf(void)		{ return 0; }
 static inline void amd_clear_divider(void)		{ }
 static inline void amd_check_microcode(void)		{ }
+static inline int amd_get_this_core_type(void)		{ return -1; }
 #endif
 
 extern unsigned long arch_align_stack(unsigned long sp);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 44df3f11e731..62a4ef21ef79 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1231,3 +1231,22 @@  void noinstr amd_clear_divider(void)
 		     :: "a" (0), "d" (0), "r" (1));
 }
 EXPORT_SYMBOL_GPL(amd_clear_divider);
+
+#define X86_CPU_TYPE_ID_SHIFT	28
+
+/**
+ * amd_get_this_core_type - Get the type of this heterogeneous CPU
+ *
+ * Returns the CPU type [31:28] (i.e., performance or efficient) of
+ * a CPU in the processor.
+ * If the processor has no core type support, returns -1.
+ */
+
+int amd_get_this_core_type(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
+		return -1;
+
+	return cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT;
+}
+EXPORT_SYMBOL_GPL(amd_get_this_core_type);
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index cb59de71b6ee..fa486dfaa7e8 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -51,8 +51,9 @@ 
 
 #define AMD_PSTATE_TRANSITION_LATENCY	20000
 #define AMD_PSTATE_TRANSITION_DELAY	1000
-#define CPPC_HIGHEST_PERF_PERFORMANCE	196
-#define CPPC_HIGHEST_PERF_DEFAULT	166
+#define CPPC_HIGHEST_PERF_EFFICIENT		132
+#define CPPC_HIGHEST_PERF_PERFORMANCE		196
+#define CPPC_HIGHEST_PERF_DEFAULT		166
 
 #define AMD_CPPC_EPP_PERFORMANCE		0x00
 #define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
@@ -85,6 +86,14 @@  struct quirk_entry {
 	u32 lowest_freq;
 };
 
+/* defined by CPUID_Fn80000026_EBX BIT [31:28] */
+enum amd_core_type {
+	CPU_CORE_TYPE_NO_HETERO_SUP = -1,
+	CPU_CORE_TYPE_PERFORMANCE = 0,
+	CPU_CORE_TYPE_EFFICIENCY = 1,
+	CPU_CORE_TYPE_UNDEFINED = 2,
+};
+
 /*
  * TODO: We need more time to fine tune processors with shared memory solution
  * with community together.
@@ -359,9 +368,27 @@  static inline int amd_pstate_enable(bool enable)
 	return static_call(amd_pstate_enable)(enable);
 }
 
+static void get_this_core_type(void *data)
+{
+	int *cpu_type = data;
+
+	*cpu_type = amd_get_this_core_type();
+}
+
+static int amd_pstate_get_cpu_type(int cpu)
+{
+	int cpu_type = 0;
+
+	smp_call_function_single(cpu, get_this_core_type, &cpu_type, 1);
+
+	return cpu_type;
+}
+
 static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
 {
 	struct cpuinfo_x86 *c = &cpu_data(0);
+	u32 highest_perf;
+	int core_type;
 
 	/*
 	 * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
@@ -371,7 +398,26 @@  static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
 	if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
 		return CPPC_HIGHEST_PERF_PERFORMANCE;
 
-	return CPPC_HIGHEST_PERF_DEFAULT;
+	core_type = amd_pstate_get_cpu_type(cpudata->cpu);
+	pr_debug("core_type %d found\n", core_type);
+
+	switch (core_type) {
+	case CPU_CORE_TYPE_NO_HETERO_SUP:
+		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
+		break;
+	case CPU_CORE_TYPE_PERFORMANCE:
+		highest_perf = CPPC_HIGHEST_PERF_PERFORMANCE;
+		break;
+	case CPU_CORE_TYPE_EFFICIENCY:
+		highest_perf = CPPC_HIGHEST_PERF_EFFICIENT;
+		break;
+	default:
+		highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
+		WARN_ONCE(true, "WARNING: Undefined core type found");
+		break;
+	}
+
+    return highest_perf;
 }
 
 static int pstate_init_perf(struct amd_cpudata *cpudata)
@@ -384,15 +430,7 @@  static int pstate_init_perf(struct amd_cpudata *cpudata)
 	if (ret)
 		return ret;
 
-	/* For platforms that do not support the preferred core feature, the
-	 * highest_pef may be configured with 166 or 255, to avoid max frequency
-	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
-	 * the default max perf.
-	 */
-	if (cpudata->hw_prefcore)
-		highest_perf = amd_pstate_highest_perf_set(cpudata);
-	else
-		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
+	highest_perf = amd_pstate_highest_perf_set(cpudata);
 
 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
 	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
@@ -413,10 +451,7 @@  static int cppc_init_perf(struct amd_cpudata *cpudata)
 	if (ret)
 		return ret;
 
-	if (cpudata->hw_prefcore)
-		highest_perf = amd_pstate_highest_perf_set(cpudata);
-	else
-		highest_perf = cppc_perf.highest_perf;
+	highest_perf = amd_pstate_highest_perf_set(cpudata);
 
 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
 	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);