
[v2,1/7] perf/x86/core: Update x86_pmu.pebs_capable for ICELAKE_{X,D}

Message ID 20220721103549.49543-2-likexu@tencent.com (mailing list archive)
State New, archived
Series: KVM: x86/pmu: Fix some corner cases including Intel PEBS

Commit Message

Like Xu July 21, 2022, 10:35 a.m. UTC
From: Like Xu <likexu@tencent.com>

Ice Lake microarchitecture with the EPT-Friendly PEBS capability also
supports the Extended PEBS feature, which means that all counters (both
fixed-function and general-purpose counters) can be used for PEBS events.

Update x86_pmu.pebs_capable, as is done for SPR, to apply PEBS_ALL semantics.

Cc: Kan Liang <kan.liang@linux.intel.com>
Fixes: fb358e0b811e ("perf/x86/intel: Add EPT-Friendly PEBS for Ice Lake Server")
Signed-off-by: Like Xu <likexu@tencent.com>
---
 arch/x86/events/intel/core.c | 1 +
 1 file changed, 1 insertion(+)

Comments

Paolo Bonzini Aug. 12, 2022, 7:52 a.m. UTC | #1
On 7/21/22 12:35, Like Xu wrote:
> From: Like Xu <likexu@tencent.com>
> 
> Ice Lake microarchitecture with EPT-Friendly PEBS capability also support
> the Extended feature, which means that all counters (both fixed function
> and general purpose counters) can be used for PEBS events.
> 
> Update x86_pmu.pebs_capable like SPR to apply PEBS_ALL semantics.
> 
> Cc: Kan Liang <kan.liang@linux.intel.com>
> Fixes: fb358e0b811e ("perf/x86/intel: Add EPT-Friendly PEBS for Ice Lake Server")
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>   arch/x86/events/intel/core.c | 1 +
>   1 file changed, 1 insertion(+)
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 4e9b7af9cc45..e46fd496187b 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -6239,6 +6239,7 @@ __init int intel_pmu_init(void)
>   	case INTEL_FAM6_ICELAKE_X:
>   	case INTEL_FAM6_ICELAKE_D:
>   		x86_pmu.pebs_ept = 1;
> +		x86_pmu.pebs_capable = ~0ULL;
>   		pmem = true;
>   		fallthrough;
>   	case INTEL_FAM6_ICELAKE_L:

Peter, can you please ack this (you were not CCed on this KVM series but 
this patch is really perf core)?

Thanks,

Paolo
Peter Zijlstra Aug. 15, 2022, 9:31 a.m. UTC | #2
On Fri, Aug 12, 2022 at 09:52:13AM +0200, Paolo Bonzini wrote:
> On 7/21/22 12:35, Like Xu wrote:
> > From: Like Xu <likexu@tencent.com>
> > 
> > Ice Lake microarchitecture with EPT-Friendly PEBS capability also support
> > the Extended feature, which means that all counters (both fixed function
> > and general purpose counters) can be used for PEBS events.
> > 
> > Update x86_pmu.pebs_capable like SPR to apply PEBS_ALL semantics.
> > 
> > Cc: Kan Liang <kan.liang@linux.intel.com>
> > Fixes: fb358e0b811e ("perf/x86/intel: Add EPT-Friendly PEBS for Ice Lake Server")
> > Signed-off-by: Like Xu <likexu@tencent.com>
> > ---
> >   arch/x86/events/intel/core.c | 1 +
> >   1 file changed, 1 insertion(+)
> > 
> > diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> > index 4e9b7af9cc45..e46fd496187b 100644
> > --- a/arch/x86/events/intel/core.c
> > +++ b/arch/x86/events/intel/core.c
> > @@ -6239,6 +6239,7 @@ __init int intel_pmu_init(void)
> >   	case INTEL_FAM6_ICELAKE_X:
> >   	case INTEL_FAM6_ICELAKE_D:
> >   		x86_pmu.pebs_ept = 1;
> > +		x86_pmu.pebs_capable = ~0ULL;
> >   		pmem = true;
> >   		fallthrough;
> >   	case INTEL_FAM6_ICELAKE_L:
> 
> Peter, can you please ack this (you were not CCed on this KVM series but
> this patch is really perf core)?

I would much rather see something like this; except I don't know if it
is fully correct. I can never find what models support what... Kan do
you know?


diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2db93498ff71..b42c1beb9924 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -5933,7 +5933,6 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.lbr_pt_coexist = true;
-		x86_pmu.pebs_capable = ~0ULL;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_PEBS_ALL;
 		x86_pmu.get_event_constraints = glp_get_event_constraints;
@@ -6291,7 +6290,6 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
-		x86_pmu.pebs_capable = ~0ULL;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 		x86_pmu.flags |= PMU_FL_PEBS_ALL;
@@ -6337,7 +6335,6 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
-		x86_pmu.pebs_capable = ~0ULL;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 		x86_pmu.flags |= PMU_FL_PEBS_ALL;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ba60427caa6d..e2da643632b9 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2258,6 +2258,7 @@ void __init intel_ds_init(void)
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
 			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
 			if (x86_pmu.intel_cap.pebs_baseline) {
+				x86_pmu.pebs_capable = ~0ULL;
 				x86_pmu.large_pebs_flags |=
 					PERF_SAMPLE_BRANCH_STACK |
 					PERF_SAMPLE_TIME;
Like Xu Aug. 15, 2022, 9:43 a.m. UTC | #3
On 15/8/2022 5:31 pm, Peter Zijlstra wrote:
> On Fri, Aug 12, 2022 at 09:52:13AM +0200, Paolo Bonzini wrote:
>> On 7/21/22 12:35, Like Xu wrote:
>>> From: Like Xu <likexu@tencent.com>
>>>
>>> Ice Lake microarchitecture with EPT-Friendly PEBS capability also support
>>> the Extended feature, which means that all counters (both fixed function
>>> and general purpose counters) can be used for PEBS events.
>>>
>>> Update x86_pmu.pebs_capable like SPR to apply PEBS_ALL semantics.
>>>
>>> Cc: Kan Liang <kan.liang@linux.intel.com>
>>> Fixes: fb358e0b811e ("perf/x86/intel: Add EPT-Friendly PEBS for Ice Lake Server")
>>> Signed-off-by: Like Xu <likexu@tencent.com>
>>> ---
>>>    arch/x86/events/intel/core.c | 1 +
>>>    1 file changed, 1 insertion(+)
>>>
>>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>>> index 4e9b7af9cc45..e46fd496187b 100644
>>> --- a/arch/x86/events/intel/core.c
>>> +++ b/arch/x86/events/intel/core.c
>>> @@ -6239,6 +6239,7 @@ __init int intel_pmu_init(void)
>>>    	case INTEL_FAM6_ICELAKE_X:
>>>    	case INTEL_FAM6_ICELAKE_D:
>>>    		x86_pmu.pebs_ept = 1;
>>> +		x86_pmu.pebs_capable = ~0ULL;
>>>    		pmem = true;
>>>    		fallthrough;
>>>    	case INTEL_FAM6_ICELAKE_L:
>>
>> Peter, can you please ack this (you were not CCed on this KVM series but
>> this patch is really perf core)?
> 
> I would much rather see something like this; except I don't know if it
> is fully correct. I can never find what models support what... Kan do
> you know?

For guest PEBS, this is a minor optimization derived from 0d23dc34a7ce to
reduce branch instructions:
https://lore.kernel.org/kvm/YKIqbph62oclxjnt@hirez.programming.kicks-ass.net/
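
As an aside for context, here is a minimal sketch (not the actual KVM code;
the helper name is made up) of why a single pebs_capable bitmask helps: the
guest's PEBS_ENABLE value can be sanitized with one AND instead of a branch
per counter.

#include <stdint.h>

/*
 * Hypothetical illustration only: keep just the counters the host
 * advertises as PEBS-capable.
 */
static inline uint64_t sanitize_guest_pebs_enable(uint64_t guest_pebs_enable,
						  uint64_t host_pebs_capable)
{
	return guest_pebs_enable & host_pebs_capable;
}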

> 
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 2db93498ff71..b42c1beb9924 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -5933,7 +5933,6 @@ __init int intel_pmu_init(void)
>   		x86_pmu.pebs_aliases = NULL;
>   		x86_pmu.pebs_prec_dist = true;
>   		x86_pmu.lbr_pt_coexist = true;
> -		x86_pmu.pebs_capable = ~0ULL;
>   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>   		x86_pmu.get_event_constraints = glp_get_event_constraints;
> @@ -6291,7 +6290,6 @@ __init int intel_pmu_init(void)
>   		x86_pmu.pebs_aliases = NULL;
>   		x86_pmu.pebs_prec_dist = true;
>   		x86_pmu.pebs_block = true;
> -		x86_pmu.pebs_capable = ~0ULL;
>   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>   		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
>   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
> @@ -6337,7 +6335,6 @@ __init int intel_pmu_init(void)
>   		x86_pmu.pebs_aliases = NULL;
>   		x86_pmu.pebs_prec_dist = true;
>   		x86_pmu.pebs_block = true;
> -		x86_pmu.pebs_capable = ~0ULL;
>   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>   		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
>   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index ba60427caa6d..e2da643632b9 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -2258,6 +2258,7 @@ void __init intel_ds_init(void)
>   			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
>   			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
>   			if (x86_pmu.intel_cap.pebs_baseline) {
> +				x86_pmu.pebs_capable = ~0ULL;

The two features, "Extended PEBS" (pebs_capable) and "Adaptive PEBS"
(pebs_baseline), are orthogonal, although they are often supported together.

>   				x86_pmu.large_pebs_flags |=
>   					PERF_SAMPLE_BRANCH_STACK |
>   					PERF_SAMPLE_TIME;
Peter Zijlstra Aug. 15, 2022, 11:51 a.m. UTC | #4
On Mon, Aug 15, 2022 at 05:43:34PM +0800, Like Xu wrote:

> > diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> > index 2db93498ff71..b42c1beb9924 100644
> > --- a/arch/x86/events/intel/core.c
> > +++ b/arch/x86/events/intel/core.c
> > @@ -5933,7 +5933,6 @@ __init int intel_pmu_init(void)
> >   		x86_pmu.pebs_aliases = NULL;
> >   		x86_pmu.pebs_prec_dist = true;
> >   		x86_pmu.lbr_pt_coexist = true;
> > -		x86_pmu.pebs_capable = ~0ULL;
> >   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
> >   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
> >   		x86_pmu.get_event_constraints = glp_get_event_constraints;
> > @@ -6291,7 +6290,6 @@ __init int intel_pmu_init(void)
> >   		x86_pmu.pebs_aliases = NULL;
> >   		x86_pmu.pebs_prec_dist = true;
> >   		x86_pmu.pebs_block = true;
> > -		x86_pmu.pebs_capable = ~0ULL;
> >   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
> >   		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
> >   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
> > @@ -6337,7 +6335,6 @@ __init int intel_pmu_init(void)
> >   		x86_pmu.pebs_aliases = NULL;
> >   		x86_pmu.pebs_prec_dist = true;
> >   		x86_pmu.pebs_block = true;
> > -		x86_pmu.pebs_capable = ~0ULL;
> >   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
> >   		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
> >   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
> > diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> > index ba60427caa6d..e2da643632b9 100644
> > --- a/arch/x86/events/intel/ds.c
> > +++ b/arch/x86/events/intel/ds.c
> > @@ -2258,6 +2258,7 @@ void __init intel_ds_init(void)
> >   			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
> >   			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
> >   			if (x86_pmu.intel_cap.pebs_baseline) {
> > +				x86_pmu.pebs_capable = ~0ULL;
> 
> The two features of "Extended PEBS (about pebs_capable)" and "Adaptive PEBS
> (about pebs_baseline)"
> are orthogonal, although the two are often supported together.

The SDM explicitly states that PEBS Baseline implies Extended PEBS. See
3-19.8 (April 2022 edition).

The question is if there is hardware that has Extended PEBS but doesn't
have Baseline; and I simply don't know and was hoping Kan could find
out.

That said; the above patch can be further improved by also removing the
PMU_FL_PEBS_ALL lines, which is already set by intel_ds_init().

In general though; the point is, we shouldn't be doing the FMS table
thing for discoverable features. Having pebs_capable = ~0 and
PMU_FL_PEBS_ALL on something with BASELINE set is just wrong.
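
For illustration, a simplified, hypothetical helper (not kernel code; the
struct and function names are invented) showing the capability-driven setup
described above, where Baseline enumeration, rather than a family/model
table entry, turns on Extended-PEBS semantics:

#include <stdbool.h>
#include <stdint.h>

#define PERF_CAP_PEBS_BASELINE	(1ULL << 14)	/* IA32_PERF_CAPABILITIES bit 14 */

struct fake_pmu {
	uint64_t pebs_capable;	/* bitmask of PEBS-capable counters */
	bool	 pebs_all;	/* stand-in for PMU_FL_PEBS_ALL */
};

/* Baseline implies Extended PEBS, so every counter (fixed and general
 * purpose) qualifies; no per-model table entry is needed. */
static void setup_pebs_from_caps(struct fake_pmu *pmu, uint64_t perf_caps)
{
	if (perf_caps & PERF_CAP_PEBS_BASELINE) {
		pmu->pebs_capable = ~0ULL;
		pmu->pebs_all = true;
	}
}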
Like Xu Aug. 15, 2022, 12:48 p.m. UTC | #5
On 15/8/2022 7:51 pm, Peter Zijlstra wrote:
> On Mon, Aug 15, 2022 at 05:43:34PM +0800, Like Xu wrote:
> 
>>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>>> index 2db93498ff71..b42c1beb9924 100644
>>> --- a/arch/x86/events/intel/core.c
>>> +++ b/arch/x86/events/intel/core.c
>>> @@ -5933,7 +5933,6 @@ __init int intel_pmu_init(void)
>>>    		x86_pmu.pebs_aliases = NULL;
>>>    		x86_pmu.pebs_prec_dist = true;
>>>    		x86_pmu.lbr_pt_coexist = true;
>>> -		x86_pmu.pebs_capable = ~0ULL;
>>>    		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>>>    		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>>>    		x86_pmu.get_event_constraints = glp_get_event_constraints;
>>> @@ -6291,7 +6290,6 @@ __init int intel_pmu_init(void)
>>>    		x86_pmu.pebs_aliases = NULL;
>>>    		x86_pmu.pebs_prec_dist = true;
>>>    		x86_pmu.pebs_block = true;
>>> -		x86_pmu.pebs_capable = ~0ULL;
>>>    		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>>>    		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
>>>    		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>>> @@ -6337,7 +6335,6 @@ __init int intel_pmu_init(void)
>>>    		x86_pmu.pebs_aliases = NULL;
>>>    		x86_pmu.pebs_prec_dist = true;
>>>    		x86_pmu.pebs_block = true;
>>> -		x86_pmu.pebs_capable = ~0ULL;
>>>    		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>>>    		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
>>>    		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>>> index ba60427caa6d..e2da643632b9 100644
>>> --- a/arch/x86/events/intel/ds.c
>>> +++ b/arch/x86/events/intel/ds.c
>>> @@ -2258,6 +2258,7 @@ void __init intel_ds_init(void)
>>>    			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
>>>    			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
>>>    			if (x86_pmu.intel_cap.pebs_baseline) {
>>> +				x86_pmu.pebs_capable = ~0ULL;
>>
>> The two features of "Extended PEBS (about pebs_capable)" and "Adaptive PEBS
>> (about pebs_baseline)"
>> are orthogonal, although the two are often supported together.
> 
> The SDM explicitly states that PEBS Baseline implies Extended PEBS. See
> 3-19.8 (April 22 edition).

Indeed, "IA32_PERF_CAPABILITIES.PEBS_BASELINE [bit 14]: If set, the following is 
true
- Extended PEBS is supported;
- Adaptive PEBS is supported;"

> 
> The question is if there is hardware that has Extended PEBS but doesn't
> have Baseline; and I simply don't know and was hoping Kan could find
> out.

In the SDM world, we actually have three PEBS sub-features:

[July 2017] 18.5.4.1 Extended PEBS (first on Goldmont Plus)
[January 2019] 18.9.2 Adaptive PEBS (first on Tremont)
[April 2021] IA32_PERF_CAPABILITIES.PEBS_BASELINE in Table 2-2, "IA-32 Architectural MSRs"

Waiting for Kan to confirm what real hardware does.

> 
> That said; the above patch can be further improved by also removing the
> PMU_FL_PEBS_ALL lines, which is already set by intel_ds_init().
> 
> In general though; the point is, we shouldn't be doing the FMS table
> thing for discoverable features. Having pebs_capable = ~0 and
> PMU_FL_PEBS_ALL on something with BASELINE set is just wrong.
Liang, Kan Aug. 15, 2022, 1:06 p.m. UTC | #6
On 2022-08-15 7:51 a.m., Peter Zijlstra wrote:
> On Mon, Aug 15, 2022 at 05:43:34PM +0800, Like Xu wrote:
> 
>>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>>> index 2db93498ff71..b42c1beb9924 100644
>>> --- a/arch/x86/events/intel/core.c
>>> +++ b/arch/x86/events/intel/core.c
>>> @@ -5933,7 +5933,6 @@ __init int intel_pmu_init(void)
>>>   		x86_pmu.pebs_aliases = NULL;
>>>   		x86_pmu.pebs_prec_dist = true;
>>>   		x86_pmu.lbr_pt_coexist = true;
>>> -		x86_pmu.pebs_capable = ~0ULL;
>>>   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>>>   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>>>   		x86_pmu.get_event_constraints = glp_get_event_constraints;
>>> @@ -6291,7 +6290,6 @@ __init int intel_pmu_init(void)
>>>   		x86_pmu.pebs_aliases = NULL;
>>>   		x86_pmu.pebs_prec_dist = true;
>>>   		x86_pmu.pebs_block = true;
>>> -		x86_pmu.pebs_capable = ~0ULL;
>>>   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>>>   		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
>>>   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>>> @@ -6337,7 +6335,6 @@ __init int intel_pmu_init(void)
>>>   		x86_pmu.pebs_aliases = NULL;
>>>   		x86_pmu.pebs_prec_dist = true;
>>>   		x86_pmu.pebs_block = true;
>>> -		x86_pmu.pebs_capable = ~0ULL;
>>>   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>>>   		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
>>>   		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>>> index ba60427caa6d..e2da643632b9 100644
>>> --- a/arch/x86/events/intel/ds.c
>>> +++ b/arch/x86/events/intel/ds.c
>>> @@ -2258,6 +2258,7 @@ void __init intel_ds_init(void)
>>>   			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
>>>   			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
>>>   			if (x86_pmu.intel_cap.pebs_baseline) {
>>> +				x86_pmu.pebs_capable = ~0ULL;
>>
>> The two features of "Extended PEBS (about pebs_capable)" and "Adaptive PEBS
>> (about pebs_baseline)"
>> are orthogonal, although the two are often supported together.
> 
> The SDM explicitly states that PEBS Baseline implies Extended PEBS. See
> 3-19.8 (April 22 edition).
> 
> The question is if there is hardware that has Extended PEBS but doesn't
> have Baseline; and I simply don't know and was hoping Kan could find
> out.

Goldmont Plus should be the only platform which supports extended PEBS
but doesn't have Baseline.

> 
> That said; the above patch can be further improved by also removing the
> PMU_FL_PEBS_ALL lines, which is already set by intel_ds_init().

I think we have to keep PMU_FL_PEBS_ALL for Goldmont Plus, but we can
remove it for SPR and ADL in intel_pmu_init(), since it's already set
in intel_ds_init() when Baseline is present.

Thanks,
Kan
> 
> In general though; the point is, we shouldn't be doing the FMS table
> thing for discoverable features. Having pebs_capable = ~0 and
> PMU_FL_PEBS_ALL on something with BASELINE set is just wrong.
Peter Zijlstra Aug. 15, 2022, 1:14 p.m. UTC | #7
On Mon, Aug 15, 2022 at 08:48:36PM +0800, Like Xu wrote:

> In the SDM world, we actually have three PEBS sub-features:
> 
> [July 2017] 18.5.4.1 Extended PEBS (first on Goldmont Plus)
> [January 2019] 18.9.2 Adaptive PEBS (first on Tremont)
> [April 2021] IA32_PERF_CAPABILITIES.PEBS_BASELINE in the Table 2-2. IA-32
> Architectural MSRs
> 
> Waiting Kan to confirm the real world.

Right, so it's possible GLM would need the FMS thing, but AFAIK ICX
has Baseline and therefore shouldn't need it.
Peter Zijlstra Aug. 15, 2022, 2:30 p.m. UTC | #8
On Mon, Aug 15, 2022 at 09:06:01AM -0400, Liang, Kan wrote:

> Goldmont Plus should be the only platform which supports extended PEBS
> but doesn't have Baseline.

Like so then...

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2db93498ff71..cb98a05ee743 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6291,10 +6291,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
-		x86_pmu.pebs_capable = ~0ULL;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-		x86_pmu.flags |= PMU_FL_PEBS_ALL;
 		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
 		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
 
@@ -6337,10 +6335,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
-		x86_pmu.pebs_capable = ~0ULL;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-		x86_pmu.flags |= PMU_FL_PEBS_ALL;
 		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
 		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
 		x86_pmu.lbr_pt_coexist = true;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ba60427caa6d..ac6dd4c96dbc 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2262,6 +2262,7 @@ void __init intel_ds_init(void)
 					PERF_SAMPLE_BRANCH_STACK |
 					PERF_SAMPLE_TIME;
 				x86_pmu.flags |= PMU_FL_PEBS_ALL;
+				x86_pmu.pebs_capable = ~0ULL;
 				pebs_qual = "-baseline";
 				x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
 			} else {
Like Xu Aug. 16, 2022, 11:57 a.m. UTC | #9
On 15/8/2022 10:30 pm, Peter Zijlstra wrote:
> On Mon, Aug 15, 2022 at 09:06:01AM -0400, Liang, Kan wrote:
> 
>> Goldmont Plus should be the only platform which supports extended PEBS
>> but doesn't have Baseline.
> 
> Like so then...
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 2db93498ff71..cb98a05ee743 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -6291,10 +6291,8 @@ __init int intel_pmu_init(void)
>   		x86_pmu.pebs_aliases = NULL;
>   		x86_pmu.pebs_prec_dist = true;
>   		x86_pmu.pebs_block = true;
> -		x86_pmu.pebs_capable = ~0ULL;
>   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>   		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
> -		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>   		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
>   		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
>   
> @@ -6337,10 +6335,8 @@ __init int intel_pmu_init(void)
>   		x86_pmu.pebs_aliases = NULL;
>   		x86_pmu.pebs_prec_dist = true;
>   		x86_pmu.pebs_block = true;
> -		x86_pmu.pebs_capable = ~0ULL;
>   		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
>   		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
> -		x86_pmu.flags |= PMU_FL_PEBS_ALL;
>   		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
>   		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
>   		x86_pmu.lbr_pt_coexist = true;
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index ba60427caa6d..ac6dd4c96dbc 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -2262,6 +2262,7 @@ void __init intel_ds_init(void)
>   					PERF_SAMPLE_BRANCH_STACK |
>   					PERF_SAMPLE_TIME;
>   				x86_pmu.flags |= PMU_FL_PEBS_ALL;
> +				x86_pmu.pebs_capable = ~0ULL;
>   				pebs_qual = "-baseline";
>   				x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
>   			} else {

Looks good to me.

This diff also touches PMU_FL_PEBS_ALL, which is not strictly needed for the
guest PEBS fixes; it should be sent out separately to the linux-perf-users
list for further review.

Patch

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 4e9b7af9cc45..e46fd496187b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6239,6 +6239,7 @@  __init int intel_pmu_init(void)
 	case INTEL_FAM6_ICELAKE_X:
 	case INTEL_FAM6_ICELAKE_D:
 		x86_pmu.pebs_ept = 1;
+		x86_pmu.pebs_capable = ~0ULL;
 		pmem = true;
 		fallthrough;
 	case INTEL_FAM6_ICELAKE_L: