
[v3,06/17] KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS

Message ID 20210104131542.495413-7-like.xu@linux.intel.com (mailing list archive)
State New, archived
Series KVM: x86/pmu: Add support to enable Guest PEBS via DS

Commit Message

Like Xu Jan. 4, 2021, 1:15 p.m. UTC
If IA32_PERF_CAPABILITIES.PEBS_BASELINE [bit 14] is set, the
IA32_PEBS_ENABLE MSR exists and all architecturally enumerated fixed
and general purpose counters have corresponding bits in IA32_PEBS_ENABLE
that enable generation of PEBS records. The general-purpose counter bits
start at bit IA32_PEBS_ENABLE[0], and the fixed counter bits start at
bit IA32_PEBS_ENABLE[32].
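
As a side note (not part of the patch; the macro and helper names below are
hypothetical), the bit layout described above can be sketched as:

#include <linux/types.h>

/* General-purpose counter enable bits start at IA32_PEBS_ENABLE[0]. */
#define PEBS_ENABLE_GP(idx)	(1ULL << (idx))
/* Fixed counter enable bits start at IA32_PEBS_ENABLE[32]. */
#define PEBS_ENABLE_FIXED(idx)	(1ULL << (32 + (idx)))

/* e.g. a value enabling PEBS on GP counters 0-1 and fixed counter 0 */
static inline u64 pebs_enable_example(void)
{
	return PEBS_ENABLE_GP(0) | PEBS_ENABLE_GP(1) | PEBS_ENABLE_FIXED(0);
}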

When guest PEBS is enabled, the IA32_PEBS_ENABLE MSR is added to the
perf_guest_switch_msr() list and is switched across VMX transitions,
just like the CORE_PERF_GLOBAL_CTRL MSR.
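
For context, here is a rough sketch of how a hypervisor consumes that list
across VMX transitions. It is only an illustration: the struct is
paraphrased from arch/x86/include/asm/perf_event.h, kvm_clear_switch_msr()
and kvm_add_switch_msr() are hypothetical stand-ins for KVM's atomic MSR
switch machinery, and details differ across kernel versions.

#include <linux/types.h>

/* Paraphrased from arch/x86/include/asm/perf_event.h */
struct perf_guest_switch_msr {
	unsigned msr;
	u64 host, guest;
};

struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);

/* Hypothetical stand-ins for the hypervisor's atomic MSR switch helpers. */
void kvm_clear_switch_msr(unsigned int msr);
void kvm_add_switch_msr(unsigned int msr, u64 guest_val, u64 host_val);

static void switch_perf_msrs_sketch(void)
{
	struct perf_guest_switch_msr *msrs;
	int i, nr_msrs;

	msrs = perf_guest_get_msrs(&nr_msrs);
	if (!msrs)
		return;

	for (i = 0; i < nr_msrs; i++) {
		/* Equal host/guest values need no VM-entry/VM-exit switch. */
		if (msrs[i].host == msrs[i].guest)
			kvm_clear_switch_msr(msrs[i].msr);
		else
			kvm_add_switch_msr(msrs[i].msr, msrs[i].guest,
					   msrs[i].host);
	}
}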

Originally-by: Andi Kleen <ak@linux.intel.com>
Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Co-developed-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
 arch/x86/events/intel/core.c     | 20 ++++++++++++++++++++
 arch/x86/include/asm/kvm_host.h  |  1 +
 arch/x86/include/asm/msr-index.h |  6 ++++++
 arch/x86/kvm/vmx/pmu_intel.c     | 28 ++++++++++++++++++++++++++++
 4 files changed, 55 insertions(+)

Comments

Sean Christopherson Jan. 5, 2021, 9:11 p.m. UTC | #1
On Mon, Jan 04, 2021, Like Xu wrote:
> If IA32_PERF_CAPABILITIES.PEBS_BASELINE [bit 14] is set, the
> IA32_PEBS_ENABLE MSR exists and all architecturally enumerated fixed
> and general purpose counters have corresponding bits in IA32_PEBS_ENABLE
> that enable generation of PEBS records. The general-purpose counter bits
> start at bit IA32_PEBS_ENABLE[0], and the fixed counter bits start at
> bit IA32_PEBS_ENABLE[32].
> 
> When guest PEBS is enabled, the IA32_PEBS_ENABLE MSR will be
> added to the perf_guest_switch_msr() and switched during the
> VMX transitions just like CORE_PERF_GLOBAL_CTRL MSR.
> 
> Originally-by: Andi Kleen <ak@linux.intel.com>
> Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> Co-developed-by: Luwei Kang <luwei.kang@intel.com>
> Signed-off-by: Luwei Kang <luwei.kang@intel.com>
> Signed-off-by: Like Xu <like.xu@linux.intel.com>
> ---
>  arch/x86/events/intel/core.c     | 20 ++++++++++++++++++++
>  arch/x86/include/asm/kvm_host.h  |  1 +
>  arch/x86/include/asm/msr-index.h |  6 ++++++
>  arch/x86/kvm/vmx/pmu_intel.c     | 28 ++++++++++++++++++++++++++++
>  4 files changed, 55 insertions(+)
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index af457f8cb29d..6453b8a6834a 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3715,6 +3715,26 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
>  		*nr = 2;
>  	}
>  
> +	if (cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask) {
> +		arr[1].msr = MSR_IA32_PEBS_ENABLE;
> +		arr[1].host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask;
> +		arr[1].guest = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
> +		/*
> +		 * The guest PEBS will be disabled once the host PEBS is enabled
> +		 * since the both enabled case may brings a unknown PMI to
> +		 * confuse host and the guest PEBS overflow PMI would be missed.
> +		 */
> +		if (arr[1].host)
> +			arr[1].guest = 0;
> +		arr[0].guest |= arr[1].guest;

Can't you modify the code that strips the PEBS counters from the guest's
value instead of poking into the array entry after the fact?

Also, why is this scenario even allowed?  Can't we force exclude_guest for
events that use PEBS?

> +		*nr = 2;
> +	} else if (*nr == 1) {
> +		/* Remove MSR_IA32_PEBS_ENABLE from MSR switch list in KVM */
> +		arr[1].msr = MSR_IA32_PEBS_ENABLE;
> +		arr[1].host = arr[1].guest = 0;
> +		*nr = 2;

Similar to above, rather than check "*nr == 1", this should properly integrate
with the "x86_pmu.pebs && x86_pmu.pebs_no_isolation" logic instead of poking
into the array after the fact.

By incorporating both suggestions, the logic can be streamlined significantly,
and IMO makes the overall flow much more understandable.  Untested...

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d4569bfa83e3..c5cc7e558c8e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3708,24 +3708,39 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
        arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
        arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
        arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-       if (x86_pmu.flags & PMU_FL_PEBS_ALL)
-               arr[0].guest &= ~cpuc->pebs_enabled;
-       else
-               arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+
+       /*
+        * Disable PEBS in the guest if PEBS is used by the host; enabling PEBS
+        * in both will lead to unexpected PMIs in the host and/or missed PMIs
+        * in the guest.
+        */
+       if (cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask) {
+               if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+                       arr[0].guest &= ~cpuc->pebs_enabled;
+               else
+                       arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+       }
        *nr = 1;

-       if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
-               /*
-                * If PMU counter has PEBS enabled it is not enough to
-                * disable counter on a guest entry since PEBS memory
-                * write can overshoot guest entry and corrupt guest
-                * memory. Disabling PEBS solves the problem.
-                *
-                * Don't do this if the CPU already enforces it.
-                */
+       if (x86_pmu.pebs) {
                arr[1].msr = MSR_IA32_PEBS_ENABLE;
-               arr[1].host = cpuc->pebs_enabled;
-               arr[1].guest = 0;
+               arr[1].host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask;
+
+               /*
+                * Host and guest PEBS are mutually exclusive.  Load the guest
+                * value iff PEBS is disabled in the host.  If PEBS is enabled
+                * in the host and the CPU supports PEBS isolation, disabling
+                * the counters is sufficient (see above); skip the MSR loads
+                * by stuffing guest=host (KVM will remove the entry).  Without
+                * isolation, PEBS must be explicitly disabled prior to
+                * VM-Enter to prevent PEBS writes from overshooting VM-Enter.
+                */
+               if (!arr[1].host)
+                       arr[1].guest = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
+               else if (x86_pmu.pebs_no_isolation)
+                       arr[1].guest = 0;
+               else
+                       arr[1].guest = arr[1].host;
                *nr = 2;
        }
Xu, Like Jan. 7, 2021, 12:38 p.m. UTC | #2
Hi Sean,

On 2021/1/6 5:11, Sean Christopherson wrote:
> On Mon, Jan 04, 2021, Like Xu wrote:
>> If IA32_PERF_CAPABILITIES.PEBS_BASELINE [bit 14] is set, the
>> IA32_PEBS_ENABLE MSR exists and all architecturally enumerated fixed
>> and general purpose counters have corresponding bits in IA32_PEBS_ENABLE
>> that enable generation of PEBS records. The general-purpose counter bits
>> start at bit IA32_PEBS_ENABLE[0], and the fixed counter bits start at
>> bit IA32_PEBS_ENABLE[32].
>>
>> When guest PEBS is enabled, the IA32_PEBS_ENABLE MSR will be
>> added to the perf_guest_switch_msr() and switched during the
>> VMX transitions just like CORE_PERF_GLOBAL_CTRL MSR.
>>
>> Originally-by: Andi Kleen <ak@linux.intel.com>
>> Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> Co-developed-by: Luwei Kang <luwei.kang@intel.com>
>> Signed-off-by: Luwei Kang <luwei.kang@intel.com>
>> Signed-off-by: Like Xu <like.xu@linux.intel.com>
>> ---
>>   arch/x86/events/intel/core.c     | 20 ++++++++++++++++++++
>>   arch/x86/include/asm/kvm_host.h  |  1 +
>>   arch/x86/include/asm/msr-index.h |  6 ++++++
>>   arch/x86/kvm/vmx/pmu_intel.c     | 28 ++++++++++++++++++++++++++++
>>   4 files changed, 55 insertions(+)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index af457f8cb29d..6453b8a6834a 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -3715,6 +3715,26 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
>>   		*nr = 2;
>>   	}
>>   
>> +	if (cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask) {
>> +		arr[1].msr = MSR_IA32_PEBS_ENABLE;
>> +		arr[1].host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask;
>> +		arr[1].guest = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
>> +		/*
>> +		 * The guest PEBS will be disabled once the host PEBS is enabled
>> +		 * since the both enabled case may brings a unknown PMI to
>> +		 * confuse host and the guest PEBS overflow PMI would be missed.
>> +		 */
>> +		if (arr[1].host)
>> +			arr[1].guest = 0;
>> +		arr[0].guest |= arr[1].guest;
> Can't you modify the code that strips the PEBS counters from the guest's
> value instead of poking into the array entry after the fact?
Ah, nice move.
>
> Also, why is this scenario even allowed?  Can't we force exclude_guest for
> events that use PEBS?

The attr.exclude_guest flag is configured per event; it is not shared
with other perf_events on the same CPU, and changing it does not take
effect while the event is running.

Host perf would still allow a PEBS perf_event to be created or run for
the vCPU while the host is using a PEBS counter. One reason is that the
perf scheduler needs to know which event owns which PEBS counter.
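
For reference, a minimal userspace sketch (not part of this series) of a
host PEBS event created with exclude_guest set, showing that the flag is
fixed per event at creation time:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_host_pebs_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.sample_period = 100000;
	attr.precise_ip = 2;		/* request precise (PEBS) sampling */
	attr.exclude_guest = 1;		/* do not count while a guest runs */

	/* pid = -1, cpu = 0: count system-wide on CPU 0 */
	return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
}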

>
>> +		*nr = 2;
>> +	} else if (*nr == 1) {
>> +		/* Remove MSR_IA32_PEBS_ENABLE from MSR switch list in KVM */
>> +		arr[1].msr = MSR_IA32_PEBS_ENABLE;
>> +		arr[1].host = arr[1].guest = 0;
>> +		*nr = 2;
> Similar to above, rather than check "*nr == 1", this should properly integrate
> with the "x86_pmu.pebs && x86_pmu.pebs_no_isolation" logic instead of poking
> into the array after the fact.
Thanks, it makes sense to me and I'll rework it based on your cleaner code.
>
> By incorporating both suggestions, the logic can be streamlined significantly,
> and IMO makes the overall flow much more understandable.  Untested...
>
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index d4569bfa83e3..c5cc7e558c8e 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3708,24 +3708,39 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
>          arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
>          arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
>          arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
> -       if (x86_pmu.flags & PMU_FL_PEBS_ALL)
> -               arr[0].guest &= ~cpuc->pebs_enabled;
> -       else
> -               arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
> +
> +       /*
> +        * Disable PEBS in the guest if PEBS is used by the host; enabling PEBS
> +        * in both will lead to unexpected PMIs in the host and/or missed PMIs
> +        * in the guest.
> +        */
> +       if (cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask) {
> +               if (x86_pmu.flags & PMU_FL_PEBS_ALL)
> +                       arr[0].guest &= ~cpuc->pebs_enabled;
> +               else
> +                       arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
> +       }
>          *nr = 1;
>
> -       if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
> -               /*
> -                * If PMU counter has PEBS enabled it is not enough to
> -                * disable counter on a guest entry since PEBS memory
> -                * write can overshoot guest entry and corrupt guest
> -                * memory. Disabling PEBS solves the problem.
> -                *
> -                * Don't do this if the CPU already enforces it.
> -                */
> +       if (x86_pmu.pebs) {
>                  arr[1].msr = MSR_IA32_PEBS_ENABLE;
> -               arr[1].host = cpuc->pebs_enabled;
> -               arr[1].guest = 0;
> +               arr[1].host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask;
> +
> +               /*
> +                * Host and guest PEBS are mutually exclusive.  Load the guest
> +                * value iff PEBS is disabled in the host.  If PEBS is enabled
> +                * in the host and the CPU supports PEBS isolation, disabling
> +                * the counters is sufficient (see above); skip the MSR loads
s/above/9b545c04abd4/
> +                * by stuffing guest=host (KVM will remove the entry).  Without
> +                * isolation, PEBS must be explicitly disabled prior to
> +                * VM-Enter to prevent PEBS writes from overshooting VM-Enter.
> +                */
> +               if (!arr[1].host)
> +                       arr[1].guest = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
miss "arr[0].guest |= arr[1].guest;" here to make it enabled in the 
global_ctrl msr.

Sean, if you have more comments on other patches, just let me know.

---
thx,likexu
> +               else if (x86_pmu.pebs_no_isolation)
> +                       arr[1].guest = 0;
> +               else
> +                       arr[1].guest = arr[1].host;
>                  *nr = 2;
>          }
Peter Zijlstra Jan. 15, 2021, 2:46 p.m. UTC | #3
On Mon, Jan 04, 2021 at 09:15:31PM +0800, Like Xu wrote:

> +	if (cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask) {
> +		arr[1].msr = MSR_IA32_PEBS_ENABLE;
> +		arr[1].host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask;
> +		arr[1].guest = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
> +		/*
> +		 * The guest PEBS will be disabled once the host PEBS is enabled
> +		 * since the both enabled case may brings a unknown PMI to
> +		 * confuse host and the guest PEBS overflow PMI would be missed.
> +		 */
> +		if (arr[1].host)
> +			arr[1].guest = 0;
> +		arr[0].guest |= arr[1].guest;
> +		*nr = 2;

Elsewhere you write:

> When we have a PEBS PMI due to guest workload and vm-exits,
> the code path from vm-exit to the host PEBS PMI handler may also
> bring PEBS PMI and mark the status bit. The current PMI handler
> can't distinguish them and would treat the later one as a suspicious
> PMI and output a warning.

So the reason isn't that spurious PMIs are tedious, but that the
hardware is actually doing something weird.

Or am I not reading things straight?
Xu, Like Jan. 15, 2021, 3:29 p.m. UTC | #4
On 2021/1/15 22:46, Peter Zijlstra wrote:
> On Mon, Jan 04, 2021 at 09:15:31PM +0800, Like Xu wrote:
>
>> +	if (cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask) {
>> +		arr[1].msr = MSR_IA32_PEBS_ENABLE;
>> +		arr[1].host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask;
>> +		arr[1].guest = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
>> +		/*
>> +		 * The guest PEBS will be disabled once the host PEBS is enabled
>> +		 * since the both enabled case may brings a unknown PMI to
>> +		 * confuse host and the guest PEBS overflow PMI would be missed.
>> +		 */
>> +		if (arr[1].host)
>> +			arr[1].guest = 0;
>> +		arr[0].guest |= arr[1].guest;
>> +		*nr = 2;
> Elsewhere you write:
>
>> When we have a PEBS PMI due to guest workload and vm-exits,
>> the code path from vm-exit to the host PEBS PMI handler may also
>> bring PEBS PMI and mark the status bit. The current PMI handler
>> can't distinguish them and would treat the later one as a suspicious
>> PMI and output a warning.
> So the reason isn't that spurious PMIs are tedious, but that the
> hardware is actually doing something weird.
>
> Or am I not reading things straight?

I think the PEBS facility works as expected, because in the "both enabled"
case the first PEBS PMI is generated from host counter 1 based on the
guest's interrupt_threshold, and a later PEBS PMI can be generated from
host counter 2 based on the host's interrupt_threshold.

For example, if we set the overflow value small enough, the number of
instructions executed between vm-exit and the disabling of global ctrl
can be large enough to trigger a PEBS PMI.

Do you think this is weird, or do you see other possibilities?

Patch

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index af457f8cb29d..6453b8a6834a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3715,6 +3715,26 @@  static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
 		*nr = 2;
 	}
 
+	if (cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask) {
+		arr[1].msr = MSR_IA32_PEBS_ENABLE;
+		arr[1].host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask;
+		arr[1].guest = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
+		/*
+		 * Guest PEBS is disabled whenever host PEBS is enabled, since
+		 * enabling both may bring an unexpected PMI that confuses the
+		 * host and causes the guest PEBS overflow PMI to be missed.
+		 */
+		if (arr[1].host)
+			arr[1].guest = 0;
+		arr[0].guest |= arr[1].guest;
+		*nr = 2;
+	} else if (*nr == 1) {
+		/* Remove MSR_IA32_PEBS_ENABLE from MSR switch list in KVM */
+		arr[1].msr = MSR_IA32_PEBS_ENABLE;
+		arr[1].host = arr[1].guest = 0;
+		*nr = 2;
+	}
+
 	return arr;
 }
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09dacda33fb8..88a403fa46d4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -450,6 +450,7 @@  struct kvm_pmu {
 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
 	u64 pebs_enable;
+	u64 pebs_enable_mask;
 
 	/*
 	 * The gate to release perf_events not marked in
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index abfc9b0fbd8d..11cc0b80fe7a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -184,6 +184,12 @@ 
 #define MSR_PEBS_DATA_CFG		0x000003f2
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define PERF_CAP_PEBS_TRAP             BIT_ULL(6)
+#define PERF_CAP_ARCH_REG              BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT           0xf00
+#define PERF_CAP_PEBS_BASELINE         BIT_ULL(14)
+#define PERF_CAP_PEBS_MASK	(PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+	PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_IA32_RTIT_CTL		0x00000570
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 50047114c298..2f10587bda19 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -180,6 +180,9 @@  static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		ret = pmu->version > 1;
 		break;
+	case MSR_IA32_PEBS_ENABLE:
+		ret = vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT;
+		break;
 	default:
 		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
 			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
@@ -221,6 +224,9 @@  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		msr_info->data = pmu->global_ovf_ctrl;
 		return 0;
+	case MSR_IA32_PEBS_ENABLE:
+		msr_info->data = pmu->pebs_enable;
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -280,6 +286,14 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 0;
 		}
 		break;
+	case MSR_IA32_PEBS_ENABLE:
+		if (pmu->pebs_enable == data)
+			return 0;
+		if (!(data & pmu->pebs_enable_mask)) {
+			pmu->pebs_enable = data;
+			return 0;
+		}
+		break;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -329,6 +343,7 @@  static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->version = 0;
 	pmu->reserved_bits = 0xffffffff00200000ull;
 	pmu->fixed_ctr_ctrl_mask = ~0ull;
+	pmu->pebs_enable_mask = ~0ull;
 
 	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
 	if (!entry)
@@ -384,6 +399,19 @@  static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	bitmap_set(pmu->all_valid_pmc_idx,
 		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
 
+	if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT) {
+		if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_BASELINE) {
+			pmu->pebs_enable_mask = ~pmu->global_ctrl;
+			pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
+			for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
+				pmu->fixed_ctr_ctrl_mask &=
+					~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
+		} else
+			pmu->pebs_enable_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
+	} else {
+		vcpu->arch.perf_capabilities &= ~PERF_CAP_PEBS_MASK;
+	}
+
 	nested_vmx_pmu_entry_exit_ctls_update(vcpu);
 }