Message ID | 20210511024214.280733-7-like.xu@linux.intel.com
---|---
State | New, archived
Series | KVM: x86/pmu: Add *basic* support to enable guest PEBS via DS
On Tue, May 11, 2021 at 10:42:04AM +0800, Like Xu wrote:
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 2f89fd599842..c791765f4761 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3898,31 +3898,49 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>  	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
>  	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
> +	u64 pebs_mask = (x86_pmu.flags & PMU_FL_PEBS_ALL) ?
> +		cpuc->pebs_enabled : (cpuc->pebs_enabled & PEBS_COUNTER_MASK);
> +
> +	*nr = 0;
> +	arr[(*nr)++] = (struct perf_guest_switch_msr){
> +		.msr = MSR_CORE_PERF_GLOBAL_CTRL,
> +		.host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
> +		.guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask),
> +	};
>
> +	if (!x86_pmu.pebs)
> +		return arr;
>
> +	/*
> +	 * If PMU counter has PEBS enabled it is not enough to
> +	 * disable counter on a guest entry since PEBS memory
> +	 * write can overshoot guest entry and corrupt guest
> +	 * memory. Disabling PEBS solves the problem.
> +	 *
> +	 * Don't do this if the CPU already enforces it.
> +	 */
> +	if (x86_pmu.pebs_no_isolation) {
> +		arr[(*nr)++] = (struct perf_guest_switch_msr){
> +			.msr = MSR_IA32_PEBS_ENABLE,
> +			.host = cpuc->pebs_enabled,
> +			.guest = 0,
> +		};
> +		return arr;
>  	}
>
> +	if (!x86_pmu.pebs_vmx)
> +		return arr;
> +
> +	arr[*nr] = (struct perf_guest_switch_msr){
> +		.msr = MSR_IA32_PEBS_ENABLE,
> +		.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
> +		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
> +	};
> +
> +	/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
> +	arr[0].guest |= arr[*nr].guest;
> +
> +	++(*nr);
>  	return arr;
>  }

ISTR saying I was confused as heck by this function, I still don't see
clarifying comments :/

What's .host and .guest ?
On Tue, May 11, 2021 at 10:42:04AM +0800, Like Xu wrote: > diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c > index 2f89fd599842..c791765f4761 100644 > --- a/arch/x86/events/intel/core.c > +++ b/arch/x86/events/intel/core.c > @@ -3898,31 +3898,49 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) > struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); > struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; > u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); > + u64 pebs_mask = (x86_pmu.flags & PMU_FL_PEBS_ALL) ? > + cpuc->pebs_enabled : (cpuc->pebs_enabled & PEBS_COUNTER_MASK); > - if (x86_pmu.flags & PMU_FL_PEBS_ALL) > - arr[0].guest &= ~cpuc->pebs_enabled; > - else > - arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); > - *nr = 1; Instead of endlessly mucking about with branches, do we want something like this instead? --- diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2521d03de5e0..bcfba11196c8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2819,10 +2819,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) * counters from the GLOBAL_STATUS mask and we always process PEBS * events via drain_pebs(). */ - if (x86_pmu.flags & PMU_FL_PEBS_ALL) - status &= ~cpuc->pebs_enabled; - else - status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); + status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable); /* * PEBS overflow sets bit 62 in the global status register @@ -3862,10 +3859,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask; arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask; - if (x86_pmu.flags & PMU_FL_PEBS_ALL) - arr[0].guest &= ~cpuc->pebs_enabled; - else - arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); + arr[0].guest &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable); *nr = 1; if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { @@ -5546,6 +5540,7 @@ __init int intel_pmu_init(void) x86_pmu.events_mask_len = eax.split.mask_length; x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); + x86_pmu.pebs_capable = PEBS_COUNTER_MASK; /* * Quirk: v2 perfmon does not report fixed-purpose events, so @@ -5730,6 +5725,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.lbr_pt_coexist = true; + x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.get_event_constraints = glp_get_event_constraints; @@ -6080,6 +6076,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; + x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; x86_pmu.flags |= PMU_FL_PEBS_ALL; @@ -6123,6 +6120,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; + x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; x86_pmu.flags |= PMU_FL_PEBS_ALL; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 27fa85e7d4fd..6f3cf81ccb1b 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -805,6 +805,7 @@ struct x86_pmu { void (*pebs_aliases)(struct perf_event *event); unsigned long large_pebs_flags; u64 rtm_abort_event; + u64 pebs_capable; /* * Intel 
LBR
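The gist of the suggestion above is that the set of PEBS-capable counter bits is a property of the CPU model, so it can be computed once at init time (PEBS_COUNTER_MASK on cores without PMU_FL_PEBS_ALL, ~0ULL on cores with it) and the hot paths collapse into a single mask operation. Below is a minimal user-space sketch of that equivalence, not kernel code: the fake_pmu struct, the guest_ctrl_old/new helpers, and the constant values are made up for illustration; only the field and flag names mirror the patch.

#include <stdint.h>
#include <assert.h>

#define PEBS_COUNTER_MASK	((1ULL << 8) - 1)	/* illustrative: GP counters only */
#define PMU_FL_PEBS_ALL		0x1			/* illustrative flag bit */

struct fake_pmu {
	uint64_t flags;
	uint64_t pebs_capable;	/* set once at "init" time */
};

/* Old form: branch on PMU_FL_PEBS_ALL at every call site. */
static uint64_t guest_ctrl_old(const struct fake_pmu *pmu, uint64_t guest,
			       uint64_t pebs_enabled)
{
	if (pmu->flags & PMU_FL_PEBS_ALL)
		return guest & ~pebs_enabled;
	return guest & ~(pebs_enabled & PEBS_COUNTER_MASK);
}

/* New form: one precomputed mask, no branch. */
static uint64_t guest_ctrl_new(const struct fake_pmu *pmu, uint64_t guest,
			       uint64_t pebs_enabled)
{
	return guest & ~(pebs_enabled & pmu->pebs_capable);
}

int main(void)
{
	struct fake_pmu legacy   = { .flags = 0, .pebs_capable = PEBS_COUNTER_MASK };
	struct fake_pmu pebs_all = { .flags = PMU_FL_PEBS_ALL, .pebs_capable = ~0ULL };
	uint64_t guest = ~0ULL;
	uint64_t pebs_enabled = (1ULL << 0) | (1ULL << 32);

	assert(guest_ctrl_old(&legacy, guest, pebs_enabled) ==
	       guest_ctrl_new(&legacy, guest, pebs_enabled));
	assert(guest_ctrl_old(&pebs_all, guest, pebs_enabled) ==
	       guest_ctrl_new(&pebs_all, guest, pebs_enabled));
	return 0;
}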
On 2021/5/17 16:33, Peter Zijlstra wrote: > On Tue, May 11, 2021 at 10:42:04AM +0800, Like Xu wrote: >> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c >> index 2f89fd599842..c791765f4761 100644 >> --- a/arch/x86/events/intel/core.c >> +++ b/arch/x86/events/intel/core.c >> @@ -3898,31 +3898,49 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) >> struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); >> struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; >> u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); >> + u64 pebs_mask = (x86_pmu.flags & PMU_FL_PEBS_ALL) ? >> + cpuc->pebs_enabled : (cpuc->pebs_enabled & PEBS_COUNTER_MASK); >> - if (x86_pmu.flags & PMU_FL_PEBS_ALL) >> - arr[0].guest &= ~cpuc->pebs_enabled; >> - else >> - arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); >> - *nr = 1; > Instead of endlessly mucking about with branches, do we want something > like this instead? Fine to me. How about the commit message for your below patch: x86/perf/core: Add pebs_capable to store valid PEBS_COUNTER_MASK value The value of pebs_counter_mask will be accessed frequently for repeated use in the intel_guest_get_msrs(). So it can be optimized instead of endlessly mucking about with branches. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > > --- > diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c > index 2521d03de5e0..bcfba11196c8 100644 > --- a/arch/x86/events/intel/core.c > +++ b/arch/x86/events/intel/core.c > @@ -2819,10 +2819,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) > * counters from the GLOBAL_STATUS mask and we always process PEBS > * events via drain_pebs(). > */ > - if (x86_pmu.flags & PMU_FL_PEBS_ALL) > - status &= ~cpuc->pebs_enabled; > - else > - status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); > + status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable); > > /* > * PEBS overflow sets bit 62 in the global status register > @@ -3862,10 +3859,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) > arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; > arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask; > arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask; > - if (x86_pmu.flags & PMU_FL_PEBS_ALL) > - arr[0].guest &= ~cpuc->pebs_enabled; > - else > - arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); > + arr[0].guest &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable); > *nr = 1; > > if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { > @@ -5546,6 +5540,7 @@ __init int intel_pmu_init(void) > x86_pmu.events_mask_len = eax.split.mask_length; > > x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); > + x86_pmu.pebs_capable = PEBS_COUNTER_MASK; > > /* > * Quirk: v2 perfmon does not report fixed-purpose events, so > @@ -5730,6 +5725,7 @@ __init int intel_pmu_init(void) > x86_pmu.pebs_aliases = NULL; > x86_pmu.pebs_prec_dist = true; > x86_pmu.lbr_pt_coexist = true; > + x86_pmu.pebs_capable = ~0ULL; > x86_pmu.flags |= PMU_FL_HAS_RSP_1; > x86_pmu.flags |= PMU_FL_PEBS_ALL; > x86_pmu.get_event_constraints = glp_get_event_constraints; > @@ -6080,6 +6076,7 @@ __init int intel_pmu_init(void) > x86_pmu.pebs_aliases = NULL; > x86_pmu.pebs_prec_dist = true; > x86_pmu.pebs_block = true; > + x86_pmu.pebs_capable = ~0ULL; > x86_pmu.flags |= PMU_FL_HAS_RSP_1; > x86_pmu.flags |= PMU_FL_NO_HT_SHARING; > x86_pmu.flags |= PMU_FL_PEBS_ALL; > @@ -6123,6 +6120,7 @@ __init int intel_pmu_init(void) > x86_pmu.pebs_aliases = NULL; > 
x86_pmu.pebs_prec_dist = true; > x86_pmu.pebs_block = true; > + x86_pmu.pebs_capable = ~0ULL; > x86_pmu.flags |= PMU_FL_HAS_RSP_1; > x86_pmu.flags |= PMU_FL_NO_HT_SHARING; > x86_pmu.flags |= PMU_FL_PEBS_ALL; > diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h > index 27fa85e7d4fd..6f3cf81ccb1b 100644 > --- a/arch/x86/events/perf_event.h > +++ b/arch/x86/events/perf_event.h > @@ -805,6 +805,7 @@ struct x86_pmu { > void (*pebs_aliases)(struct perf_event *event); > unsigned long large_pebs_flags; > u64 rtm_abort_event; > + u64 pebs_capable; > > /* > * Intel LBR
On 2021/5/17 16:32, Peter Zijlstra wrote:
> On Tue, May 11, 2021 at 10:42:04AM +0800, Like Xu wrote:
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 2f89fd599842..c791765f4761 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -3898,31 +3898,49 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
>>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>>  	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
>>  	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
>> +	u64 pebs_mask = (x86_pmu.flags & PMU_FL_PEBS_ALL) ?
>> +		cpuc->pebs_enabled : (cpuc->pebs_enabled & PEBS_COUNTER_MASK);
>> +
>> +	*nr = 0;
>> +	arr[(*nr)++] = (struct perf_guest_switch_msr){
>> +		.msr = MSR_CORE_PERF_GLOBAL_CTRL,
>> +		.host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
>> +		.guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask),
>> +	};
>>
>> +	if (!x86_pmu.pebs)
>> +		return arr;
>>
>> +	/*
>> +	 * If PMU counter has PEBS enabled it is not enough to
>> +	 * disable counter on a guest entry since PEBS memory
>> +	 * write can overshoot guest entry and corrupt guest
>> +	 * memory. Disabling PEBS solves the problem.
>> +	 *
>> +	 * Don't do this if the CPU already enforces it.
>> +	 */
>> +	if (x86_pmu.pebs_no_isolation) {
>> +		arr[(*nr)++] = (struct perf_guest_switch_msr){
>> +			.msr = MSR_IA32_PEBS_ENABLE,
>> +			.host = cpuc->pebs_enabled,
>> +			.guest = 0,
>> +		};
>> +		return arr;
>>  	}
>>
>> +	if (!x86_pmu.pebs_vmx)
>> +		return arr;
>> +
>> +	arr[*nr] = (struct perf_guest_switch_msr){
>> +		.msr = MSR_IA32_PEBS_ENABLE,
>> +		.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
>> +		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
>> +	};
>> +
>> +	/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
>> +	arr[0].guest |= arr[*nr].guest;
>> +
>> +	++(*nr);
>>  	return arr;
>>  }
> ISTR saying I was confused as heck by this function, I still don't see
> clarifying comments :/
>
> What's .host and .guest ?

Will adding the following comments help you ?

+/*
+ * Currently, the only caller of this function is the atomic_switch_perf_msrs().
+ * The host perf context helps to prepare the values of the real hardware for
+ * a set of msrs that need to be switched atomically in a vmx transaction.
+ *
+ * For example, the pseudocode needed to add a new msr should look like:
+ *
+ *	arr[(*nr)++] = (struct perf_guest_switch_msr){
+ *		.msr = the hardware msr address,
+ *		.host = the value the hardware has when it doesn't run a guest,
+ *		.guest = the value the hardware has when it runs a guest,
+ *	};
+ *
+ * These values have nothing to do with the emulated values the guest sees
+ * when it uses {RD,WR}MSR, which should be handled in the KVM context.
+ */
 static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
On Tue, May 18, 2021 at 04:44:13PM +0800, Xu, Like wrote:
> Will adding the following comments help you ?
>
> +/*
> + * Currently, the only caller of this function is the atomic_switch_perf_msrs().
> + * The host perf context helps to prepare the values of the real hardware for
> + * a set of msrs that need to be switched atomically in a vmx transaction.
> + *
> + * For example, the pseudocode needed to add a new msr should look like:
> + *
> + *	arr[(*nr)++] = (struct perf_guest_switch_msr){
> + *		.msr = the hardware msr address,
> + *		.host = the value the hardware has when it doesn't run a guest,
> + *		.guest = the value the hardware has when it runs a guest,

So personally I think the .host and .guest naming is terrible here,
because both values are host values. But I don't know enough about virt
to know if there's accepted nomenclature for this.

> + *	};
> + *
> + * These values have nothing to do with the emulated values the guest sees
> + * when it uses {RD,WR}MSR, which should be handled in the KVM context.
> + */
> static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)

Yes, now at least one can understand wth this function does, even
though the actual naming is still horrible.

Thanks!

Additionally, would it make sense to add a pointer to the KVM code that
does the emulation for each MSR listed in this function?
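For reference, the KVM-side consumer being discussed is atomic_switch_perf_msrs() in arch/x86/kvm/vmx/vmx.c. The sketch below is a simplified rendering of how that function looks around the v5.12/v5.13 base of this series (the clear/add helpers program the VMCS VM-entry/VM-exit MSR-load lists; error handling and the data pointer the series threads through this path are elided). It shows why both fields are host-side values: the CPU loads .guest into the MSR on VM-entry and restores .host on VM-exit.

/*
 * Simplified sketch: for every MSR perf wants switched, program the
 * VMCS MSR-load lists so the hardware atomically loads .guest on
 * VM-entry and .host on VM-exit.
 */
static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{
	int i, nr_msrs;
	struct perf_guest_switch_msr *msrs;

	msrs = perf_guest_get_msrs(&nr_msrs);
	if (!msrs)
		return;

	for (i = 0; i < nr_msrs; i++)
		if (msrs[i].host == msrs[i].guest)
			clear_atomic_switch_msr(vmx, msrs[i].msr);
		else
			add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
					      msrs[i].host, false);
}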
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2f89fd599842..c791765f4761 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3898,31 +3898,49 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); + u64 pebs_mask = (x86_pmu.flags & PMU_FL_PEBS_ALL) ? + cpuc->pebs_enabled : (cpuc->pebs_enabled & PEBS_COUNTER_MASK); + + *nr = 0; + arr[(*nr)++] = (struct perf_guest_switch_msr){ + .msr = MSR_CORE_PERF_GLOBAL_CTRL, + .host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask, + .guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask), + }; - arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; - arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask; - arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask; - if (x86_pmu.flags & PMU_FL_PEBS_ALL) - arr[0].guest &= ~cpuc->pebs_enabled; - else - arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); - *nr = 1; + if (!x86_pmu.pebs) + return arr; - if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { - /* - * If PMU counter has PEBS enabled it is not enough to - * disable counter on a guest entry since PEBS memory - * write can overshoot guest entry and corrupt guest - * memory. Disabling PEBS solves the problem. - * - * Don't do this if the CPU already enforces it. - */ - arr[1].msr = MSR_IA32_PEBS_ENABLE; - arr[1].host = cpuc->pebs_enabled; - arr[1].guest = 0; - *nr = 2; + /* + * If PMU counter has PEBS enabled it is not enough to + * disable counter on a guest entry since PEBS memory + * write can overshoot guest entry and corrupt guest + * memory. Disabling PEBS solves the problem. + * + * Don't do this if the CPU already enforces it. + */ + if (x86_pmu.pebs_no_isolation) { + arr[(*nr)++] = (struct perf_guest_switch_msr){ + .msr = MSR_IA32_PEBS_ENABLE, + .host = cpuc->pebs_enabled, + .guest = 0, + }; + return arr; } + if (!x86_pmu.pebs_vmx) + return arr; + + arr[*nr] = (struct perf_guest_switch_msr){ + .msr = MSR_IA32_PEBS_ENABLE, + .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + }; + + /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */ + arr[0].guest |= arr[*nr].guest; + + ++(*nr); return arr; } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 49b421bd3dd8..0a42079560ac 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -473,6 +473,9 @@ struct kvm_pmu { DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); + u64 pebs_enable; + u64 pebs_enable_mask; + /* * The gate to release perf_events not marked in * pmc_in_use only once in a vcpu time slice. 
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 742d89a00721..1ab3f280f3a9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -189,6 +189,12 @@ #define PERF_CAP_PT_IDX 16 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 +#define PERF_CAP_PEBS_TRAP BIT_ULL(6) +#define PERF_CAP_ARCH_REG BIT_ULL(7) +#define PERF_CAP_PEBS_FORMAT 0xf00 +#define PERF_CAP_PEBS_BASELINE BIT_ULL(14) +#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ + PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE) #define MSR_IA32_RTIT_CTL 0x00000570 #define RTIT_CTL_TRACEEN BIT(0) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index ac7fe714e6c1..9938b485c31c 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -220,6 +220,9 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) case MSR_CORE_PERF_GLOBAL_OVF_CTRL: ret = pmu->version > 1; break; + case MSR_IA32_PEBS_ENABLE: + ret = vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT; + break; default: ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) || @@ -367,6 +370,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_CORE_PERF_GLOBAL_OVF_CTRL: msr_info->data = pmu->global_ovf_ctrl; return 0; + case MSR_IA32_PEBS_ENABLE: + msr_info->data = pmu->pebs_enable; + return 0; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { @@ -427,6 +433,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } break; + case MSR_IA32_PEBS_ENABLE: + if (pmu->pebs_enable == data) + return 0; + if (!(data & pmu->pebs_enable_mask)) { + pmu->pebs_enable = data; + return 0; + } + break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { @@ -479,6 +493,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->version = 0; pmu->reserved_bits = 0xffffffff00200000ull; pmu->fixed_ctr_ctrl_mask = ~0ull; + pmu->pebs_enable_mask = ~0ull; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry) @@ -545,6 +560,22 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) if (lbr_desc->records.nr) bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1); + + if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT) { + if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_BASELINE) { + pmu->pebs_enable_mask = ~pmu->global_ctrl; + pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE; + for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { + pmu->fixed_ctr_ctrl_mask &= + ~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4)); + } + } else { + pmu->pebs_enable_mask = + ~((1ull << pmu->nr_arch_gp_counters) - 1); + } + } else { + vcpu->arch.perf_capabilities &= ~PERF_CAP_PEBS_MASK; + } } static void intel_pmu_init(struct kvm_vcpu *vcpu)
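To make the pebs_enable_mask handling in intel_pmu_refresh() and the write check in intel_pmu_set_msr() concrete, here is a small user-space sketch of the same reserved-bit logic. It is not the KVM code itself: the helper names, the counter counts (8 GP counters, fixed counters starting at bit 32, roughly an Ice Lake layout) and the printf driver are illustrative only; the masking rules mirror the hunks above.

#include <stdint.h>
#include <stdio.h>

#define NR_GP_COUNTERS		8	/* illustrative */
#define NR_FIXED_COUNTERS	4	/* illustrative */
#define INTEL_PMC_IDX_FIXED	32

/*
 * Mirrors intel_pmu_refresh(): without PEBS baseline only the GP-counter
 * bits of MSR_IA32_PEBS_ENABLE are writable; with baseline every bit that
 * is valid in GLOBAL_CTRL is also valid here.
 */
static uint64_t pebs_enable_mask(int baseline, uint64_t global_ctrl)
{
	if (baseline)
		return ~global_ctrl;
	return ~((1ULL << NR_GP_COUNTERS) - 1);
}

/*
 * Mirrors the MSR_IA32_PEBS_ENABLE case in intel_pmu_set_msr(): a write is
 * accepted only if it sets no bits marked reserved by the mask.
 */
static int pebs_enable_write_ok(uint64_t data, uint64_t mask)
{
	return !(data & mask);
}

int main(void)
{
	uint64_t global_ctrl = ((1ULL << NR_GP_COUNTERS) - 1) |
			       (((1ULL << NR_FIXED_COUNTERS) - 1) << INTEL_PMC_IDX_FIXED);

	/* Enabling PEBS on fixed counter 0 (bit 32) without baseline: rejected (0). */
	printf("fixed-ctr PEBS w/o baseline ok? %d\n",
	       pebs_enable_write_ok(1ULL << 32, pebs_enable_mask(0, global_ctrl)));
	/* With baseline the same write is accepted (1). */
	printf("fixed-ctr PEBS with baseline ok? %d\n",
	       pebs_enable_write_ok(1ULL << 32, pebs_enable_mask(1, global_ctrl)));
	return 0;
}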