@@ -455,6 +455,8 @@ struct kvm_pmu {
u64 pebs_data_cfg;
u64 pebs_data_cfg_mask;
+ bool counter_cross_mapped;
+
/*
* The gate to release perf_events not marked in
* pmc_in_use only once in a vcpu time slice.
@@ -550,3 +550,28 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
kfree(filter);
return r;
}
+
+/*
+ * Flag a PEBS counter whose perf_event is on a different host counter. Caller
+ * must ensure no perf hrtimer irq or resched of pmc->perf_event can intervene.
+ */
+void kvm_pmu_counter_cross_mapped_check(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc = NULL;
+ int bit;
+
+ pmu->counter_cross_mapped = false; /* recomputed from scratch on every call */
+
+ for_each_set_bit(bit, (unsigned long *)&pmu->pebs_enable, X86_PMC_IDX_MAX) {
+ pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
+
+ if (!pmc || !pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
+ continue; /* skip: no pmc, or the in-use/enabled checks fail */
+
+ if (pmc->perf_event && (pmc->idx != pmc->perf_event->hw.idx)) {
+ pmu->counter_cross_mapped = true; /* pmc->idx differs from perf's hw.idx */
+ break; /* a single mismatch is enough to set the flag */
+ }
+ }
+}
@@ -163,6 +163,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
+void kvm_pmu_counter_cross_mapped_check(struct kvm_vcpu *vcpu);
bool is_vmware_backdoor_pmc(u32 pmc_idx);
@@ -6542,6 +6542,9 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
if (!msrs)
return;
+ if (pmu->counter_cross_mapped)
+ msrs[1].guest = 0;
+
if (nr_msrs > 2 && msrs[1].guest) {
msrs[2].guest = pmu->ds_area;
if (nr_msrs > 3)
@@ -8936,6 +8936,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* result in virtual interrupt delivery.
*/
local_irq_disable();
+
+ if (vcpu_to_pmu(vcpu)->global_ctrl & vcpu_to_pmu(vcpu)->pebs_enable)
+ kvm_pmu_counter_cross_mapped_check(vcpu);
+
vcpu->mode = IN_GUEST_MODE;
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
KVM will check whether a guest PEBS counter X is cross-mapped to a different host counter Y. In this case, the applicable_counter field in the guest PEBS records is filled with the real host counter index(es), which is incorrect from the guest's point of view. Currently, KVM disables guest PEBS before vm-entry when this happens; later patches will add more emulation in KVM to keep PEBS working as it does on the host, such as rewriting the applicable_counter field in the guest PEBS records buffer. The cross-mapped check should be done right before vm-entry but after local_irq_disable(), since the perf scheduler could otherwise rotate pmc->perf_event to another host counter or put the event into an error state via the hrtimer irq. Signed-off-by: Like Xu <like.xu@linux.intel.com> --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/pmu.c | 25 +++++++++++++++++++++++++ arch/x86/kvm/pmu.h | 1 + arch/x86/kvm/vmx/vmx.c | 3 +++ arch/x86/kvm/x86.c | 4 ++++ 5 files changed, 35 insertions(+)