diff mbox series

[v3,3/7] KVM: x86/pmu: Avoid using PEBS perf_events for normal counters

Message ID 20220831085328.45489-4-likexu@tencent.com (mailing list archive)
State New, archived
Headers show
Series x86/pmu: Corner cases fixes and optimization | expand

Commit Message

Like Xu Aug. 31, 2022, 8:53 a.m. UTC
From: Like Xu <likexu@tencent.com>

The check logic in the pmc_resume_counter() to determine whether
a perf_event is reusable is partial and flawed, especially when it
comes to a pseudocode sequence (not correct but clearly valid) like:

  - enabling a counter and its PEBS bit
  - enable global_ctrl
  - run workload
  - disable only the PEBS bit, leaving the global_ctrl bit enabled

In this corner case, a perf_event created for PEBS can be reused by
a normal counter before it has been released and recreated, and when this
normal counter overflows, it triggers a PEBS interrupt (precise_ip != 0).

To address this issue, the reuse check has been revamped and KVM will
go back to do reprogram_counter() when any bit of guest PEBS_ENABLE
msr has changed, which is similar to what global_ctrl_changed() does.

Fixes: 79f3e3b58386 ("KVM: x86/pmu: Reprogram PEBS event to emulate guest PEBS counter")
Signed-off-by: Like Xu <likexu@tencent.com>
---
 arch/x86/kvm/pmu.c           |  4 ++--
 arch/x86/kvm/vmx/pmu_intel.c | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

Comments

Sean Christopherson Sept. 22, 2022, 10:35 p.m. UTC | #1
On Wed, Aug 31, 2022, Like Xu wrote:
> To address this issue, the reuse check has been revamped and KVM will

State what the patch does as a command, don't describe the effects in the past
tense.

> go back to do reprogram_counter() when any bit of guest PEBS_ENABLE
> msr has changed, which is similar to what global_ctrl_changed() does.

This managed to confuse the heck out of me.  I misread reprogram_counter() as
reprogram_counters() because that's what I see in the diff, and then got all
turned around because this patch also stealthily renames global_ctrl_changed() to
reprogram_counters().

This is a very good example of when splitting a refactor with a bug fix can help
tremendously, e.g. with the refactor moved to a separate patch, this fix becomes:

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 390d697efde1..d9b9a0f0db17 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -237,8 +237,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
                              get_sample_period(pmc, pmc->counter)))
                return false;
 
-       if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
-           pmc->perf_event->attr.precise_ip)
+       if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) !=
+           (!!pmc->perf_event->attr.precise_ip))
                return false;
 
        /* reuse perf_event to serve as pmc_reprogram_counter() does*/
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 5592b1259e1b..25b70a85bef5 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -431,7 +431,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                if (pmu->pebs_enable == data)
                        return 0;
                if (!(data & pmu->pebs_enable_mask)) {
+                       diff = pmu->pebs_enable ^ data;
                        pmu->pebs_enable = data;
+                       reprogram_counters(pmu, diff);
                        return 0;
                }
                break;

which is much, much easier to describe and to follow.

TL;DR: I split this into two patches.

> Fixes: 79f3e3b58386 ("KVM: x86/pmu: Reprogram PEBS event to emulate guest PEBS counter")
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---

...

> -/* function is called when global control register has been updated. */
> -static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
> +static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
diff mbox series

Patch

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 390d697efde1..d9b9a0f0db17 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -237,8 +237,8 @@  static bool pmc_resume_counter(struct kvm_pmc *pmc)
 			      get_sample_period(pmc, pmc->counter)))
 		return false;
 
-	if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
-	    pmc->perf_event->attr.precise_ip)
+	if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) !=
+	    (!!pmc->perf_event->attr.precise_ip))
 		return false;
 
 	/* reuse perf_event to serve as pmc_reprogram_counter() does*/
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index d595ff33d32d..6242b0b81116 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,15 +68,11 @@  static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
 	}
 }
 
-/* function is called when global control register has been updated. */
-static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
+static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
 {
 	int bit;
-	u64 diff = pmu->global_ctrl ^ data;
 	struct kvm_pmc *pmc;
 
-	pmu->global_ctrl = data;
-
 	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
 		pmc = intel_pmc_idx_to_pmc(pmu, bit);
 		if (pmc)
@@ -397,7 +393,7 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	struct kvm_pmc *pmc;
 	u32 msr = msr_info->index;
 	u64 data = msr_info->data;
-	u64 reserved_bits;
+	u64 reserved_bits, diff;
 
 	switch (msr) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -418,7 +414,9 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (pmu->global_ctrl == data)
 			return 0;
 		if (kvm_valid_perf_global_ctrl(pmu, data)) {
-			global_ctrl_changed(pmu, data);
+			diff = pmu->global_ctrl ^ data;
+			pmu->global_ctrl = data;
+			reprogram_counters(pmu, diff);
 			return 0;
 		}
 		break;
@@ -433,7 +431,9 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (pmu->pebs_enable == data)
 			return 0;
 		if (!(data & pmu->pebs_enable_mask)) {
+			diff = pmu->pebs_enable ^ data;
 			pmu->pebs_enable = data;
+			reprogram_counters(pmu, diff);
 			return 0;
 		}
 		break;