diff mbox series

[v5,04/13] KVM: vmx/pmu: Emulate MSR_ARCH_LBR_DEPTH for guest Arch LBR

Message ID 1625825111-6604-5-git-send-email-weijiang.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduce Architectural LBR for vPMU | expand

Commit Message

Yang Weijiang July 9, 2021, 10:05 a.m. UTC
From: Like Xu <like.xu@linux.intel.com>

The number of Arch LBR entries available is determined by the value
in host MSR_ARCH_LBR_DEPTH.DEPTH. The supported LBR depth values are
enumerated in CPUID.(EAX=01CH, ECX=0):EAX[7:0]. For each bit "n" set
in this field, the MSR_ARCH_LBR_DEPTH.DEPTH value of "8*(n+1)" is
supported.

On a guest write to MSR_ARCH_LBR_DEPTH, all LBR entries are reset to 0.
KVM emulates the reset behavior by introducing lbr_desc->arch_lbr_reset.
KVM writes the guest-requested value to the native ARCH_LBR_DEPTH MSR
(this is safe because the two values will be the same) when the Arch LBR
record MSRs are passed through to the guest.

Signed-off-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/kvm/vmx/pmu_intel.c | 46 +++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/vmx/vmx.h       |  3 +++
 2 files changed, 48 insertions(+), 1 deletion(-)

Comments

Jim Mattson July 9, 2021, 8:35 p.m. UTC | #1
On Fri, Jul 9, 2021 at 2:51 AM Yang Weijiang <weijiang.yang@intel.com> wrote:
>
> From: Like Xu <like.xu@linux.intel.com>
>
> The number of Arch LBR entries available is determined by the value
> in host MSR_ARCH_LBR_DEPTH.DEPTH. The supported LBR depth values are
> enumerated in CPUID.(EAX=01CH, ECX=0):EAX[7:0]. For each bit "n" set
> in this field, the MSR_ARCH_LBR_DEPTH.DEPTH value of "8*(n+1)" is
> supported.
>
> On a guest write to MSR_ARCH_LBR_DEPTH, all LBR entries are reset to 0.
> KVM emulates the reset behavior by introducing lbr_desc->arch_lbr_reset.
> KVM writes guest requested value to the native ARCH_LBR_DEPTH MSR
> (this is safe because the two values will be the same) when the Arch LBR
> records MSRs are pass-through to the guest.
>
> Signed-off-by: Like Xu <like.xu@linux.intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---

> @@ -393,6 +417,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  {
>         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>         struct kvm_pmc *pmc;
> +       struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
>         u32 msr = msr_info->index;
>         u64 data = msr_info->data;
>
> @@ -427,6 +452,12 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                         return 0;
>                 }
>                 break;
> +       case MSR_ARCH_LBR_DEPTH:
> +               if (!arch_lbr_depth_is_valid(vcpu, data))
> +                       return 1;

Does this imply that, when restoring a vCPU, KVM_SET_CPUID2 must be
called before KVM_SET_MSRS, so that arch_lbr_depth_is_valid() knows
what to do? Is this documented anywhere?

> +               lbr_desc->records.nr = data;
> +               lbr_desc->arch_lbr_reset = true;

Doesn't this make it impossible to restore vCPU state, since the LBRs
will be reset on the next VM-entry? At the very least, you probably
shouldn't set arch_lbr_reset when the MSR write is host-initiated.

However, there is another problem: arch_lbr_reset isn't serialized
anywhere. If you fix the host-initiated issue, then you still have a
problem if the last guest instruction prior to suspending the vCPU was
a write to IA32_LBR_DEPTH. If there is no subsequent VM-entry prior to
saving the vCPU state, then the LBRs will be saved/restored as part of
the guest XSAVE state, and they will not get cleared on resuming the
vCPU.

> +               return 0;
>         default:
>                 if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>                     (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -566,6 +597,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
>         lbr_desc->records.nr = 0;
>         lbr_desc->event = NULL;
>         lbr_desc->msr_passthrough = false;
> +       lbr_desc->arch_lbr_reset = false;

I'm not sure this is entirely correct. If the last guest instruction
prior to a warm reset was a write to IA32_LBR_DEPTH, then the LBRs
should be cleared (and arch_lbr_reset will be true). However, if you
clear that flag here, the LBRs will never get cleared.

>  }
>
Yang Weijiang July 12, 2021, 9:17 a.m. UTC | #2
On Fri, Jul 09, 2021 at 01:35:34PM -0700, Jim Mattson wrote:
> On Fri, Jul 9, 2021 at 2:51 AM Yang Weijiang <weijiang.yang@intel.com> wrote:
> >
> > From: Like Xu <like.xu@linux.intel.com>
> >
> > The number of Arch LBR entries available is determined by the value
> > in host MSR_ARCH_LBR_DEPTH.DEPTH. The supported LBR depth values are
> > enumerated in CPUID.(EAX=01CH, ECX=0):EAX[7:0]. For each bit "n" set
> > in this field, the MSR_ARCH_LBR_DEPTH.DEPTH value of "8*(n+1)" is
> > supported.
> >
> > On a guest write to MSR_ARCH_LBR_DEPTH, all LBR entries are reset to 0.
> > KVM emulates the reset behavior by introducing lbr_desc->arch_lbr_reset.
> > KVM writes guest requested value to the native ARCH_LBR_DEPTH MSR
> > (this is safe because the two values will be the same) when the Arch LBR
> > records MSRs are pass-through to the guest.
> >
> > Signed-off-by: Like Xu <like.xu@linux.intel.com>
> > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > ---
> 
> > @@ -393,6 +417,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  {
> >         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >         struct kvm_pmc *pmc;
> > +       struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
> >         u32 msr = msr_info->index;
> >         u64 data = msr_info->data;
> >
> > @@ -427,6 +452,12 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >                         return 0;
> >                 }
> >                 break;
> > +       case MSR_ARCH_LBR_DEPTH:
> > +               if (!arch_lbr_depth_is_valid(vcpu, data))
> > +                       return 1;
> 
> Does this imply that, when restoring a vCPU, KVM_SET_CPUID2 must be
> called before KVM_SET_MSRS, so that arch_lbr_depth_is_valid() knows
> what to do? Is this documented anywhere?
There shouldn't be any such assumption :-D, I'll check and modify it.
Thanks for pointing it out!

> 
> > +               lbr_desc->records.nr = data;
> > +               lbr_desc->arch_lbr_reset = true;
> 
> Doesn't this make it impossible to restore vCPU state, since the LBRs
> will be reset on the next VM-entry? At the very least, you probably
> shouldn't set arch_lbr_reset when the MSR write is host-initiated.
Host-initiated and guest-initiated writes should be distinguished; I'll change it.

> 
> However, there is another problem: arch_lbr_reset isn't serialized
> anywhere. If you fix the host-initiated issue, then you still have a
> problem if the last guest instruction prior to suspending the vCPU was
> a write to IA32_LBR_DEPTH. If there is no subsequent VM-entry prior to
> saving the vCPU state, then the LBRs will be saved/restored as part of
> the guest XSAVE state, and they will not get cleared on resuming the
> vCPU.
Yes, it's a problem. I'll replace the code with an on-the-spot MSR write to
reset it.

> 
> > +               return 0;
> >         default:
> >                 if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> >                     (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> > @@ -566,6 +597,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
> >         lbr_desc->records.nr = 0;
> >         lbr_desc->event = NULL;
> >         lbr_desc->msr_passthrough = false;
> > +       lbr_desc->arch_lbr_reset = false;
> 
> I'm not sure this is entirely correct. If the last guest instruction
> prior to a warm reset was a write to IA32_LBR_DEPTH, then the LBRs
> should be cleared (and arch_lbr_reset will be true). However, if you
> clear that flag here, the LBRs will never get cleared.
I hope the on-the-spot reset can avoid the above issue too.
Thanks!

> 
> >  }
> >
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9efc1a6b8693..da68f0e74702 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -211,7 +211,7 @@  static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
 static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	int ret;
+	int ret = 0;
 
 	switch (msr) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -220,6 +220,10 @@  static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		ret = pmu->version > 1;
 		break;
+	case MSR_ARCH_LBR_DEPTH:
+		if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
+			ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
+		break;
 	default:
 		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
 			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
@@ -250,6 +254,7 @@  static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
 	if (lbr_desc->event) {
 		perf_event_release_kernel(lbr_desc->event);
 		lbr_desc->event = NULL;
+		lbr_desc->arch_lbr_reset = false;
 		vcpu_to_pmu(vcpu)->event_count--;
 	}
 }
@@ -348,10 +353,26 @@  static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+/*
+ * Check if the requested depth value is supported
+ * based on the bits [0:7] of the guest cpuid.1c.eax.
+ */
+static bool arch_lbr_depth_is_valid(struct kvm_vcpu *vcpu, u64 depth)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x1c, 0);
+	if (best && depth && (depth < 65) && !(depth & 7))
+		return best->eax & BIT_ULL(depth / 8 - 1);
+
+	return false;
+}
+
 static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
+	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
 	u32 msr = msr_info->index;
 
 	switch (msr) {
@@ -367,6 +388,9 @@  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		msr_info->data = pmu->global_ovf_ctrl;
 		return 0;
+	case MSR_ARCH_LBR_DEPTH:
+		msr_info->data = lbr_desc->records.nr;
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -393,6 +417,7 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
+	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
 	u32 msr = msr_info->index;
 	u64 data = msr_info->data;
 
@@ -427,6 +452,12 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 0;
 		}
 		break;
+	case MSR_ARCH_LBR_DEPTH:
+		if (!arch_lbr_depth_is_valid(vcpu, data))
+			return 1;
+		lbr_desc->records.nr = data;
+		lbr_desc->arch_lbr_reset = true;
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -566,6 +597,7 @@  static void intel_pmu_init(struct kvm_vcpu *vcpu)
 	lbr_desc->records.nr = 0;
 	lbr_desc->event = NULL;
 	lbr_desc->msr_passthrough = false;
+	lbr_desc->arch_lbr_reset = false;
 }
 
 static void intel_pmu_reset(struct kvm_vcpu *vcpu)
@@ -623,6 +655,15 @@  static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
 		intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
 }
 
+static void intel_pmu_arch_lbr_reset(struct kvm_vcpu *vcpu)
+{
+	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
+
+	/* Software write to IA32_LBR_DEPTH will reset all LBR entries. */
+	wrmsrl(MSR_ARCH_LBR_DEPTH, lbr_desc->records.nr);
+	lbr_desc->arch_lbr_reset = false;
+}
+
 static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
 {
 	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
@@ -654,6 +695,9 @@  static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
 {
 	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
 
+	if (unlikely(lbr_desc->arch_lbr_reset))
+		intel_pmu_arch_lbr_reset(vcpu);
+
 	if (lbr_desc->msr_passthrough)
 		return;
 
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 16e4e457ba23..cc362e2d3eaa 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -116,6 +116,9 @@  struct lbr_desc {
 
 	/* True if LBRs are marked as not intercepted in the MSR bitmap */
 	bool msr_passthrough;
+
+	/* Reset all LBR entries on a guest write to MSR_ARCH_LBR_DEPTH */
+	bool arch_lbr_reset;
 };
 
 /*