Message ID | 20210303135756.1546253-7-like.xu@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: x86/pmu: Guest Architectural LBR Enabling | expand |
On Wed, Mar 03, 2021, Like Xu wrote: > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c > index 25d620685ae7..d14a14eb712d 100644 > --- a/arch/x86/kvm/vmx/pmu_intel.c > +++ b/arch/x86/kvm/vmx/pmu_intel.c > @@ -19,6 +19,7 @@ > #include "pmu.h" > > #define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) > +#define KVM_ARCH_LBR_CTL_MASK 0x7f000f It would nice to build this up with the individual bits instead of tossing in a magic number. > static struct kvm_event_hw_type_mapping intel_arch_events[] = { > /* Index must match CPUID 0x0A.EBX bit vector */ > @@ -221,6 +222,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) > ret = pmu->version > 1; > break; > case MSR_ARCH_LBR_DEPTH: > + case MSR_ARCH_LBR_CTL: > ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR); > break; > default: > @@ -390,6 +392,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > case MSR_ARCH_LBR_DEPTH: > msr_info->data = lbr_desc->records.nr; > return 0; > + case MSR_ARCH_LBR_CTL: > + msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL); > + return 0; > default: > if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || > (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { > @@ -457,6 +462,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > lbr_desc->records.nr = data; > lbr_desc->arch_lbr_reset = true; > return 0; > + case MSR_ARCH_LBR_CTL: > + if (!(data & ~KVM_ARCH_LBR_CTL_MASK)) { Maybe invert this to reduce indentation? if (data & ...) break; (or "return 1;") > + vmcs_write64(GUEST_IA32_LBR_CTL, data); > + if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event && > + (data & ARCH_LBR_CTL_LBREN)) Alignment. > + intel_pmu_create_guest_lbr_event(vcpu); > + return 0; > + } > + break; > default: > if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || > (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { > @@ -635,12 +649,15 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) > */ > static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu) > { > - u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL); > + u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL; > > - if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) { > - data &= ~DEBUGCTLMSR_LBR; > - vmcs_write64(GUEST_IA32_DEBUGCTL, data); > - } > + if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)) > + return; > + > + if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR)) > + lbr_ctl_field = GUEST_IA32_LBR_CTL; > + > + vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~BIT(0)); Use ARCH_LBR_CTL_LBREN? > } > > static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu) > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 6d7e760fdfa0..a0660b9934c6 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -2036,6 +2036,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > VM_EXIT_SAVE_DEBUG_CONTROLS) > get_vmcs12(vcpu)->guest_ia32_debugctl = data; > > + /* > + * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning. > + * It can be written to 0 or 1, but reads will always return 0. > + */ > + if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR)) > + data &= ~DEBUGCTLMSR_LBR; > + > vmcs_write64(GUEST_IA32_DEBUGCTL, data); > if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && > (data & DEBUGCTLMSR_LBR)) > @@ -4463,6 +4470,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > vmcs_writel(GUEST_SYSENTER_ESP, 0); > vmcs_writel(GUEST_SYSENTER_EIP, 0); > vmcs_write64(GUEST_IA32_DEBUGCTL, 0); > + if (cpu_has_vmx_arch_lbr()) > + vmcs_write64(GUEST_IA32_LBR_CTL, 0); Not that any guest is likely to care, but is the MSR cleared on INIT? The SDM has specific language for warm reset, but I can't find anything for INIT. On a warm reset, all LBR MSRs, including IA32_LBR_DEPTH, have their values preserved. However, IA32_LBR_CTL.LBREn is cleared to 0, disabling LBRs. If a warm reset is triggered while the processor is in C6, also known as warm init, all LBR MSRs will be reset to their initial values. > } > > kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); > -- > 2.29.2 >
On 2021/3/4 1:19, Sean Christopherson wrote: > On Wed, Mar 03, 2021, Like Xu wrote: >> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c >> index 25d620685ae7..d14a14eb712d 100644 >> --- a/arch/x86/kvm/vmx/pmu_intel.c >> +++ b/arch/x86/kvm/vmx/pmu_intel.c >> @@ -19,6 +19,7 @@ >> #include "pmu.h" >> >> #define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) >> +#define KVM_ARCH_LBR_CTL_MASK 0x7f000f > It would nice to build this up with the individual bits instead of tossing in a > magic number. I will move the mask ARCH_LBR_CTL_MASK from lbr.c to msr-index.h and thus, #define KVM_ARCH_LBR_CTL_MASK (ARCH_LBR_CTL_MASK | ARCH_LBR_CTL_LBREN) I assume the move operation would be a separate patch for host side. > >> static struct kvm_event_hw_type_mapping intel_arch_events[] = { >> /* Index must match CPUID 0x0A.EBX bit vector */ >> @@ -221,6 +222,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) >> ret = pmu->version > 1; >> break; >> case MSR_ARCH_LBR_DEPTH: >> + case MSR_ARCH_LBR_CTL: >> ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR); >> break; >> default: >> @@ -390,6 +392,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >> case MSR_ARCH_LBR_DEPTH: >> msr_info->data = lbr_desc->records.nr; >> return 0; >> + case MSR_ARCH_LBR_CTL: >> + msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL); >> + return 0; >> default: >> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || >> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { >> @@ -457,6 +462,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >> lbr_desc->records.nr = data; >> lbr_desc->arch_lbr_reset = true; >> return 0; >> + case MSR_ARCH_LBR_CTL: >> + if (!(data & ~KVM_ARCH_LBR_CTL_MASK)) { > Maybe invert this to reduce indentation? > > if (data & ...) > break; (or "return 1;") Fine to me. > > >> + vmcs_write64(GUEST_IA32_LBR_CTL, data); >> + if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event && >> + (data & ARCH_LBR_CTL_LBREN)) > Alignment. I'll fix it. > >> + intel_pmu_create_guest_lbr_event(vcpu); >> + return 0; >> + } >> + break; >> default: >> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || >> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { >> @@ -635,12 +649,15 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) >> */ >> static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu) >> { >> - u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL); >> + u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL; >> >> - if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) { >> - data &= ~DEBUGCTLMSR_LBR; >> - vmcs_write64(GUEST_IA32_DEBUGCTL, data); >> - } >> + if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)) >> + return; >> + >> + if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR)) >> + lbr_ctl_field = GUEST_IA32_LBR_CTL; >> + >> + vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~BIT(0)); > Use ARCH_LBR_CTL_LBREN? I'm trying to unify the usage of EN bit for both Arch LBR and legacy LBR: #define ARCH_LBR_CTL_LBREN BIT(0) #define DEBUGCTLMSR_LBR (1UL << 0) Any suggestion ? > >> } >> >> static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu) >> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >> index 6d7e760fdfa0..a0660b9934c6 100644 >> --- a/arch/x86/kvm/vmx/vmx.c >> +++ b/arch/x86/kvm/vmx/vmx.c >> @@ -2036,6 +2036,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >> VM_EXIT_SAVE_DEBUG_CONTROLS) >> get_vmcs12(vcpu)->guest_ia32_debugctl = data; >> >> + /* >> + * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning. >> + * It can be written to 0 or 1, but reads will always return 0. >> + */ >> + if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR)) >> + data &= ~DEBUGCTLMSR_LBR; >> + >> vmcs_write64(GUEST_IA32_DEBUGCTL, data); >> if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && >> (data & DEBUGCTLMSR_LBR)) >> @@ -4463,6 +4470,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) >> vmcs_writel(GUEST_SYSENTER_ESP, 0); >> vmcs_writel(GUEST_SYSENTER_EIP, 0); >> vmcs_write64(GUEST_IA32_DEBUGCTL, 0); >> + if (cpu_has_vmx_arch_lbr()) >> + vmcs_write64(GUEST_IA32_LBR_CTL, 0); > Not that any guest is likely to care, but is the MSR cleared on INIT? The SDM > has specific language for warm reset, but I can't find anything for INIT. > > On a warm reset, all LBR MSRs, including IA32_LBR_DEPTH, have their values > preserved. However, IA32_LBR_CTL.LBREn is cleared to 0, disabling LBRs. If a > warm reset is triggered while the processor is in C6, also known as warm init, > all LBR MSRs will be reset to their initial values. I was told that the reset behavior of GUEST_IA32_LBR_CTL would be the same as the GUEST_IA32_DEBUGCTL (true for INIT as well). It looks we have not strictly distinguished the guest's power concept C*. Do we have two trap paths for "warm reset" and "warm init" ? > >> } >> >> kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); >> -- >> 2.29.2 >>
On Thu, Mar 04, 2021, Xu, Like wrote: > On 2021/3/4 1:19, Sean Christopherson wrote: > > > @@ -4463,6 +4470,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > > > vmcs_writel(GUEST_SYSENTER_ESP, 0); > > > vmcs_writel(GUEST_SYSENTER_EIP, 0); > > > vmcs_write64(GUEST_IA32_DEBUGCTL, 0); > > > + if (cpu_has_vmx_arch_lbr()) > > > + vmcs_write64(GUEST_IA32_LBR_CTL, 0); > > Not that any guest is likely to care, but is the MSR cleared on INIT? The SDM > > has specific language for warm reset, but I can't find anything for INIT. > > > > On a warm reset, all LBR MSRs, including IA32_LBR_DEPTH, have their values > > preserved. However, IA32_LBR_CTL.LBREn is cleared to 0, disabling LBRs. If a > > warm reset is triggered while the processor is in C6, also known as warm init, > > all LBR MSRs will be reset to their initial values. > > I was told that the reset behavior of GUEST_IA32_LBR_CTL > would be the same as the GUEST_IA32_DEBUGCTL (true for INIT as well). Yes, and DEBUGCTL is preserved on INIT. if (!init_event) { vmcs_write32(GUEST_SYSENTER_CS, 0); vmcs_writel(GUEST_SYSENTER_ESP, 0); vmcs_writel(GUEST_SYSENTER_EIP, 0); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } Table 22-10 in the SDM: All Other MSRs | Pwr up or Reset: | INIT: Undefined Unchanged If IA32_LBR_DEPTH is weirdly exempt, it needs to be documented. I doubt that's the case though. > It looks we have not strictly distinguished the guest's power concept C*. > Do we have two trap paths for "warm reset" and "warm init" ? No. Despite the name .vcpu_reset, KVM doesn't even have a RESET path, userspace is responsible for modelling RESET.
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 358707f60d99..8ec7bc24b37a 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -245,6 +245,8 @@ enum vmcs_field { GUEST_BNDCFGS_HIGH = 0x00002813, GUEST_IA32_RTIT_CTL = 0x00002814, GUEST_IA32_RTIT_CTL_HIGH = 0x00002815, + GUEST_IA32_LBR_CTL = 0x00002816, + GUEST_IA32_LBR_CTL_HIGH = 0x00002817, HOST_IA32_PAT = 0x00002c00, HOST_IA32_PAT_HIGH = 0x00002c01, HOST_IA32_EFER = 0x00002c02, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 25d620685ae7..d14a14eb712d 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -19,6 +19,7 @@ #include "pmu.h" #define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) +#define KVM_ARCH_LBR_CTL_MASK 0x7f000f static struct kvm_event_hw_type_mapping intel_arch_events[] = { /* Index must match CPUID 0x0A.EBX bit vector */ @@ -221,6 +222,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) ret = pmu->version > 1; break; case MSR_ARCH_LBR_DEPTH: + case MSR_ARCH_LBR_CTL: ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR); break; default: @@ -390,6 +392,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_ARCH_LBR_DEPTH: msr_info->data = lbr_desc->records.nr; return 0; + case MSR_ARCH_LBR_CTL: + msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL); + return 0; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { @@ -457,6 +462,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) lbr_desc->records.nr = data; lbr_desc->arch_lbr_reset = true; return 0; + case MSR_ARCH_LBR_CTL: + if (!(data & ~KVM_ARCH_LBR_CTL_MASK)) { + vmcs_write64(GUEST_IA32_LBR_CTL, data); + if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event && + (data & ARCH_LBR_CTL_LBREN)) + intel_pmu_create_guest_lbr_event(vcpu); + return 0; + } + break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { @@ -635,12 +649,15 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) */ static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu) { - u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL); + u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL; - if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) { - data &= ~DEBUGCTLMSR_LBR; - vmcs_write64(GUEST_IA32_DEBUGCTL, data); - } + if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)) + return; + + if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR)) + lbr_ctl_field = GUEST_IA32_LBR_CTL; + + vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~BIT(0)); } static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6d7e760fdfa0..a0660b9934c6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2036,6 +2036,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) VM_EXIT_SAVE_DEBUG_CONTROLS) get_vmcs12(vcpu)->guest_ia32_debugctl = data; + /* + * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning. + * It can be written to 0 or 1, but reads will always return 0. + */ + if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR)) + data &= ~DEBUGCTLMSR_LBR; + vmcs_write64(GUEST_IA32_DEBUGCTL, data); if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && (data & DEBUGCTLMSR_LBR)) @@ -4463,6 +4470,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_writel(GUEST_SYSENTER_ESP, 0); vmcs_writel(GUEST_SYSENTER_EIP, 0); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + if (cpu_has_vmx_arch_lbr()) + vmcs_write64(GUEST_IA32_LBR_CTL, 0); } kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
Arch LBRs are enabled by setting MSR_ARCH_LBR_CTL.LBREn to 1. A new guest state field named "Guest IA32_LBR_CTL" is added to enhance guest LBR usage. When guest Arch LBR is enabled, a guest LBR event will be created like the model-specific LBR does. On processors that support Arch LBR, MSR_IA32_DEBUGCTLMSR[bit 0] has no meaning. It can be written to 0 or 1, but reads will always return 0. On the vmx_vcpu_reset(), the IA32_LBR_CTL will be cleared to 0. Signed-off-by: Like Xu <like.xu@linux.intel.com> --- arch/x86/include/asm/vmx.h | 2 ++ arch/x86/kvm/vmx/pmu_intel.c | 27 ++++++++++++++++++++++----- arch/x86/kvm/vmx/vmx.c | 9 +++++++++ 3 files changed, 33 insertions(+), 5 deletions(-)