Message ID | 20250324173121.1275209-22-mizhang@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Mediated vPMU 4.0 for x86 | expand |
On 3/24/2025 10:31 AM, Mingwei Zhang wrote: > From: Dapeng Mi <dapeng1.mi@linux.intel.com> > > Intel processor (vmx) provides capability to save/load guest > IA32_PERF_GLOBAL_CTRL at vm-exit/vm-entry by setting > VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL bit in VM-exit-ctrl or > VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL bit in VM-entry-ctrl. > > Mediated vPMU leverages both capabilities to save/load guest > IA32_PERF_GLOBAL_CTRL automatically at vm-exit/vm-entry. Note that the > former was introduced in SapphireRapids and later Intel CPUs. > > If VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL is unavailable, mediated PMU will be > disabled. Note that mediated PMU can be enabled by falling back to atomic > msr save/retore list. However, that would cause extra overhead per > VM-enter/exit. > > Since these VMX capability bits perform automatic saving/restoring of the > PMU global ctrl between VMCS and the HW MSR. No synchronization was > performed betwen HW MSR and pmu->global_ctrli, the KVM cached value . > Therefore, whenever KVM needs to use this variable, it will need to > explicitly read the value from MSR to pmu->global_ctrl. This is especially > so when guest doesn't own all PMU counters, i.e., when > IA32_PERF_GLOBAL_CTRL is interceped by mediated PMU. > > Suggested-by: Sean Christopherson <seanjc@google.com> > Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com> > Co-developed-by: Mingwei Zhang <mizhang@google.com> > Signed-off-by: Mingwei Zhang <mizhang@google.com> > --- > arch/x86/include/asm/kvm_host.h | 4 ++++ > arch/x86/include/asm/vmx.h | 1 + > arch/x86/kvm/pmu.c | 30 ++++++++++++++++++++++++- > arch/x86/kvm/vmx/capabilities.h | 5 +++++ > arch/x86/kvm/vmx/nested.c | 3 ++- > arch/x86/kvm/vmx/pmu_intel.c | 39 ++++++++++++++++++++++++++++++++- > arch/x86/kvm/vmx/vmx.c | 22 ++++++++++++++++++- > arch/x86/kvm/vmx/vmx.h | 3 ++- > 8 files changed, 102 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 0b7af5902ff7..4b3bfefc2d05 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -553,6 +553,10 @@ struct kvm_pmu { > unsigned available_event_types; > u64 fixed_ctr_ctrl; > u64 fixed_ctr_ctrl_rsvd; > + /* > + * kvm_pmu_sync_global_ctrl_from_vmcs() must be called to update > + * this SW-maintained global_ctrl for mediated vPMU before accessing it. > + */ > u64 global_ctrl; > u64 global_status; > u64 counter_bitmask[2]; > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > index f7fd4369b821..48e137560f17 100644 > --- a/arch/x86/include/asm/vmx.h > +++ b/arch/x86/include/asm/vmx.h > @@ -106,6 +106,7 @@ > #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 > #define VM_EXIT_PT_CONCEAL_PIP 0x01000000 > #define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 > +#define VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL 0x40000000 > > #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff > > diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c > index 6ad71752be4b..4e8cefcce7ab 100644 > --- a/arch/x86/kvm/pmu.c > +++ b/arch/x86/kvm/pmu.c > @@ -646,6 +646,30 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) > } > } > > +static void kvm_pmu_sync_global_ctrl_from_vmcs(struct kvm_vcpu *vcpu) > +{ > + struct msr_data msr_info = { .index = MSR_CORE_PERF_GLOBAL_CTRL }; > + > + if (!kvm_mediated_pmu_enabled(vcpu)) > + return; > + > + /* Sync pmu->global_ctrl from GUEST_IA32_PERF_GLOBAL_CTRL. */ > + kvm_pmu_call(get_msr)(vcpu, &msr_info); > +} > + > +static void kvm_pmu_sync_global_ctrl_to_vmcs(struct kvm_vcpu *vcpu, u64 global_ctrl) > +{ > + struct msr_data msr_info = { > + .index = MSR_CORE_PERF_GLOBAL_CTRL, > + .data = global_ctrl }; > + > + if (!kvm_mediated_pmu_enabled(vcpu)) > + return; > + > + /* Sync pmu->global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */ > + kvm_pmu_call(set_msr)(vcpu, &msr_info); > +} > + > bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) > { > switch (msr) { > @@ -680,7 +704,6 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > msr_info->data = pmu->global_status; > break; > case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: > - case MSR_CORE_PERF_GLOBAL_CTRL: > msr_info->data = pmu->global_ctrl; > break; > case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: > @@ -731,6 +754,9 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) pmu->global_ctrl doesn't always have the up-to-date guest value, need to sync from vmcs/vmbc before comparing it against 'data'. + kvm_pmu_sync_global_ctrl_from_vmcs(vcpu); if (pmu->global_ctrl != data) { > diff = pmu->global_ctrl ^ data; > pmu->global_ctrl = data; > reprogram_counters(pmu, diff); > + > + /* Propagate guest global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */ > + kvm_pmu_sync_global_ctrl_to_vmcs(vcpu, data); > } > break; > case MSR_CORE_PERF_GLOBAL_OVF_CTRL: > @@ -907,6 +933,8 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel) > > BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX); > > + kvm_pmu_sync_global_ctrl_from_vmcs(vcpu); > + > if (!kvm_pmu_has_perf_global_ctrl(pmu)) > bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); > else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx, > diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h > index 013536fde10b..cc63bd4ab87c 100644 > --- a/arch/x86/kvm/vmx/capabilities.h > +++ b/arch/x86/kvm/vmx/capabilities.h > @@ -101,6 +101,11 @@ static inline bool cpu_has_load_perf_global_ctrl(void) > return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; > } > > +static inline bool cpu_has_save_perf_global_ctrl(void) > +{ > + return vmcs_config.vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL; > +} > + > static inline bool cpu_has_vmx_mpx(void) > { > return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS; > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 8a7af02d466e..ecf72394684d 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -7004,7 +7004,8 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT | > - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; > + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | > + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL; > > /* We support free control of debug control saving. */ > msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c > index 2a5f79206b02..04a893e56135 100644 > --- a/arch/x86/kvm/vmx/pmu_intel.c > +++ b/arch/x86/kvm/vmx/pmu_intel.c > @@ -294,6 +294,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > u32 msr = msr_info->index; > > switch (msr) { > + case MSR_CORE_PERF_GLOBAL_CTRL: > + if (kvm_mediated_pmu_enabled(vcpu)) > + pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL); > + msr_info->data = pmu->global_ctrl; > + break; > case MSR_CORE_PERF_FIXED_CTR_CTRL: > msr_info->data = pmu->fixed_ctr_ctrl; > break; > @@ -339,6 +344,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > u64 reserved_bits, diff; > > switch (msr) { > + case MSR_CORE_PERF_GLOBAL_CTRL: > + if (kvm_mediated_pmu_enabled(vcpu)) > + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, > + pmu->global_ctrl); > + break; > case MSR_CORE_PERF_FIXED_CTR_CTRL: > if (data & pmu->fixed_ctr_ctrl_rsvd) > return 1; > @@ -558,10 +568,37 @@ static void __intel_pmu_refresh(struct kvm_vcpu *vcpu) > > static void intel_pmu_refresh(struct kvm_vcpu *vcpu) > { > + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + bool mediated; > + > __intel_pmu_refresh(vcpu); > > - exec_controls_changebit(to_vmx(vcpu), CPU_BASED_RDPMC_EXITING, > + exec_controls_changebit(vmx, CPU_BASED_RDPMC_EXITING, > !kvm_rdpmc_in_guest(vcpu)); > + > + mediated = kvm_mediated_pmu_enabled(vcpu); > + if (cpu_has_load_perf_global_ctrl()) { > + vm_entry_controls_changebit(vmx, > + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, mediated); > + /* > + * Initialize guest PERF_GLOBAL_CTRL to reset value as SDM rules. > + * > + * Note: GUEST_IA32_PERF_GLOBAL_CTRL must be initialized to > + * "BIT_ULL(pmu->nr_arch_gp_counters) - 1" instead of pmu->global_ctrl > + * since pmu->global_ctrl is only be initialized when guest > + * pmu->version > 1. Otherwise if pmu->version is 1, pmu->global_ctrl > + * is 0 and guest counters are never really enabled. > + */ > + if (mediated) > + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, > + BIT_ULL(pmu->nr_arch_gp_counters) - 1); > + } > + > + if (cpu_has_save_perf_global_ctrl()) > + vm_exit_controls_changebit(vmx, > + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | > + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL, mediated); > } > > static void intel_pmu_init(struct kvm_vcpu *vcpu) > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index ff66f17d6358..38ecf3c116bd 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -4390,6 +4390,13 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) > > if (cpu_has_load_ia32_efer()) > vmcs_write64(HOST_IA32_EFER, kvm_host.efer); > + > + /* > + * Initialize host PERF_GLOBAL_CTRL to 0 to disable all counters > + * immediately once VM exits. Mediated vPMU then call perf_guest_exit() > + * to re-enable host perf events. > + */ > + vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, 0); > } > > void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) > @@ -4457,7 +4464,8 @@ static u32 vmx_get_initial_vmexit_ctrl(void) > VM_EXIT_CLEAR_IA32_RTIT_CTL); > /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ > return vmexit_ctrl & > - ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); > + ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER | > + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL); > } > > void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) > @@ -7196,6 +7204,9 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) > struct perf_guest_switch_msr *msrs; > struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu); > > + if (kvm_mediated_pmu_enabled(&vmx->vcpu)) > + return; > + > pmu->host_cross_mapped_mask = 0; > if (pmu->pebs_enable & pmu->global_ctrl) > intel_pmu_cross_mapped_check(pmu); > @@ -8451,6 +8462,15 @@ __init int vmx_hardware_setup(void) > enable_sgx = false; > #endif > > + /* > + * All CPUs that support a mediated PMU are expected to support loading > + * and saving PERF_GLOBAL_CTRL via dedicated VMCS fields. > + */ > + if (enable_mediated_pmu && > + (WARN_ON_ONCE(!cpu_has_load_perf_global_ctrl() || > + !cpu_has_save_perf_global_ctrl()))) > + enable_mediated_pmu = false; > + > /* > * set_apic_access_page_addr() is used to reload apic access > * page upon invalidation. No need to do anything if not > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h > index 5c505af553c8..b282165f98a6 100644 > --- a/arch/x86/kvm/vmx/vmx.h > +++ b/arch/x86/kvm/vmx/vmx.h > @@ -510,7 +510,8 @@ static inline u8 vmx_get_rvi(void) > VM_EXIT_LOAD_IA32_EFER | \ > VM_EXIT_CLEAR_BNDCFGS | \ > VM_EXIT_PT_CONCEAL_PIP | \ > - VM_EXIT_CLEAR_IA32_RTIT_CTL) > + VM_EXIT_CLEAR_IA32_RTIT_CTL | \ > + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL) > > #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \ > (PIN_BASED_EXT_INTR_MASK | \
On Wed, Mar 26, 2025 at 9:51 AM Chen, Zide <zide.chen@intel.com> wrote: > > > > On 3/24/2025 10:31 AM, Mingwei Zhang wrote: > > From: Dapeng Mi <dapeng1.mi@linux.intel.com> > > > > Intel processor (vmx) provides capability to save/load guest > > IA32_PERF_GLOBAL_CTRL at vm-exit/vm-entry by setting > > VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL bit in VM-exit-ctrl or > > VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL bit in VM-entry-ctrl. > > > > Mediated vPMU leverages both capabilities to save/load guest > > IA32_PERF_GLOBAL_CTRL automatically at vm-exit/vm-entry. Note that the > > former was introduced in SapphireRapids and later Intel CPUs. > > > > If VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL is unavailable, mediated PMU will be > > disabled. Note that mediated PMU can be enabled by falling back to atomic > > msr save/retore list. However, that would cause extra overhead per > > VM-enter/exit. > > > > Since these VMX capability bits perform automatic saving/restoring of the > > PMU global ctrl between VMCS and the HW MSR. No synchronization was > > performed betwen HW MSR and pmu->global_ctrli, the KVM cached value . > > Therefore, whenever KVM needs to use this variable, it will need to > > explicitly read the value from MSR to pmu->global_ctrl. This is especially > > so when guest doesn't own all PMU counters, i.e., when > > IA32_PERF_GLOBAL_CTRL is interceped by mediated PMU. > > > > Suggested-by: Sean Christopherson <seanjc@google.com> > > Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com> > > Co-developed-by: Mingwei Zhang <mizhang@google.com> > > Signed-off-by: Mingwei Zhang <mizhang@google.com> > > --- > > arch/x86/include/asm/kvm_host.h | 4 ++++ > > arch/x86/include/asm/vmx.h | 1 + > > arch/x86/kvm/pmu.c | 30 ++++++++++++++++++++++++- > > arch/x86/kvm/vmx/capabilities.h | 5 +++++ > > arch/x86/kvm/vmx/nested.c | 3 ++- > > arch/x86/kvm/vmx/pmu_intel.c | 39 ++++++++++++++++++++++++++++++++- > > arch/x86/kvm/vmx/vmx.c | 22 ++++++++++++++++++- > > arch/x86/kvm/vmx/vmx.h | 3 ++- > > 8 files changed, 102 insertions(+), 5 deletions(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index 0b7af5902ff7..4b3bfefc2d05 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -553,6 +553,10 @@ struct kvm_pmu { > > unsigned available_event_types; > > u64 fixed_ctr_ctrl; > > u64 fixed_ctr_ctrl_rsvd; > > + /* > > + * kvm_pmu_sync_global_ctrl_from_vmcs() must be called to update > > + * this SW-maintained global_ctrl for mediated vPMU before accessing it. > > + */ > > u64 global_ctrl; > > u64 global_status; > > u64 counter_bitmask[2]; > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > > index f7fd4369b821..48e137560f17 100644 > > --- a/arch/x86/include/asm/vmx.h > > +++ b/arch/x86/include/asm/vmx.h > > @@ -106,6 +106,7 @@ > > #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 > > #define VM_EXIT_PT_CONCEAL_PIP 0x01000000 > > #define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 > > +#define VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL 0x40000000 > > > > #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff > > > > diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c > > index 6ad71752be4b..4e8cefcce7ab 100644 > > --- a/arch/x86/kvm/pmu.c > > +++ b/arch/x86/kvm/pmu.c > > @@ -646,6 +646,30 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) > > } > > } > > > > +static void kvm_pmu_sync_global_ctrl_from_vmcs(struct kvm_vcpu *vcpu) > > +{ > > + struct msr_data msr_info = { .index = MSR_CORE_PERF_GLOBAL_CTRL }; > > + > > + if (!kvm_mediated_pmu_enabled(vcpu)) > > + return; > > + > > + /* Sync pmu->global_ctrl from GUEST_IA32_PERF_GLOBAL_CTRL. */ > > + kvm_pmu_call(get_msr)(vcpu, &msr_info); > > +} > > + > > +static void kvm_pmu_sync_global_ctrl_to_vmcs(struct kvm_vcpu *vcpu, u64 global_ctrl) > > +{ > > + struct msr_data msr_info = { > > + .index = MSR_CORE_PERF_GLOBAL_CTRL, > > + .data = global_ctrl }; > > + > > + if (!kvm_mediated_pmu_enabled(vcpu)) > > + return; > > + > > + /* Sync pmu->global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */ > > + kvm_pmu_call(set_msr)(vcpu, &msr_info); > > +} > > + > > bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) > > { > > switch (msr) { > > @@ -680,7 +704,6 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > msr_info->data = pmu->global_status; > > break; > > case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: > > - case MSR_CORE_PERF_GLOBAL_CTRL: > > msr_info->data = pmu->global_ctrl; > > break; > > case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: > > @@ -731,6 +754,9 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > > pmu->global_ctrl doesn't always have the up-to-date guest value, need to > sync from vmcs/vmbc before comparing it against 'data'. > > + kvm_pmu_sync_global_ctrl_from_vmcs(vcpu); > if (pmu->global_ctrl != data) { Good catch. Thanks! This is why I really prefer just unconditionally syncing the global ctrl from VMCS to pmu->global_ctrl and vice versa. We might get into similar problems as well in the future. > > > diff = pmu->global_ctrl ^ data; > > pmu->global_ctrl = data; > > reprogram_counters(pmu, diff); > > + > > + /* Propagate guest global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */ > > + kvm_pmu_sync_global_ctrl_to_vmcs(vcpu, data); > > } > > break; > > case MSR_CORE_PERF_GLOBAL_OVF_CTRL: > > @@ -907,6 +933,8 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel) > > > > BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX); > > > > + kvm_pmu_sync_global_ctrl_from_vmcs(vcpu); > > + > > if (!kvm_pmu_has_perf_global_ctrl(pmu)) > > bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); > > else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx, > > diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h > > index 013536fde10b..cc63bd4ab87c 100644 > > --- a/arch/x86/kvm/vmx/capabilities.h > > +++ b/arch/x86/kvm/vmx/capabilities.h > > @@ -101,6 +101,11 @@ static inline bool cpu_has_load_perf_global_ctrl(void) > > return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; > > } > > > > +static inline bool cpu_has_save_perf_global_ctrl(void) > > +{ > > + return vmcs_config.vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL; > > +} > > + > > static inline bool cpu_has_vmx_mpx(void) > > { > > return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS; > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > > index 8a7af02d466e..ecf72394684d 100644 > > --- a/arch/x86/kvm/vmx/nested.c > > +++ b/arch/x86/kvm/vmx/nested.c > > @@ -7004,7 +7004,8 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, > > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > > VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | > > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT | > > - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; > > + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | > > + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL; > > > > /* We support free control of debug control saving. */ > > msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; > > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c > > index 2a5f79206b02..04a893e56135 100644 > > --- a/arch/x86/kvm/vmx/pmu_intel.c > > +++ b/arch/x86/kvm/vmx/pmu_intel.c > > @@ -294,6 +294,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > u32 msr = msr_info->index; > > > > switch (msr) { > > + case MSR_CORE_PERF_GLOBAL_CTRL: > > + if (kvm_mediated_pmu_enabled(vcpu)) > > + pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL); > > + msr_info->data = pmu->global_ctrl; > > + break; > > case MSR_CORE_PERF_FIXED_CTR_CTRL: > > msr_info->data = pmu->fixed_ctr_ctrl; > > break; > > @@ -339,6 +344,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > u64 reserved_bits, diff; > > > > switch (msr) { > > + case MSR_CORE_PERF_GLOBAL_CTRL: > > + if (kvm_mediated_pmu_enabled(vcpu)) > > + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, > > + pmu->global_ctrl); > > + break; > > case MSR_CORE_PERF_FIXED_CTR_CTRL: > > if (data & pmu->fixed_ctr_ctrl_rsvd) > > return 1; > > @@ -558,10 +568,37 @@ static void __intel_pmu_refresh(struct kvm_vcpu *vcpu) > > > > static void intel_pmu_refresh(struct kvm_vcpu *vcpu) > > { > > + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); > > + struct vcpu_vmx *vmx = to_vmx(vcpu); > > + bool mediated; > > + > > __intel_pmu_refresh(vcpu); > > > > - exec_controls_changebit(to_vmx(vcpu), CPU_BASED_RDPMC_EXITING, > > + exec_controls_changebit(vmx, CPU_BASED_RDPMC_EXITING, > > !kvm_rdpmc_in_guest(vcpu)); > > + > > + mediated = kvm_mediated_pmu_enabled(vcpu); > > + if (cpu_has_load_perf_global_ctrl()) { > > + vm_entry_controls_changebit(vmx, > > + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, mediated); > > + /* > > + * Initialize guest PERF_GLOBAL_CTRL to reset value as SDM rules. > > + * > > + * Note: GUEST_IA32_PERF_GLOBAL_CTRL must be initialized to > > + * "BIT_ULL(pmu->nr_arch_gp_counters) - 1" instead of pmu->global_ctrl > > + * since pmu->global_ctrl is only be initialized when guest > > + * pmu->version > 1. Otherwise if pmu->version is 1, pmu->global_ctrl > > + * is 0 and guest counters are never really enabled. > > + */ > > + if (mediated) > > + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, > > + BIT_ULL(pmu->nr_arch_gp_counters) - 1); > > + } > > + > > + if (cpu_has_save_perf_global_ctrl()) > > + vm_exit_controls_changebit(vmx, > > + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | > > + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL, mediated); > > } > > > > static void intel_pmu_init(struct kvm_vcpu *vcpu) > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > index ff66f17d6358..38ecf3c116bd 100644 > > --- a/arch/x86/kvm/vmx/vmx.c > > +++ b/arch/x86/kvm/vmx/vmx.c > > @@ -4390,6 +4390,13 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) > > > > if (cpu_has_load_ia32_efer()) > > vmcs_write64(HOST_IA32_EFER, kvm_host.efer); > > + > > + /* > > + * Initialize host PERF_GLOBAL_CTRL to 0 to disable all counters > > + * immediately once VM exits. Mediated vPMU then call perf_guest_exit() > > + * to re-enable host perf events. > > + */ > > + vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, 0); > > } > > > > void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) > > @@ -4457,7 +4464,8 @@ static u32 vmx_get_initial_vmexit_ctrl(void) > > VM_EXIT_CLEAR_IA32_RTIT_CTL); > > /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ > > return vmexit_ctrl & > > - ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); > > + ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER | > > + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL); > > } > > > > void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) > > @@ -7196,6 +7204,9 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) > > struct perf_guest_switch_msr *msrs; > > struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu); > > > > + if (kvm_mediated_pmu_enabled(&vmx->vcpu)) > > + return; > > + > > pmu->host_cross_mapped_mask = 0; > > if (pmu->pebs_enable & pmu->global_ctrl) > > intel_pmu_cross_mapped_check(pmu); > > @@ -8451,6 +8462,15 @@ __init int vmx_hardware_setup(void) > > enable_sgx = false; > > #endif > > > > + /* > > + * All CPUs that support a mediated PMU are expected to support loading > > + * and saving PERF_GLOBAL_CTRL via dedicated VMCS fields. > > + */ > > + if (enable_mediated_pmu && > > + (WARN_ON_ONCE(!cpu_has_load_perf_global_ctrl() || > > + !cpu_has_save_perf_global_ctrl()))) > > + enable_mediated_pmu = false; > > + > > /* > > * set_apic_access_page_addr() is used to reload apic access > > * page upon invalidation. No need to do anything if not > > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h > > index 5c505af553c8..b282165f98a6 100644 > > --- a/arch/x86/kvm/vmx/vmx.h > > +++ b/arch/x86/kvm/vmx/vmx.h > > @@ -510,7 +510,8 @@ static inline u8 vmx_get_rvi(void) > > VM_EXIT_LOAD_IA32_EFER | \ > > VM_EXIT_CLEAR_BNDCFGS | \ > > VM_EXIT_PT_CONCEAL_PIP | \ > > - VM_EXIT_CLEAR_IA32_RTIT_CTL) > > + VM_EXIT_CLEAR_IA32_RTIT_CTL | \ > > + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL) > > > > #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \ > > (PIN_BASED_EXT_INTR_MASK | \ >
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b7af5902ff7..4b3bfefc2d05 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -553,6 +553,10 @@ struct kvm_pmu { unsigned available_event_types; u64 fixed_ctr_ctrl; u64 fixed_ctr_ctrl_rsvd; + /* + * kvm_pmu_sync_global_ctrl_from_vmcs() must be called to update + * this SW-maintained global_ctrl for mediated vPMU before accessing it. + */ u64 global_ctrl; u64 global_status; u64 counter_bitmask[2]; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f7fd4369b821..48e137560f17 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -106,6 +106,7 @@ #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 #define VM_EXIT_PT_CONCEAL_PIP 0x01000000 #define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 +#define VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL 0x40000000 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 6ad71752be4b..4e8cefcce7ab 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -646,6 +646,30 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) } } +static void kvm_pmu_sync_global_ctrl_from_vmcs(struct kvm_vcpu *vcpu) +{ + struct msr_data msr_info = { .index = MSR_CORE_PERF_GLOBAL_CTRL }; + + if (!kvm_mediated_pmu_enabled(vcpu)) + return; + + /* Sync pmu->global_ctrl from GUEST_IA32_PERF_GLOBAL_CTRL. */ + kvm_pmu_call(get_msr)(vcpu, &msr_info); +} + +static void kvm_pmu_sync_global_ctrl_to_vmcs(struct kvm_vcpu *vcpu, u64 global_ctrl) +{ + struct msr_data msr_info = { + .index = MSR_CORE_PERF_GLOBAL_CTRL, + .data = global_ctrl }; + + if (!kvm_mediated_pmu_enabled(vcpu)) + return; + + /* Sync pmu->global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */ + kvm_pmu_call(set_msr)(vcpu, &msr_info); +} + bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) { switch (msr) { @@ -680,7 +704,6 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = pmu->global_status; break; case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: - case MSR_CORE_PERF_GLOBAL_CTRL: msr_info->data = pmu->global_ctrl; break; case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: @@ -731,6 +754,9 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) diff = pmu->global_ctrl ^ data; pmu->global_ctrl = data; reprogram_counters(pmu, diff); + + /* Propagate guest global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */ + kvm_pmu_sync_global_ctrl_to_vmcs(vcpu, data); } break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: @@ -907,6 +933,8 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel) BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX); + kvm_pmu_sync_global_ctrl_from_vmcs(vcpu); + if (!kvm_pmu_has_perf_global_ctrl(pmu)) bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 013536fde10b..cc63bd4ab87c 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -101,6 +101,11 @@ static inline bool cpu_has_load_perf_global_ctrl(void) return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; } +static inline bool cpu_has_save_perf_global_ctrl(void) +{ + return vmcs_config.vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL; +} + static inline bool cpu_has_vmx_mpx(void) { return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8a7af02d466e..ecf72394684d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7004,7 +7004,8 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT | - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL; /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 2a5f79206b02..04a893e56135 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -294,6 +294,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u32 msr = msr_info->index; switch (msr) { + case MSR_CORE_PERF_GLOBAL_CTRL: + if (kvm_mediated_pmu_enabled(vcpu)) + pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL); + msr_info->data = pmu->global_ctrl; + break; case MSR_CORE_PERF_FIXED_CTR_CTRL: msr_info->data = pmu->fixed_ctr_ctrl; break; @@ -339,6 +344,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 reserved_bits, diff; switch (msr) { + case MSR_CORE_PERF_GLOBAL_CTRL: + if (kvm_mediated_pmu_enabled(vcpu)) + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, + pmu->global_ctrl); + break; case MSR_CORE_PERF_FIXED_CTR_CTRL: if (data & pmu->fixed_ctr_ctrl_rsvd) return 1; @@ -558,10 +568,37 @@ static void __intel_pmu_refresh(struct kvm_vcpu *vcpu) static void intel_pmu_refresh(struct kvm_vcpu *vcpu) { + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool mediated; + __intel_pmu_refresh(vcpu); - exec_controls_changebit(to_vmx(vcpu), CPU_BASED_RDPMC_EXITING, + exec_controls_changebit(vmx, CPU_BASED_RDPMC_EXITING, !kvm_rdpmc_in_guest(vcpu)); + + mediated = kvm_mediated_pmu_enabled(vcpu); + if (cpu_has_load_perf_global_ctrl()) { + vm_entry_controls_changebit(vmx, + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, mediated); + /* + * Initialize guest PERF_GLOBAL_CTRL to reset value as SDM rules. + * + * Note: GUEST_IA32_PERF_GLOBAL_CTRL must be initialized to + * "BIT_ULL(pmu->nr_arch_gp_counters) - 1" instead of pmu->global_ctrl + * since pmu->global_ctrl is only be initialized when guest + * pmu->version > 1. Otherwise if pmu->version is 1, pmu->global_ctrl + * is 0 and guest counters are never really enabled. + */ + if (mediated) + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, + BIT_ULL(pmu->nr_arch_gp_counters) - 1); + } + + if (cpu_has_save_perf_global_ctrl()) + vm_exit_controls_changebit(vmx, + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL, mediated); } static void intel_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ff66f17d6358..38ecf3c116bd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4390,6 +4390,13 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) if (cpu_has_load_ia32_efer()) vmcs_write64(HOST_IA32_EFER, kvm_host.efer); + + /* + * Initialize host PERF_GLOBAL_CTRL to 0 to disable all counters + * immediately once VM exits. Mediated vPMU then call perf_guest_exit() + * to re-enable host perf events. + */ + vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, 0); } void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) @@ -4457,7 +4464,8 @@ static u32 vmx_get_initial_vmexit_ctrl(void) VM_EXIT_CLEAR_IA32_RTIT_CTL); /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ return vmexit_ctrl & - ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); + ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER | + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL); } void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) @@ -7196,6 +7204,9 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) struct perf_guest_switch_msr *msrs; struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu); + if (kvm_mediated_pmu_enabled(&vmx->vcpu)) + return; + pmu->host_cross_mapped_mask = 0; if (pmu->pebs_enable & pmu->global_ctrl) intel_pmu_cross_mapped_check(pmu); @@ -8451,6 +8462,15 @@ __init int vmx_hardware_setup(void) enable_sgx = false; #endif + /* + * All CPUs that support a mediated PMU are expected to support loading + * and saving PERF_GLOBAL_CTRL via dedicated VMCS fields. + */ + if (enable_mediated_pmu && + (WARN_ON_ONCE(!cpu_has_load_perf_global_ctrl() || + !cpu_has_save_perf_global_ctrl()))) + enable_mediated_pmu = false; + /* * set_apic_access_page_addr() is used to reload apic access * page upon invalidation. No need to do anything if not diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 5c505af553c8..b282165f98a6 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -510,7 +510,8 @@ static inline u8 vmx_get_rvi(void) VM_EXIT_LOAD_IA32_EFER | \ VM_EXIT_CLEAR_BNDCFGS | \ VM_EXIT_PT_CONCEAL_PIP | \ - VM_EXIT_CLEAR_IA32_RTIT_CTL) + VM_EXIT_CLEAR_IA32_RTIT_CTL | \ + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL) #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \ (PIN_BASED_EXT_INTR_MASK | \