@@ -7684,6 +7684,7 @@ Valid bits in args[0] are::
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
+ #define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some
@@ -1381,6 +1381,7 @@ struct kvm_arch {
bool hlt_in_guest;
bool pause_in_guest;
bool cstate_in_guest;
+ bool aperfmperf_in_guest;
unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
@@ -111,6 +111,8 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_IA32_CR_PAT, .always = false },
{ .index = MSR_AMD64_SEV_ES_GHCB, .always = true },
{ .index = MSR_TSC_AUX, .always = false },
+ { .index = MSR_IA32_APERF, .always = false },
+ { .index = MSR_IA32_MPERF, .always = false },
{ .index = X2APIC_MSR(APIC_ID), .always = false },
{ .index = X2APIC_MSR(APIC_LVR), .always = false },
{ .index = X2APIC_MSR(APIC_TASKPRI), .always = false },
@@ -1359,6 +1361,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+ if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_APERF, 1, 0);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_MPERF, 1, 0);
+ }
+
if (kvm_vcpu_apicv_active(vcpu))
avic_init_vmcb(svm, vmcb);
@@ -44,7 +44,7 @@ static inline struct page *__sme_pa_to_page(unsigned long pa)
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 48
+#define MAX_DIRECT_ACCESS_MSRS 50
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
@@ -186,6 +186,8 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_CORE_C3_RESIDENCY,
MSR_CORE_C6_RESIDENCY,
MSR_CORE_C7_RESIDENCY,
+ MSR_IA32_APERF,
+ MSR_IA32_MPERF,
};
/*
@@ -7593,6 +7595,10 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
}
+ if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
+ }
vmx->loaded_vmcs = &vmx->vmcs01;
@@ -356,7 +356,7 @@ struct vcpu_vmx {
struct lbr_desc lbr_desc;
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 16
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 18
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
@@ -4535,6 +4535,9 @@ static u64 kvm_get_allowed_disable_exits(void)
{
u64 r = KVM_X86_DISABLE_EXITS_PAUSE;
+ if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+ r |= KVM_X86_DISABLE_EXITS_APERFMPERF;
+
if (!mitigate_smt_rsb) {
r |= KVM_X86_DISABLE_EXITS_HLT |
KVM_X86_DISABLE_EXITS_CSTATE;
@@ -6543,7 +6546,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) &&
cpu_smt_possible() &&
- (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
+ (cap->args[0] & ~(KVM_X86_DISABLE_EXITS_PAUSE |
+ KVM_X86_DISABLE_EXITS_APERFMPERF)))
pr_warn_once(SMT_RSB_MSG);
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
@@ -6554,6 +6558,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
kvm->arch.cstate_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_APERFMPERF)
+ kvm->arch.aperfmperf_in_guest = true;
r = 0;
disable_exits_unlock:
mutex_unlock(&kvm->lock);
@@ -488,6 +488,11 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm)
return kvm->arch.cstate_in_guest;
}
+static inline bool kvm_aperfmperf_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.aperfmperf_in_guest;
+}
+
static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
{
return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED;
@@ -617,6 +617,7 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@@ -617,10 +617,12 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
KVM_X86_DISABLE_EXITS_HLT | \
KVM_X86_DISABLE_EXITS_PAUSE | \
- KVM_X86_DISABLE_EXITS_CSTATE)
+ KVM_X86_DISABLE_EXITS_CSTATE | \
+ KVM_X86_DISABLE_EXITS_APERFMPERF)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
Allow a guest to read the physical IA32_APERF and IA32_MPERF MSRs without interception. The IA32_APERF and IA32_MPERF MSRs are not virtualized. Writes are not handled at all. The MSR values are not zeroed on vCPU creation, saved on suspend, or restored on resume. No accommodation is made for processor migration or for sharing a logical processor with other tasks. No adjustments are made for non-unit TSC multipliers. The MSRs do not account for time the same way as the comparable PMU events, whether the PMU is virtualized by the traditional emulation method or the new mediated pass-through approach. Nonetheless, in a properly constrained environment, this capability can be combined with a guest CPUID table that advertises support for CPUID.6:ECX.APERFMPERF[bit 0] to induce a Linux guest to report the effective physical CPU frequency in /proc/cpuinfo. Moreover, there is no performance cost for this capability. Signed-off-by: Jim Mattson <jmattson@google.com> --- Documentation/virt/kvm/api.rst | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 7 +++++++ arch/x86/kvm/svm/svm.h | 2 +- arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/vmx/vmx.h | 2 +- arch/x86/kvm/x86.c | 8 +++++++- arch/x86/kvm/x86.h | 5 +++++ include/uapi/linux/kvm.h | 1 + tools/include/uapi/linux/kvm.h | 4 +++- 10 files changed, 33 insertions(+), 4 deletions(-)