@@ -6155,3 +6155,14 @@ KVM can therefore start protected VMs.
This capability governs the KVM_S390_PV_COMMAND ioctl and the
KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected
guests when the state change is invalid.
+
+
+8.24 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
+-------------------------------------
+
+Architectures: x86
+
+When enabled, KVM will disable paravirtual features that are not
+exposed to the guest through the KVM_CPUID_FEATURES CPUID leaf
+(0x40000001). Otherwise, a guest may use paravirtual features
+regardless of what has actually been exposed through the CPUID leaf.
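
For reference, a minimal userspace sketch of opting in to enforcement,
assuming a vcpu_fd obtained from KVM_CREATE_VCPU; the helper name is
illustrative and error handling is omitted:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Enable per-vCPU enforcement of the KVM_CPUID_FEATURES leaf. */
    static int enable_pv_enforcement(int vcpu_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID,
                    .args[0] = 1,   /* non-zero enables enforcement */
            };

            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }
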
@@ -788,6 +788,12 @@ struct kvm_vcpu_arch {
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
+
+ /*
+ * Indicates whether paravirtual features should be unavailable to
+ * the guest when not advertised in the guest's CPUID.
+ */
+ bool enforce_pv_feature_cpuid;
};
struct kvm_lpage_info {
@@ -5,6 +5,7 @@
#include "x86.h"
#include <asm/cpu.h>
#include <asm/processor.h>
+#include <uapi/asm/kvm_para.h>
extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
@@ -308,4 +309,19 @@ static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
}
+static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
+ unsigned int kvm_feature)
+{
+ struct kvm_cpuid_entry2 *cpuid;
+
+ if (!vcpu->arch.enforce_pv_feature_cpuid)
+ return true;
+
+ cpuid = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+ if (!cpuid)
+ return false;
+
+ return cpuid->eax & (1u << kvm_feature);
+}
+
#endif
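
guest_pv_has() consults the KVM_CPUID_FEATURES leaf that userspace
installed via KVM_SET_CPUID2. A sketch of how a VMM might populate that
leaf so that only selected features pass the check (hypothetical helper;
a real VMM would typically start from KVM_GET_SUPPORTED_CPUID and must
also install the 0x40000000 hypervisor signature leaf):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>
    #include <asm/kvm_para.h>   /* KVM_CPUID_FEATURES, KVM_FEATURE_* */

    /* Advertise only the PV clocksources; with enforcement enabled,
     * guest_pv_has() then fails for every other PV feature. */
    static int set_pv_features(int vcpu_fd)
    {
            struct {
                    struct kvm_cpuid2 hdr;
                    struct kvm_cpuid_entry2 entries[1];
            } cpuid = {
                    .hdr.nent = 1,
                    .entries[0] = {
                            .function = KVM_CPUID_FEATURES, /* 0x40000001 */
                            .eax = (1u << KVM_FEATURE_CLOCKSOURCE) |
                                   (1u << KVM_FEATURE_CLOCKSOURCE2),
                    },
            };

            return ioctl(vcpu_fd, KVM_SET_CPUID2, &cpuid.hdr);
    }
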
@@ -2763,6 +2763,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
if (data & 0x30)
return 1;
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
+ (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
+ return 1;
+
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
+ (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
+ return 1;
+
if (!lapic_in_kernel(vcpu))
return 1;
@@ -2840,10 +2848,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
*/
- trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
- st->preempted & KVM_VCPU_FLUSH_TLB);
- if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
- kvm_vcpu_flush_tlb_guest(vcpu);
+ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+ trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+ st->preempted & KVM_VCPU_FLUSH_TLB);
+ if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ kvm_vcpu_flush_tlb_guest(vcpu);
+ }
vcpu->arch.st.preempted = 0;
@@ -2998,30 +3008,54 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.smi_count = data;
break;
case MSR_KVM_WALL_CLOCK_NEW:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+ return 1;
+
+ kvm_write_wall_clock(vcpu->kvm, data);
+ break;
case MSR_KVM_WALL_CLOCK:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+ return 1;
+
kvm_write_wall_clock(vcpu->kvm, data);
break;
case MSR_KVM_SYSTEM_TIME_NEW:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+ return 1;
+
kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
break;
case MSR_KVM_SYSTEM_TIME:
- kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+ return 1;
+
+ kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
break;
case MSR_KVM_ASYNC_PF_EN:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ return 1;
+
if (kvm_pv_enable_async_pf(vcpu, data))
return 1;
break;
case MSR_KVM_ASYNC_PF_INT:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+ return 1;
+
if (kvm_pv_enable_async_pf_int(vcpu, data))
return 1;
break;
case MSR_KVM_ASYNC_PF_ACK:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ return 1;
if (data & 0x1) {
vcpu->arch.apf.pageready_pending = false;
kvm_check_async_pf_completion(vcpu);
}
break;
case MSR_KVM_STEAL_TIME:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+ return 1;
if (unlikely(!sched_info_on()))
return 1;
@@ -3038,11 +3072,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_PV_EOI_EN:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+ return 1;
+
if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;
case MSR_KVM_POLL_CONTROL:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+ return 1;
+
/* only enable bit supported */
if (data & (-1ULL << 1))
return 1;
@@ -3522,6 +3562,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_EXCEPTION_PAYLOAD:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_LAST_CPU:
+ case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -4389,6 +4430,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return kvm_x86_ops.enable_direct_tlbflush(vcpu);
+ case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+ vcpu->arch.enforce_pv_feature_cpuid = cap->args[0];
+
+ return 0;
+
default:
return -EINVAL;
}
@@ -7723,11 +7769,16 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
goto out;
}
+ ret = -KVM_ENOSYS;
+
switch (nr) {
case KVM_HC_VAPIC_POLL_IRQ:
ret = 0;
break;
case KVM_HC_KICK_CPU:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
+ break;
+
kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
kvm_sched_yield(vcpu->kvm, a1);
ret = 0;
@@ -7738,9 +7789,15 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
break;
#endif
case KVM_HC_SEND_IPI:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
+ break;
+
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
case KVM_HC_SCHED_YIELD:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
+ break;
+
kvm_sched_yield(vcpu->kvm, a0);
ret = 0;
break;
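
With ret preinitialized to -KVM_ENOSYS, a hypercall masked by
guest_pv_has() now falls out of the switch with the same error a guest
sees for an unknown hypercall number. A guest-side sketch, modeled on
kvm_hypercall1() in arch/x86/include/asm/kvm_para.h (the kernel
actually selects vmcall vs. vmmcall with ALTERNATIVE(); plain vmcall
is assumed here):

    static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
    {
            long ret;

            /* Hypercall number in RAX, first argument in RBX;
             * the result comes back in RAX. */
            asm volatile("vmcall"
                         : "=a"(ret)
                         : "a"(nr), "b"(p1)
                         : "memory");
            return ret;
    }

    /* e.g. kvm_hypercall1(KVM_HC_SCHED_YIELD, dest_apicid) now returns
     * -KVM_ENOSYS unless KVM_FEATURE_PV_SCHED_YIELD is advertised. */
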
@@ -1035,6 +1035,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_LAST_CPU 184
#define KVM_CAP_SMALLER_MAXPHYADDR 185
#define KVM_CAP_S390_DIAG318 186
+#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 187
#ifdef KVM_CAP_IRQ_ROUTING