@@ -166,10 +166,13 @@ void avic_physid_shadow_table_update_vcpu_location(struct kvm_vcpu *vcpu, int cp
raw_spin_lock_irqsave(&kvm_svm->avic.table_entries_lock, flags);
list_for_each_entry(e, &vcpu_svm->nested.physid_ref_entries, link) {
- u64 sentry = READ_ONCE(*e->sentry);
+ u64 old_sentry = READ_ONCE(*e->sentry);
+ u64 new_sentry = old_sentry;
- physid_entry_set_apicid(&sentry, cpu);
- WRITE_ONCE(*e->sentry, sentry);
+ physid_entry_set_apicid(&new_sentry, cpu);
+
+ if (new_sentry != old_sentry)
+ WRITE_ONCE(*e->sentry, new_sentry);
nentries++;
}
@@ -1507,7 +1510,7 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- u64 entry;
+ u64 old_entry, new_entry;
/* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1531,14 +1534,16 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_vcpu_is_blocking(vcpu))
return;
- entry = READ_ONCE(*(svm->avic_physical_id_cache));
- WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+ old_entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ new_entry = old_entry;
- entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
- entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
- entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ new_entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+ new_entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
+ new_entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+
+ if (old_entry != new_entry)
+ WRITE_ONCE(*(svm->avic_physical_id_cache), new_entry);
- WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}
@@ -1549,14 +1554,24 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
lockdep_assert_preemption_disabled();
+ avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+
+ /*
+ * It is only meaningful to intercept IPIs from the guest
+ * when either vCPU is blocked, or in guest mode.
+ * In all other cases (e.g userspace vmexit, or preemption
+ * by other task, the vCPU is guaranteed to return to guest mode
+ * as soon as it can
+ */
+ if (!avic_doorbell_strict && !kvm_vcpu_is_blocking(vcpu) && !is_guest_mode(vcpu))
+ return;
+
entry = READ_ONCE(*(svm->avic_physical_id_cache));
/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
return;
- avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
-
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
@@ -202,6 +202,9 @@ module_param(tsc_scaling, int, 0444);
static bool avic;
module_param(avic, bool, 0444);
+bool avic_doorbell_strict = true;
+module_param(avic_doorbell_strict, bool, 0444);
+
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
@@ -1340,7 +1343,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
svm->loaded = false;
if (svm->nested.initialized && svm->avic_enabled)
- avic_physid_shadow_table_update_vcpu_location(vcpu, -1);
+ if (!avic_doorbell_strict || kvm_vcpu_is_blocking(vcpu))
+ avic_physid_shadow_table_update_vcpu_location(vcpu, -1);
svm_prepare_host_switch(vcpu);
@@ -4707,7 +4711,6 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x80000001 and 0x8000000A (SVM features) */
if (nested) {
kvm_cpu_cap_set(X86_FEATURE_SVM);
- kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
if (nrips)
kvm_cpu_cap_set(X86_FEATURE_NRIPS);
@@ -33,6 +33,7 @@
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern bool intercept_smi;
+extern bool avic_doorbell_strict;
/*
* Clean bits in VMCB.
@@ -3291,9 +3291,10 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
vcpu->stat.generic.blocking = 1;
+ prepare_to_rcuwait(wait);
+
kvm_arch_vcpu_blocking(vcpu);
- prepare_to_rcuwait(wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
Allow optionally to make KVM not update is_running unless it is functionally needed which is only when a vCPU halts, or is in the guest mode. This means security wise that if a vCPU is scheduled out, other vCPUs could still send doorbell messages to the last physical CPU where this vCPU was last running. This in theory can be considered less secure, thus this option is not enabled by default. The option is avic_doorbell_strict and is true by default, setting it to false allows this relaxed non strict mode. Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> --- arch/x86/kvm/svm/avic.c | 39 +++++++++++++++++++++++++++------------ arch/x86/kvm/svm/svm.c | 7 +++++-- arch/x86/kvm/svm/svm.h | 1 + virt/kvm/kvm_main.c | 3 ++- 4 files changed, 35 insertions(+), 15 deletions(-)