@@ -9,7 +9,7 @@ KVM Lock Overview
The acquisition orders for mutexes are as follows:
-- cpus_read_lock() is taken outside kvm_lock
+- cpus_read_lock() is taken outside kvm_lock and kvm_usage_lock
- kvm->lock is taken outside vcpu->mutex
@@ -24,6 +24,12 @@ The acquisition orders for mutexes are as follows:
are taken on the waiting side when modifying memslots, so MMU notifiers
must not take either kvm->slots_lock or kvm->slots_arch_lock.
+cpus_read_lock() vs kvm_lock:
+- Taking cpus_read_lock() outside of kvm_lock is problematic, despite that
+ being the official ordering, as it is quite easy to unknowingly trigger
+ cpus_read_lock() while holding kvm_lock. Use caution when walking vm_list,
+ e.g. avoid complex operations when possible.
+
For SRCU:
- ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections
@@ -227,10 +233,17 @@ time it will be set using the Dirty tracking mechanism described above.
:Type: mutex
:Arch: any
:Protects: - vm_list
- - kvm_usage_count
+
+``kvm_usage_lock``
+^^^^^^^^^^^^^^^^^^
+
+:Type: mutex
+:Arch: any
+:Protects: - kvm_usage_count
- hardware virtualization enable/disable
-:Comment: KVM also disables CPU hotplug via cpus_read_lock() during
- enable/disable.
+:Comment: Exists because using kvm_lock leads to deadlock (see the earlier
+ "cpus_read_lock() vs kvm_lock" note). Note, KVM also disables CPU hotplug
+ via cpus_read_lock() when enabling/disabling virtualization.
``kvm->mn_invalidate_lock``
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -290,11 +303,12 @@ time it will be set using the Dirty tracking mechanism described above.
wakeup.
``vendor_module_lock``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^
:Type: mutex
:Arch: x86
:Protects: loading a vendor module (kvm_amd or kvm_intel)
-:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is
- taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and
- many operations need to take cpu_hotplug_lock when loading a vendor module,
- e.g. updating static calls.
+:Comment: Exists because using kvm_lock leads to deadlock. kvm_lock is taken
+ in notifiers, e.g. __kvmclock_cpufreq_notifier(), that may be invoked while
+ cpu_hotplug_lock is held, e.g. from cpufreq_boost_trigger_state(), and many
+ operations need to take cpu_hotplug_lock when loading a vendor module, e.g.
+ updating static calls.
@@ -5576,6 +5576,7 @@ __visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
static DEFINE_PER_CPU(bool, hardware_enabled);
+static DEFINE_MUTEX(kvm_usage_lock);
static int kvm_usage_count;
static int __hardware_enable_nolock(void)
@@ -5608,10 +5609,10 @@ static int kvm_online_cpu(unsigned int cpu)
* be enabled. Otherwise running VMs would encounter unrecoverable
* errors when scheduled to this CPU.
*/
- mutex_lock(&kvm_lock);
+ mutex_lock(&kvm_usage_lock);
if (kvm_usage_count)
ret = __hardware_enable_nolock();
- mutex_unlock(&kvm_lock);
+ mutex_unlock(&kvm_usage_lock);
return ret;
}
@@ -5631,10 +5632,10 @@ static void hardware_disable_nolock(void *junk)
static int kvm_offline_cpu(unsigned int cpu)
{
- mutex_lock(&kvm_lock);
+ mutex_lock(&kvm_usage_lock);
if (kvm_usage_count)
hardware_disable_nolock(NULL);
- mutex_unlock(&kvm_lock);
+ mutex_unlock(&kvm_usage_lock);
return 0;
}
@@ -5650,9 +5651,9 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
cpus_read_lock();
- mutex_lock(&kvm_lock);
+ mutex_lock(&kvm_usage_lock);
hardware_disable_all_nolock();
- mutex_unlock(&kvm_lock);
+ mutex_unlock(&kvm_usage_lock);
cpus_read_unlock();
}
@@ -5683,7 +5684,7 @@ static int hardware_enable_all(void)
* enable hardware multiple times.
*/
cpus_read_lock();
- mutex_lock(&kvm_lock);
+ mutex_lock(&kvm_usage_lock);
r = 0;
@@ -5697,7 +5698,7 @@ static int hardware_enable_all(void)
}
}
- mutex_unlock(&kvm_lock);
+ mutex_unlock(&kvm_usage_lock);
cpus_read_unlock();
return r;
@@ -5725,13 +5726,13 @@ static int kvm_suspend(void)
{
/*
* Secondary CPUs and CPU hotplug are disabled across the suspend/resume
- * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
- * is stable. Assert that kvm_lock is not held to ensure the system
- * isn't suspended while KVM is enabling hardware. Hardware enabling
- * can be preempted, but the task cannot be frozen until it has dropped
- * all locks (userspace tasks are frozen via a fake signal).
+ * callbacks, i.e. no need to acquire kvm_usage_lock to ensure the usage
+ * count is stable. Assert that kvm_usage_lock is not held to ensure
+ * the system isn't suspended while KVM is enabling hardware. Hardware
+ * enabling can be preempted, but the task cannot be frozen until it has
+ * dropped all locks (userspace tasks are frozen via a fake signal).
*/
- lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_not_held(&kvm_usage_lock);
lockdep_assert_irqs_disabled();
if (kvm_usage_count)
@@ -5741,7 +5742,7 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
- lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_not_held(&kvm_usage_lock);
lockdep_assert_irqs_disabled();
if (kvm_usage_count)