Message ID | 92836b09c8e0f19f8e506008e45993881d22b6d1.1663869838.git.isaku.yamahata@intel.com (mailing list archive) |
---|---|
State | New, archived |
Series | KVM: hardware enable/disable reorganize |
On Thu, Sep 22, 2022, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Because kvm_count_lock unnecessarily complicates the KVM locking convention
> Drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock for
> simplicity.  kvm_arch_hardware_enable/disable() callbacks depend on
> non-preemptiblity with the spin lock.  Add preempt_disable/enable()
> around hardware enable/disable callback to keep the assumption.

There's the other "minor" wrinkle that prior to patch 7, "KVM: Rename and move
CPUHP_AP_KVM_STARTING to ONLINE section", kvm_online_cpu() was called with IRQs
disabled and couldn't sleep, i.e. couldn't acquire a mutex.  That's very
important to capture in the changelog.

> Because kvm_suspend() and kvm_resume() is called with interrupt disabled,
> they don't need preempt_disable/enable() pair.
>
> Opportunistically add some comments on locking.
>
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>

...

> @@ -5028,13 +5029,20 @@ static int kvm_online_cpu(unsigned int cpu)
>  	if (kvm_usage_count) {
>  		WARN_ON_ONCE(atomic_read(&hardware_enable_failed));
>
> +		/*
> +		 * arch callback kvm_arch_hardware_eanble() assumes that

s/eanble/enable

Though even better would be to avoid function names entirely.

> +		 * preemption is disabled for historical reason.  Disable
> +		 * preemption until all arch callbacks are fixed.
> +		 */

Probably better to put this comment above the WARN_ON_ONCE() in
hardware_enable_nolock() since that's where the oddity and dependency on arch
behavior lies.  And then it can be turned into a FIXME, e.g.

	/*
	 * FIXME: drop the "preemption disabled" requirement here and in the
	 * disable path once all arch code plays nice with preemption.
	 */

> +		preempt_disable();
>  		hardware_enable_nolock(NULL);
> +		preempt_enable();
>  		if (atomic_read(&hardware_enable_failed)) {
>  			atomic_set(&hardware_enable_failed, 0);
>  			ret = -EIO;
>  		}
>  	}
> -	raw_spin_unlock(&kvm_count_lock);
> +	mutex_unlock(&kvm_lock);
>  	return ret;
>  }
>
> @@ -5042,6 +5050,8 @@ static void hardware_disable_nolock(void *junk)
>  {
>  	int cpu = raw_smp_processor_id();
>
> +	WARN_ON_ONCE(preemptible());
> +
>  	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
>  		return;
>  	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
> @@ -5050,10 +5060,18 @@ static void hardware_disable_nolock(void *junk)
>
>  static int kvm_offline_cpu(unsigned int cpu)
>  {
> -	raw_spin_lock(&kvm_count_lock);
> -	if (kvm_usage_count)
> +	mutex_lock(&kvm_lock);
> +	if (kvm_usage_count) {
> +		/*
> +		 * arch callback kvm_arch_hardware_disable() assumes that
> +		 * preemption is disabled for historical reason.  Disable
> +		 * preemption until all arch callbacks are fixed.
> +		 */

I vote to drop this comment and instead document everything in the enable
FIXME (see above).

> +		preempt_disable();
>  		hardware_disable_nolock(NULL);
> -	raw_spin_unlock(&kvm_count_lock);
> +		preempt_enable();
> +	}
> +	mutex_unlock(&kvm_lock);
>  	return 0;
>  }

...

> @@ -5708,15 +5728,27 @@ static void kvm_init_debug(void)
>
>  static int kvm_suspend(void)
>  {
> -	if (kvm_usage_count)
> +	/*
> +	 * The caller ensures that CPU hotplug is disabled by
> +	 * cpu_hotplug_disable() and other CPUs are offlined.  No need for
> +	 * locking.

Disabling CPU hotplug prevents racing with kvm_online_cpu()/kvm_offline_cpu(),
but doesn't prevent racing with hardware_enable_all()/hardware_disable_all().
And the lockdep doesn't mesh with the comment, which explains why kvm_lock
doesn't _need_ to be held, but not why kvm_lock _can't_ be held.  Maybe this?

	/*
	 * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
	 * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
	 * is stable.  Assert that kvm_lock is not held as a paranoid sanity
	 * check that the system isn't suspended when KVM is enabling hardware.
	 */

> +	 */
> +	lockdep_assert_not_held(&kvm_lock);
> +
> +	if (kvm_usage_count) {
> +		/*
> +		 * Because kvm_suspend() is called with interrupt disabled, no
> +		 * need to disable preemption.
> +		 */

Add a lockdep and drop the comment, e.g. below the lockdep_assert_not_held(),
add

	lockdep_assert_irqs_disabled();

That covers the "why doesn't this disable preemption" _and_ enforces that IRQs
are indeed disabled.

> +		hardware_disable_nolock(NULL);
> +	}
>  	return 0;
>  }
>
>  static void kvm_resume(void)
>  {
>  	if (kvm_usage_count) {
> -		lockdep_assert_not_held(&kvm_count_lock);
> +		lockdep_assert_not_held(&kvm_lock);
>  		hardware_enable_nolock(NULL);
>  	}
>  }
> --
> 2.25.1
>
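Putting those two pieces of feedback together, hardware_enable_nolock() might
end up looking something like the sketch below. This is only a reading of the
review (body abbreviated, arch callback elided), not code from the series:

```c
static void hardware_enable_nolock(void *junk)
{
	int cpu = raw_smp_processor_id();

	/*
	 * FIXME: drop the "preemption disabled" requirement here and in the
	 * disable path once all arch code plays nice with preemption.
	 */
	WARN_ON_ONCE(preemptible());

	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
		return;

	/* ... per-CPU enabling via kvm_arch_hardware_enable(), as in the patch ... */
}
```

With the FIXME hoisted above the WARN_ON_ONCE(), the preempt_disable()/
preempt_enable() pairs in kvm_online_cpu() and kvm_offline_cpu() no longer
need their own comments.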
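Similarly, kvm_suspend() with the suggested comment and the extra lockdep
assertion folded in might look like this sketch (again an interpretation of
the feedback, not the final patch):

```c
static int kvm_suspend(void)
{
	/*
	 * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
	 * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
	 * is stable.  Assert that kvm_lock is not held as a paranoid sanity
	 * check that the system isn't suspended when KVM is enabling hardware.
	 */
	lockdep_assert_not_held(&kvm_lock);

	/* IRQs off implies preemption is disabled, no preempt_disable() needed. */
	lockdep_assert_irqs_disabled();

	if (kvm_usage_count)
		hardware_disable_nolock(NULL);
	return 0;
}
```

The lockdep_assert_irqs_disabled() both documents why no preempt_disable() is
needed and enforces that the callback really does run with IRQs off.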
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 845a561629f1..55d6559ace2a 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -216,15 +216,11 @@ time it will be set using the Dirty tracking mechanism described above.
 :Type:		mutex
 :Arch:		any
 :Protects:	- vm_list
-
-``kvm_count_lock``
-^^^^^^^^^^^^^^^^^^
-
-:Type:		raw_spinlock_t
-:Arch:		any
-:Protects:	- hardware virtualization enable/disable
-:Comment:	'raw' because hardware enabling/disabling must be atomic /wrt
-		migration.
+		- kvm_usage_count
+		- hardware virtualization enable/disable
+:Comment:	Use cpus_read_lock() for hardware virtualization enable/disable
+		because hardware enabling/disabling must be atomic /wrt
+		CPU hotplug.  The lock order is cpus lock => kvm_lock.
 
 ``kvm->mn_invalidate_lock``
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b1bf44af523c..c4b908553726 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,7 +100,6 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
  */
 
 DEFINE_MUTEX(kvm_lock);
-static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
@@ -4996,6 +4995,8 @@ static void hardware_enable_nolock(void *junk)
 	int cpu = raw_smp_processor_id();
 	int r;
 
+	WARN_ON_ONCE(preemptible());
+
 	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
 
@@ -5019,7 +5020,7 @@ static int kvm_online_cpu(unsigned int cpu)
 	if (ret)
 		return ret;
 
-	raw_spin_lock(&kvm_count_lock);
+	mutex_lock(&kvm_lock);
 	/*
 	 * Abort the CPU online process if hardware virtualization cannot
 	 * be enabled. Otherwise running VMs would encounter unrecoverable
@@ -5028,13 +5029,20 @@ static int kvm_online_cpu(unsigned int cpu)
 	if (kvm_usage_count) {
 		WARN_ON_ONCE(atomic_read(&hardware_enable_failed));
 
+		/*
+		 * arch callback kvm_arch_hardware_eanble() assumes that
+		 * preemption is disabled for historical reason.  Disable
+		 * preemption until all arch callbacks are fixed.
+		 */
+		preempt_disable();
 		hardware_enable_nolock(NULL);
+		preempt_enable();
 		if (atomic_read(&hardware_enable_failed)) {
 			atomic_set(&hardware_enable_failed, 0);
 			ret = -EIO;
 		}
 	}
-	raw_spin_unlock(&kvm_count_lock);
+	mutex_unlock(&kvm_lock);
 	return ret;
 }
 
@@ -5042,6 +5050,8 @@ static void hardware_disable_nolock(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 
+	WARN_ON_ONCE(preemptible());
+
 	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
 	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -5050,10 +5060,18 @@ static void hardware_disable_nolock(void *junk)
 
 static int kvm_offline_cpu(unsigned int cpu)
 {
-	raw_spin_lock(&kvm_count_lock);
-	if (kvm_usage_count)
+	mutex_lock(&kvm_lock);
+	if (kvm_usage_count) {
+		/*
+		 * arch callback kvm_arch_hardware_disable() assumes that
+		 * preemption is disabled for historical reason.  Disable
+		 * preemption until all arch callbacks are fixed.
+		 */
+		preempt_disable();
 		hardware_disable_nolock(NULL);
-	raw_spin_unlock(&kvm_count_lock);
+		preempt_enable();
+	}
+	mutex_unlock(&kvm_lock);
 	return 0;
 }
 
@@ -5068,9 +5086,11 @@ static void hardware_disable_all_nolock(void)
 
 static void hardware_disable_all(void)
 {
-	raw_spin_lock(&kvm_count_lock);
+	cpus_read_lock();
+	mutex_lock(&kvm_lock);
 	hardware_disable_all_nolock();
-	raw_spin_unlock(&kvm_count_lock);
+	mutex_unlock(&kvm_lock);
+	cpus_read_unlock();
 }
 
 static int hardware_enable_all(void)
@@ -5088,7 +5108,7 @@ static int hardware_enable_all(void)
 	 * Disable CPU hotplug to prevent this case from happening.
 	 */
 	cpus_read_lock();
-	raw_spin_lock(&kvm_count_lock);
+	mutex_lock(&kvm_lock);
 
 	kvm_usage_count++;
 	if (kvm_usage_count == 1) {
@@ -5101,7 +5121,7 @@ static int hardware_enable_all(void)
 		}
 	}
 
-	raw_spin_unlock(&kvm_count_lock);
+	mutex_unlock(&kvm_lock);
 	cpus_read_unlock();
 
 	return r;
@@ -5708,15 +5728,27 @@ static void kvm_init_debug(void)
 
 static int kvm_suspend(void)
 {
-	if (kvm_usage_count)
+	/*
+	 * The caller ensures that CPU hotplug is disabled by
+	 * cpu_hotplug_disable() and other CPUs are offlined.  No need for
+	 * locking.
+	 */
+	lockdep_assert_not_held(&kvm_lock);
+
+	if (kvm_usage_count) {
+		/*
+		 * Because kvm_suspend() is called with interrupt disabled, no
+		 * need to disable preemption.
+		 */
 		hardware_disable_nolock(NULL);
+	}
 	return 0;
 }
 
 static void kvm_resume(void)
 {
 	if (kvm_usage_count) {
-		lockdep_assert_not_held(&kvm_count_lock);
+		lockdep_assert_not_held(&kvm_lock);
 		hardware_enable_nolock(NULL);
 	}
 }