Message ID | 20250409014136.2816971-3-mlevitsk@redhat.com (mailing list archive) |
---|---|
State | Handled Elsewhere |
Delegated to: | Björn Töpel |
Headers | show |
Series | KVM: extract lock_all_vcpus/unlock_all_vcpus | expand |
Context | Check | Description |
---|---|---|
bjorn/pre-ci_am | success | Success |
bjorn/build-rv32-defconfig | success | build-rv32-defconfig |
bjorn/build-rv64-clang-allmodconfig | success | build-rv64-clang-allmodconfig |
bjorn/build-rv64-gcc-allmodconfig | success | build-rv64-gcc-allmodconfig |
bjorn/build-rv64-nommu-k210-defconfig | success | build-rv64-nommu-k210-defconfig |
bjorn/build-rv64-nommu-k210-virt | success | build-rv64-nommu-k210-virt |
bjorn/checkpatch | warning | checkpatch |
bjorn/dtb-warn-rv64 | success | dtb-warn-rv64 |
bjorn/header-inline | success | header-inline |
bjorn/kdoc | success | kdoc |
bjorn/module-param | success | module-param |
bjorn/verify-fixes | success | verify-fixes |
bjorn/verify-signedoff | success | verify-signedoff |
On 4/8/25 9:41 PM, Maxim Levitsky wrote: > Move sev_lock/unlock_vcpus_for_migration to kvm_main and call the > new functions the kvm_lock_all_vcpus/kvm_unlock_all_vcpus > and kvm_lock_all_vcpus_nested. > > This code allows to lock all vCPUs without triggering lockdep warning > about reaching MAX_LOCK_DEPTH depth by coercing the lockdep into > thinking that we release all the locks other than vcpu'0 lock > immediately after we take them. > > No functional change intended. > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> > --- > arch/x86/kvm/svm/sev.c | 65 +++--------------------------------- > include/linux/kvm_host.h | 6 ++++ > virt/kvm/kvm_main.c | 71 ++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 81 insertions(+), 61 deletions(-) > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 0bc708ee2788..7adc54b1f741 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1889,63 +1889,6 @@ enum sev_migration_role { > SEV_NR_MIGRATION_ROLES, > }; > > -static int sev_lock_vcpus_for_migration(struct kvm *kvm, > - enum sev_migration_role role) > -{ > - struct kvm_vcpu *vcpu; > - unsigned long i, j; > - > - kvm_for_each_vcpu(i, vcpu, kvm) { > - if (mutex_lock_killable_nested(&vcpu->mutex, role)) > - goto out_unlock; > - > -#ifdef CONFIG_PROVE_LOCKING > - if (!i) > - /* > - * Reset the role to one that avoids colliding with > - * the role used for the first vcpu mutex. 
> - */ > - role = SEV_NR_MIGRATION_ROLES; > - else > - mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); > -#endif > - } > - > - return 0; > - > -out_unlock: > - > - kvm_for_each_vcpu(j, vcpu, kvm) { > - if (i == j) > - break; > - > -#ifdef CONFIG_PROVE_LOCKING > - if (j) > - mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); > -#endif > - > - mutex_unlock(&vcpu->mutex); > - } > - return -EINTR; > -} > - > -static void sev_unlock_vcpus_for_migration(struct kvm *kvm) > -{ > - struct kvm_vcpu *vcpu; > - unsigned long i; > - bool first = true; > - > - kvm_for_each_vcpu(i, vcpu, kvm) { > - if (first) > - first = false; > - else > - mutex_acquire(&vcpu->mutex.dep_map, > - SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); > - > - mutex_unlock(&vcpu->mutex); > - } > -} > - > static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) > { > struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm); > @@ -2083,10 +2026,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) > charged = true; > } > > - ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE); > + ret = kvm_lock_all_vcpus_nested(kvm, false, SEV_MIGRATION_SOURCE); > if (ret) > goto out_dst_cgroup; > - ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET); > + ret = kvm_lock_all_vcpus_nested(source_kvm, false, SEV_MIGRATION_TARGET); > if (ret) > goto out_dst_vcpu; > > @@ -2100,9 +2043,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) > ret = 0; > > out_source_vcpu: > - sev_unlock_vcpus_for_migration(source_kvm); > + kvm_unlock_all_vcpus(source_kvm); > out_dst_vcpu: > - sev_unlock_vcpus_for_migration(kvm); > + kvm_unlock_all_vcpus(kvm); > out_dst_cgroup: > /* Operates on the source on success, on the destination on failure. 
*/ > if (charged) > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 1dedc421b3e3..30cf28bf5c80 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -1015,6 +1015,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) > > void kvm_destroy_vcpus(struct kvm *kvm); > > +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role); > +void kvm_unlock_all_vcpus(struct kvm *kvm); > + > +#define kvm_lock_all_vcpus(kvm, trylock) \ > + kvm_lock_all_vcpus_nested(kvm, trylock, 0) > + > void vcpu_load(struct kvm_vcpu *vcpu); > void vcpu_put(struct kvm_vcpu *vcpu); > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 69782df3617f..71c0d8c35b4b 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) > return 0; > } > > + > +/* > + * Lock all VM vCPUs. > + * Can be used nested (to lock vCPUS of two VMs for example) > + */ > +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role) > +{ > + struct kvm_vcpu *vcpu; > + unsigned long i, j; > + > + lockdep_assert_held(&kvm->lock); > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + > + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role)) > + goto out_unlock; > + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role)) > + goto out_unlock; > + > +#ifdef CONFIG_PROVE_LOCKING > + if (!i) > + /* > + * Reset the role to one that avoids colliding with > + * the role used for the first vcpu mutex. > + */ > + role = MAX_LOCK_DEPTH - 1; > + else > + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); > +#endif Lockdep supports up to 8 subclasses, but MAX_LOCK_DEPTH is 48. I believe it is OK to add a mutex_trylock_nested(), but can you just use 0 and 1 for the subclasses? Cheers, Longman
On Tue, Apr 08, 2025 at 09:41:34PM -0400, Maxim Levitsky wrote: > Move sev_lock/unlock_vcpus_for_migration to kvm_main and call the > new functions the kvm_lock_all_vcpus/kvm_unlock_all_vcpus > and kvm_lock_all_vcpus_nested. > > This code allows to lock all vCPUs without triggering lockdep warning > about reaching MAX_LOCK_DEPTH depth by coercing the lockdep into > thinking that we release all the locks other than vcpu'0 lock > immediately after we take them. > > No functional change intended. > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> > --- > arch/x86/kvm/svm/sev.c | 65 +++--------------------------------- > include/linux/kvm_host.h | 6 ++++ > virt/kvm/kvm_main.c | 71 ++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 81 insertions(+), 61 deletions(-) > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 0bc708ee2788..7adc54b1f741 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1889,63 +1889,6 @@ enum sev_migration_role { > SEV_NR_MIGRATION_ROLES, > }; > > -static int sev_lock_vcpus_for_migration(struct kvm *kvm, > - enum sev_migration_role role) > -{ > - struct kvm_vcpu *vcpu; > - unsigned long i, j; > - > - kvm_for_each_vcpu(i, vcpu, kvm) { > - if (mutex_lock_killable_nested(&vcpu->mutex, role)) > - goto out_unlock; > - > -#ifdef CONFIG_PROVE_LOCKING > - if (!i) > - /* > - * Reset the role to one that avoids colliding with > - * the role used for the first vcpu mutex. 
> - */ > - role = SEV_NR_MIGRATION_ROLES; > - else > - mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); > -#endif > - } > - > - return 0; > - > -out_unlock: > - > - kvm_for_each_vcpu(j, vcpu, kvm) { > - if (i == j) > - break; > - > -#ifdef CONFIG_PROVE_LOCKING > - if (j) > - mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); > -#endif > - > - mutex_unlock(&vcpu->mutex); > - } > - return -EINTR; > -} > - > -static void sev_unlock_vcpus_for_migration(struct kvm *kvm) > -{ > - struct kvm_vcpu *vcpu; > - unsigned long i; > - bool first = true; > - > - kvm_for_each_vcpu(i, vcpu, kvm) { > - if (first) > - first = false; > - else > - mutex_acquire(&vcpu->mutex.dep_map, > - SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); > - > - mutex_unlock(&vcpu->mutex); > - } > -} > - > static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) > { > struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm); > @@ -2083,10 +2026,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) > charged = true; > } > > - ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE); > + ret = kvm_lock_all_vcpus_nested(kvm, false, SEV_MIGRATION_SOURCE); > if (ret) > goto out_dst_cgroup; > - ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET); > + ret = kvm_lock_all_vcpus_nested(source_kvm, false, SEV_MIGRATION_TARGET); > if (ret) > goto out_dst_vcpu; > > @@ -2100,9 +2043,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) > ret = 0; > > out_source_vcpu: > - sev_unlock_vcpus_for_migration(source_kvm); > + kvm_unlock_all_vcpus(source_kvm); > out_dst_vcpu: > - sev_unlock_vcpus_for_migration(kvm); > + kvm_unlock_all_vcpus(kvm); > out_dst_cgroup: > /* Operates on the source on success, on the destination on failure. 
*/ > if (charged) > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 1dedc421b3e3..30cf28bf5c80 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -1015,6 +1015,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) > > void kvm_destroy_vcpus(struct kvm *kvm); > > +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role); > +void kvm_unlock_all_vcpus(struct kvm *kvm); > + > +#define kvm_lock_all_vcpus(kvm, trylock) \ > + kvm_lock_all_vcpus_nested(kvm, trylock, 0) > + Can you instead add lock / trylock variants of this? kvm_trylock_all_vcpus(kvm) seems a bit more obvious in the calling code. Thanks, Oliver
On Tue, Apr 08, 2025 at 09:41:34PM -0400, Maxim Levitsky wrote: > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 69782df3617f..71c0d8c35b4b 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) > return 0; > } > > + > +/* > + * Lock all VM vCPUs. > + * Can be used nested (to lock vCPUS of two VMs for example) > + */ > +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role) > +{ > + struct kvm_vcpu *vcpu; > + unsigned long i, j; > + > + lockdep_assert_held(&kvm->lock); > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + > + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role)) > + goto out_unlock; > + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role)) > + goto out_unlock; > + > +#ifdef CONFIG_PROVE_LOCKING > + if (!i) > + /* > + * Reset the role to one that avoids colliding with > + * the role used for the first vcpu mutex. > + */ > + role = MAX_LOCK_DEPTH - 1; > + else > + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); > +#endif > + } This code is all sorts of terrible. Per the lockdep_assert_held() above, you serialize all these locks by holding that lock, this means you can be using the _nest_lock() annotation. Also, the original code didn't have this trylock nonsense, and the Changelog doesn't mention this -- in fact the Changelog claims no change, which is patently false. Anyway, please write like: kvm_for_each_vcpu(i, vcpu, kvm) { if (mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock)) goto unlock; } return 0; unlock: kvm_for_each_vcpu(j, vcpu, kvm) { if (j == i) break; mutex_unlock(&vcpu->mutex); } return -EINTR; And yes, you'll have to add mutex_lock_killable_nest_lock(), but that should be trivial.
On 4/10/25 10:16, Peter Zijlstra wrote: > On Tue, Apr 08, 2025 at 09:41:34PM -0400, Maxim Levitsky wrote: >> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c >> index 69782df3617f..71c0d8c35b4b 100644 >> --- a/virt/kvm/kvm_main.c >> +++ b/virt/kvm/kvm_main.c >> @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) >> return 0; >> } >> >> + >> +/* >> + * Lock all VM vCPUs. >> + * Can be used nested (to lock vCPUS of two VMs for example) >> + */ >> +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role) >> +{ >> + struct kvm_vcpu *vcpu; >> + unsigned long i, j; >> + >> + lockdep_assert_held(&kvm->lock); >> + >> + kvm_for_each_vcpu(i, vcpu, kvm) { >> + >> + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role)) >> + goto out_unlock; >> + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role)) >> + goto out_unlock; >> + >> +#ifdef CONFIG_PROVE_LOCKING >> + if (!i) >> + /* >> + * Reset the role to one that avoids colliding with >> + * the role used for the first vcpu mutex. >> + */ >> + role = MAX_LOCK_DEPTH - 1; >> + else >> + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); >> +#endif >> + } > > This code is all sorts of terrible. > > Per the lockdep_assert_held() above, you serialize all these locks by > holding that lock, this means you can be using the _nest_lock() > annotation. > > Also, the original code didn't have this trylock nonsense, and the > Changelog doesn't mention this -- in fact the Changelog claims no > change, which is patently false. > > Anyway, please write like: > > kvm_for_each_vcpu(i, vcpu, kvm) { > if (mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock)) > goto unlock; > } > > return 0; > > unlock: > > kvm_for_each_vcpu(j, vcpu, kvm) { > if (j == i) > break; > > mutex_unlock(&vcpu->mutex); > } > return -EINTR; > > And yes, you'll have to add mutex_lock_killable_nest_lock(), but that > should be trivial. 
If I understand correctly, that would be actually _mutex_lock_killable_nest_lock() plus a wrapper macro. But yes, that is easy so it sounds good. For the ARM case, which is the actual buggy one (it was complaining about too high a depth) it still needs mutex_trylock_nest_lock(); the nest_lock is needed to avoid bumping the depth on every mutex_trylock(). It should be something like diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2143d05116be..328f573cab6d 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -174,6 +174,12 @@ do { \ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) +#define mutex_trylock_nest_lock(lock, nest_lock) \ +do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _mutex_trylock_nest_lock(lock, &(nest_lock)->dep_map); \ +} while (0) + #else extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); @@ -185,6 +191,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) # define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) +# define mutex_trylock_nest_lock(lock, nest_lock) mutex_trylock(lock) #endif /* @@ -193,9 +200,14 @@ extern void mutex_lock_io(struct mutex *lock); * * Returns 1 if the mutex has been acquired successfully, and 0 on contention. 
*/ -extern int mutex_trylock(struct mutex *lock); +extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); extern void mutex_unlock(struct mutex *lock); +static inline int mutex_trylock(struct mutex *lock) +{ + return _mutex_trylock_nest_lock(lock, NULL); +} + extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 555e2b3a665a..d5d1e79495fc 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1063,8 +1063,10 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, #endif /** - * mutex_trylock - try to acquire the mutex, without waiting + * _mutex_trylock_nest_lock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired + * @nest_lock: if not NULL, a mutex that is always taken whenever multiple + * instances of @lock are * * Try to acquire the mutex atomically. Returns 1 if the mutex * has been acquired successfully, and 0 on contention. @@ -1076,7 +1078,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, * This function must not be used in interrupt context. The * mutex must be released by the same task that acquired it. */ -int __sched mutex_trylock(struct mutex *lock) +int __sched _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock) { bool locked; @@ -1084,11 +1086,11 @@ int __sched mutex_trylock(struct mutex *lock) locked = __mutex_trylock(lock); if (locked) - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + mutex_acquire_nest(&lock->dep_map, 0, 1, nest_lock, _RET_IP_); return locked; } -EXPORT_SYMBOL(mutex_trylock); +EXPORT_SYMBOL(_mutex_trylock_nest_lock); #ifndef CONFIG_DEBUG_LOCK_ALLOC int __sched Does that seem sane? Paolo
On Wed, Apr 16, 2025 at 07:48:00PM +0200, Paolo Bonzini wrote: > On 4/10/25 10:16, Peter Zijlstra wrote: > > On Tue, Apr 08, 2025 at 09:41:34PM -0400, Maxim Levitsky wrote: > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > > > index 69782df3617f..71c0d8c35b4b 100644 > > > --- a/virt/kvm/kvm_main.c > > > +++ b/virt/kvm/kvm_main.c > > > @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) > > > return 0; > > > } > > > + > > > +/* > > > + * Lock all VM vCPUs. > > > + * Can be used nested (to lock vCPUS of two VMs for example) > > > + */ > > > +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role) > > > +{ > > > + struct kvm_vcpu *vcpu; > > > + unsigned long i, j; > > > + > > > + lockdep_assert_held(&kvm->lock); > > > + > > > + kvm_for_each_vcpu(i, vcpu, kvm) { > > > + > > > + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role)) > > > + goto out_unlock; > > > + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role)) > > > + goto out_unlock; > > > + > > > +#ifdef CONFIG_PROVE_LOCKING > > > + if (!i) > > > + /* > > > + * Reset the role to one that avoids colliding with > > > + * the role used for the first vcpu mutex. > > > + */ > > > + role = MAX_LOCK_DEPTH - 1; > > > + else > > > + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); > > > +#endif > > > + } > > > > This code is all sorts of terrible. > > > > Per the lockdep_assert_held() above, you serialize all these locks by > > holding that lock, this means you can be using the _nest_lock() > > annotation. > > > > Also, the original code didn't have this trylock nonsense, and the > > Changelog doesn't mention this -- in fact the Changelog claims no > > change, which is patently false. 
> > > > Anyway, please write like: > > > > kvm_for_each_vcpu(i, vcpu, kvm) { > > if (mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock)) > > goto unlock; > > } > > > > return 0; > > > > unlock: > > > > kvm_for_each_vcpu(j, vcpu, kvm) { > > if (j == i) > > break; > > > > mutex_unlock(&vcpu->mutex); > > } > > return -EINTR; > > > > And yes, you'll have to add mutex_lock_killable_nest_lock(), but that > > should be trivial. > > If I understand correctly, that would be actually > _mutex_lock_killable_nest_lock() plus a wrapper macro. But yes, > that is easy so it sounds good. > > For the ARM case, which is the actual buggy one (it was complaining > about too high a depth) it still needs mutex_trylock_nest_lock(); > the nest_lock is needed to avoid bumping the depth on every > mutex_trylock(). Got a link to the ARM code in question ? And I'm assuming you're talking about task_struct::lockdep_depth ? The nest lock annotation does not in fact increment depth beyond one of each type. It does a refcount like thing.
On Wed, Apr 16, 2025 at 8:50 PM Peter Zijlstra <peterz@infradead.org> wrote: > > For the ARM case, which is the actual buggy one (it was complaining > > about too high a depth) it still needs mutex_trylock_nest_lock(); > > the nest_lock is needed to avoid bumping the depth on every > > mutex_trylock(). > > Got a link to the ARM code in question ? lock_all_vcpus() in arch/arm64/kvm/arm.c: lockdep_assert_held(&kvm->lock); kvm_for_each_vcpu(c, tmp_vcpu, kvm) { if (!mutex_trylock(&tmp_vcpu->mutex)) { unlock_vcpus(kvm, c - 1); return false; } } > And I'm assuming you're talking about task_struct::lockdep_depth ? > The nest lock annotation does not in fact increment depth beyond > one of each type. It does a refcount like thing. Yes, exactly - mutex_trylock_nest_lock() is needed so that the code above counts per-lock instead of using the per-task depth. Paolo
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0bc708ee2788..7adc54b1f741 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1889,63 +1889,6 @@ enum sev_migration_role { SEV_NR_MIGRATION_ROLES, }; -static int sev_lock_vcpus_for_migration(struct kvm *kvm, - enum sev_migration_role role) -{ - struct kvm_vcpu *vcpu; - unsigned long i, j; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (mutex_lock_killable_nested(&vcpu->mutex, role)) - goto out_unlock; - -#ifdef CONFIG_PROVE_LOCKING - if (!i) - /* - * Reset the role to one that avoids colliding with - * the role used for the first vcpu mutex. - */ - role = SEV_NR_MIGRATION_ROLES; - else - mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); -#endif - } - - return 0; - -out_unlock: - - kvm_for_each_vcpu(j, vcpu, kvm) { - if (i == j) - break; - -#ifdef CONFIG_PROVE_LOCKING - if (j) - mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); -#endif - - mutex_unlock(&vcpu->mutex); - } - return -EINTR; -} - -static void sev_unlock_vcpus_for_migration(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - unsigned long i; - bool first = true; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (first) - first = false; - else - mutex_acquire(&vcpu->mutex.dep_map, - SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); - - mutex_unlock(&vcpu->mutex); - } -} - static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) { struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm); @@ -2083,10 +2026,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) charged = true; } - ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE); + ret = kvm_lock_all_vcpus_nested(kvm, false, SEV_MIGRATION_SOURCE); if (ret) goto out_dst_cgroup; - ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET); + ret = kvm_lock_all_vcpus_nested(source_kvm, false, SEV_MIGRATION_TARGET); if (ret) goto out_dst_vcpu; @@ -2100,9 +2043,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) ret = 
0; out_source_vcpu: - sev_unlock_vcpus_for_migration(source_kvm); + kvm_unlock_all_vcpus(source_kvm); out_dst_vcpu: - sev_unlock_vcpus_for_migration(kvm); + kvm_unlock_all_vcpus(kvm); out_dst_cgroup: /* Operates on the source on success, on the destination on failure. */ if (charged) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1dedc421b3e3..30cf28bf5c80 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1015,6 +1015,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) void kvm_destroy_vcpus(struct kvm *kvm); +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role); +void kvm_unlock_all_vcpus(struct kvm *kvm); + +#define kvm_lock_all_vcpus(kvm, trylock) \ + kvm_lock_all_vcpus_nested(kvm, trylock, 0) + void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 69782df3617f..71c0d8c35b4b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) return 0; } + +/* + * Lock all VM vCPUs. + * Can be used nested (to lock vCPUS of two VMs for example) + */ +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role) +{ + struct kvm_vcpu *vcpu; + unsigned long i, j; + + lockdep_assert_held(&kvm->lock); + + kvm_for_each_vcpu(i, vcpu, kvm) { + + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role)) + goto out_unlock; + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role)) + goto out_unlock; + +#ifdef CONFIG_PROVE_LOCKING + if (!i) + /* + * Reset the role to one that avoids colliding with + * the role used for the first vcpu mutex. 
+ */ + role = MAX_LOCK_DEPTH - 1; + else + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); +#endif + } + + return 0; + +out_unlock: + + kvm_for_each_vcpu(j, vcpu, kvm) { + if (i == j) + break; + +#ifdef CONFIG_PROVE_LOCKING + if (j) + mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); +#endif + + mutex_unlock(&vcpu->mutex); + } + return -EINTR; +} +EXPORT_SYMBOL_GPL(kvm_lock_all_vcpus_nested); + +void kvm_unlock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long i; + bool first = true; + + lockdep_assert_held(&kvm->lock); + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (first) + first = false; + else + mutex_acquire(&vcpu->mutex.dep_map, + MAX_LOCK_DEPTH - 1, 0, _THIS_IP_); + + mutex_unlock(&vcpu->mutex); + } +} +EXPORT_SYMBOL_GPL(kvm_unlock_all_vcpus); + + /* * Allocation size is twice as large as the actual dirty bitmap size. * See kvm_vm_ioctl_get_dirty_log() why this is needed.
Move sev_lock/unlock_vcpus_for_migration to kvm_main and name the new functions kvm_lock_all_vcpus/kvm_unlock_all_vcpus and kvm_lock_all_vcpus_nested. This code allows locking all vCPUs without triggering a lockdep warning about reaching MAX_LOCK_DEPTH, by coercing lockdep into thinking that we release all the locks other than vCPU 0's lock immediately after we take them. No functional change intended. Suggested-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> --- arch/x86/kvm/svm/sev.c | 65 +++--------------------------------- include/linux/kvm_host.h | 6 ++++ virt/kvm/kvm_main.c | 71 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 61 deletions(-)