KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock

Message ID 1b02a06421c17993df337493a68ba923f3bd5c0f.camel@infradead.org
State New, archived
Series KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock

Commit Message

David Woodhouse Oct. 23, 2021, 8:29 p.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

On the preemption path when updating a Xen guest's runstate times, this
lock is taken inside the scheduler rq->lock, which is a raw spinlock.
This was shown in a lockdep warning:

[   89.138354] =============================
[   89.138356] [ BUG: Invalid wait context ]
[   89.138358] 5.15.0-rc5+ #834 Tainted: G S        I E
[   89.138360] -----------------------------
[   89.138361] xen_shinfo_test/2575 is trying to lock:
[   89.138363] ffffa34a0364efd8 (&kvm->arch.pvclock_gtod_sync_lock){....}-{3:3}, at: get_kvmclock_ns+0x1f/0x130 [kvm]
[   89.138442] other info that might help us debug this:
[   89.138444] context-{5:5}
[   89.138445] 4 locks held by xen_shinfo_test/2575:
[   89.138447]  #0: ffff972bdc3b8108 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x77/0x6f0 [kvm]
[   89.138483]  #1: ffffa34a03662e90 (&kvm->srcu){....}-{0:0}, at: kvm_arch_vcpu_ioctl_run+0xdc/0x8b0 [kvm]
[   89.138526]  #2: ffff97331fdbac98 (&rq->__lock){-.-.}-{2:2}, at: __schedule+0xff/0xbd0
[   89.138534]  #3: ffffa34a03662e90 (&kvm->srcu){....}-{0:0}, at: kvm_arch_vcpu_put+0x26/0x170 [kvm]
...
[   89.138695]  get_kvmclock_ns+0x1f/0x130 [kvm]
[   89.138734]  kvm_xen_update_runstate+0x14/0x90 [kvm]
[   89.138783]  kvm_xen_update_runstate_guest+0x15/0xd0 [kvm]
[   89.138830]  kvm_arch_vcpu_put+0xe6/0x170 [kvm]
[   89.138870]  kvm_sched_out+0x2f/0x40 [kvm]
[   89.138900]  __schedule+0x5de/0xbd0

Fixes: 30b5c851af79 ("KVM: x86/xen: Add support for vCPU runstate information")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
> However, in 5.15-rc5 I'm still seeing the warning below when I run
> xen_shinfo_test. I confess I'm not entirely sure what it's telling
> me.

I think this is what it was telling me...
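
For the record, the nesting rule lockdep is enforcing: under
CONFIG_PROVE_RAW_LOCK_NESTING (and on PREEMPT_RT, where spinlock_t
becomes a sleeping lock), a spinlock_t must never be acquired while a
raw_spinlock_t such as rq->lock is held. Here is a minimal illustrative
sketch with made-up lock names; it is not code from this patch:

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(outer);	/* stands in for rq->lock */
	static DEFINE_SPINLOCK(inner);		/* pvclock_gtod_sync_lock before */
	static DEFINE_RAW_SPINLOCK(inner_raw);	/* pvclock_gtod_sync_lock after */

	static void broken(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&outer, flags);
		spin_lock(&inner);	/* invalid wait context: may sleep on RT */
		spin_unlock(&inner);
		raw_spin_unlock_irqrestore(&outer, flags);
	}

	static void fixed(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&outer, flags);
		raw_spin_lock(&inner_raw);	/* raw inside raw is permitted */
		raw_spin_unlock(&inner_raw);
		raw_spin_unlock_irqrestore(&outer, flags);
	}

The cost is that a raw spinlock disables preemption even on RT, so the
sections under pvclock_gtod_sync_lock have to stay short, which they do:
the diff keeps them to a handful of loads and stores.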
 
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/x86.c              | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

Comments

Paolo Bonzini Oct. 25, 2021, 12:15 p.m. UTC | #1
On 23/10/21 22:29, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> On the preemption path when updating a Xen guest's runstate times, this
> lock is taken inside the scheduler rq->lock, which is a raw spinlock.
> This was shown in a lockdep warning:
> 
> [...]
> 
>> However, in 5.15-rc5 I'm still seeing the warning below when I run
>> xen_shinfo_test. I confess I'm not entirely sure what it's telling
>> me.
> 
> I think this is what it was telling me...

Yes, indeed - I will commit this to 5.15 (with a 'fake' merge commit to 
kvm/next to inform git that the pvclock_gtod_sync_lock is gone in 5.16 - 
and the replacement is already a raw spinlock for partly unrelated reasons).

Paolo
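
For reference, the critical section that becomes raw here is tiny. The
lines below are condensed from get_kvmclock_ns() in the patch that
follows ('ka' is &kvm->arch; the !use_master_clock early return is
elided):

	raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
	hv_clock.tsc_timestamp = ka->master_cycle_now;
	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
	raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);

Only two u64 snapshots are taken with the lock held; the TSC read and
scaling happen after the unlock, so the bounded hold time expected of a
raw spinlock is preserved.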

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8f48a7ec577..70771376e246 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1097,7 +1097,7 @@ struct kvm_arch {
 	u64 cur_tsc_generation;
 	int nr_vcpus_matched_tsc;
 
-	spinlock_t pvclock_gtod_sync_lock;
+	raw_spinlock_t pvclock_gtod_sync_lock;
 	bool use_master_clock;
 	u64 master_kernel_ns;
 	u64 master_cycle_now;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aabd3a2ec1bc..f0874c3d12ce 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 	kvm_vcpu_write_tsc_offset(vcpu, offset);
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
-	spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
+	raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
 	if (!matched) {
 		kvm->arch.nr_vcpus_matched_tsc = 0;
 	} else if (!already_matched) {
@@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 	}
 
 	kvm_track_tsc_matching(vcpu);
-	spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
 }
 
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 	kvm_make_mclock_inprogress_request(kvm);
 
 	/* no guest entries from this point */
-	spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
 	pvclock_update_vm_gtod_copy(kvm);
-	spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 	unsigned long flags;
 	u64 ret;
 
-	spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
 	if (!ka->use_master_clock) {
-		spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+		raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 		return get_kvmclock_base_ns() + ka->kvmclock_offset;
 	}
 
 	hv_clock.tsc_timestamp = ka->master_cycle_now;
 	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-	spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
 	/* both __this_cpu_read() and rdtsc() should be on the same cpu */
 	get_cpu();
@@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	 * If the host uses TSC clock, then passthrough TSC as stable
 	 * to the guest.
 	 */
-	spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
 	use_master_clock = ka->use_master_clock;
 	if (use_master_clock) {
 		host_tsc = ka->master_cycle_now;
 		kernel_ns = ka->master_kernel_ns;
 	}
-	spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
@@ -6100,13 +6100,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		 * is slightly ahead) here we risk going negative on unsigned
 		 * 'system_time' when 'user_ns.clock' is very small.
 		 */
-		spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+		raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
 		if (kvm->arch.use_master_clock)
 			now_ns = ka->master_kernel_ns;
 		else
 			now_ns = get_kvmclock_base_ns();
 		ka->kvmclock_offset = user_ns.clock - now_ns;
-		spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+		raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
 
 		kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
 		break;
@@ -8139,9 +8139,9 @@ static void kvm_hyperv_tsc_notifier(void)
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		struct kvm_arch *ka = &kvm->arch;
 
-		spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+		raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
 		pvclock_update_vm_gtod_copy(kvm);
-		spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+		raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
 		kvm_for_each_vcpu(cpu, vcpu, kvm)
 			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -11182,7 +11182,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
-	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+	raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
 	kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
 	pvclock_update_vm_gtod_copy(kvm);