diff mbox series

KVM: x86: Prevent deadlock against tk_core.seq

Message ID 87h7jgm1zy.ffs@nanos.tec.linutronix.de (mailing list archive)
State New, archived
Headers show
Series KVM: x86: Prevent deadlock against tk_core.seq | expand

Commit Message

Thomas Gleixner May 6, 2021, 1:21 p.m. UTC
syzbot reported a possible deadlock in pvclock_gtod_notify():

CPU 0  		  	   	    	    CPU 1
write_seqcount_begin(&tk_core.seq);
  pvclock_gtod_notify()			    spin_lock(&pool->lock);
    queue_work(..., &pvclock_gtod_work)	    ktime_get()
     spin_lock(&pool->lock);		      do {
     						seq = read_seqcount_begin(&tk_core.seq)
						...
				              } while (read_seqcount_retry(&tk_core.seq, seq));

While this is unlikely to happen, it's possible.

Delegate queue_work() to irq_work() which postpones it until the
tk_core.seq write held region is left and interrupts are reenabled.

Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
Reported-by: syzbot+6beae4000559d41d80f8@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
Link: https://lore.kernel.org/r/0000000000001d43ac05c0f5c6a0@google.com
---
 arch/x86/kvm/x86.c |   22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

Comments

Peter Zijlstra May 6, 2021, 1:35 p.m. UTC | #1
On Thu, May 06, 2021 at 03:21:37PM +0200, Thomas Gleixner wrote:
> syzbot reported a possible deadlock in pvclock_gtod_notify():
> 
> CPU 0  		  	   	    	    CPU 1
> write_seqcount_begin(&tk_core.seq);
>   pvclock_gtod_notify()			    spin_lock(&pool->lock);
>     queue_work(..., &pvclock_gtod_work)	    ktime_get()
>      spin_lock(&pool->lock);		      do {
>      						seq = read_seqcount_begin(&tk_core.seq)
> 						...
> 				              } while (read_seqcount_retry(&tk_core.seq, seq));
> 
> While this is unlikely to happen, it's possible.
> 
> Delegate queue_work() to irq_work() which postpones it until the
> tk_core.seq write held region is left and interrupts are reenabled.
> 
> Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
> Reported-by: syzbot+6beae4000559d41d80f8@syzkaller.appspotmail.com
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Paolo Bonzini May 6, 2021, 2:51 p.m. UTC | #2
On 06/05/21 15:21, Thomas Gleixner wrote:
> syzbot reported a possible deadlock in pvclock_gtod_notify():
> 
> CPU 0  		  	   	    	    CPU 1
> write_seqcount_begin(&tk_core.seq);
>    pvclock_gtod_notify()			    spin_lock(&pool->lock);
>      queue_work(..., &pvclock_gtod_work)	    ktime_get()
>       spin_lock(&pool->lock);		      do {
>       						seq = read_seqcount_begin(&tk_core.seq)
> 						...
> 				              } while (read_seqcount_retry(&tk_core.seq, seq));
> 
> While this is unlikely to happen, it's possible.
> 
> Delegate queue_work() to irq_work() which postpones it until the
> tk_core.seq write held region is left and interrupts are reenabled.
> 
> Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
> Reported-by: syzbot+6beae4000559d41d80f8@syzkaller.appspotmail.com
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> Link: https://lore.kernel.org/r/0000000000001d43ac05c0f5c6a0@google.com
> ---
>   arch/x86/kvm/x86.c |   22 ++++++++++++++++++----
>   1 file changed, 18 insertions(+), 4 deletions(-)
> 
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8040,6 +8040,18 @@ static void pvclock_gtod_update_fn(struc
>   static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
>   
>   /*
> + * Indirection to move queue_work() out of the tk_core.seq write held
> + * region to prevent possible deadlocks against time accessors which
> + * are invoked with work related locks held.
> + */
> +static void pvclock_irq_work_fn(struct irq_work *w)
> +{
> +	queue_work(system_long_wq, &pvclock_gtod_work);
> +}
> +
> +static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
> +
> +/*
>    * Notification about pvclock gtod data update.
>    */
>   static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
> @@ -8050,13 +8062,14 @@ static int pvclock_gtod_notify(struct no
>   
>   	update_pvclock_gtod(tk);
>   
> -	/* disable master clock if host does not trust, or does not
> -	 * use, TSC based clocksource.
> +	/*
> +	 * Disable master clock if host does not trust, or does not use,
> +	 * TSC based clocksource. Delegate queue_work() to irq_work as
> +	 * this is invoked with tk_core.seq write held.
>   	 */
>   	if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
>   	    atomic_read(&kvm_guest_has_master_clock) != 0)
> -		queue_work(system_long_wq, &pvclock_gtod_work);
> -
> +		irq_work_queue(&pvclock_irq_work);
>   	return 0;
>   }
>   
> @@ -8168,6 +8181,7 @@ void kvm_arch_exit(void)
>   	cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
>   #ifdef CONFIG_X86_64
>   	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
> +	irq_work_sync(&pvclock_irq_work);
>   	cancel_work_sync(&pvclock_gtod_work);
>   #endif
>   	kvm_x86_ops.hardware_enable = NULL;
> 

Queued, thanks.

Paolo
diff mbox series

Patch

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8040,6 +8040,18 @@  static void pvclock_gtod_update_fn(struc
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
 
 /*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+	queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
+/*
  * Notification about pvclock gtod data update.
  */
 static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
@@ -8050,13 +8062,14 @@  static int pvclock_gtod_notify(struct no
 
 	update_pvclock_gtod(tk);
 
-	/* disable master clock if host does not trust, or does not
-	 * use, TSC based clocksource.
+	/*
+	 * Disable master clock if host does not trust, or does not use,
+	 * TSC based clocksource. Delegate queue_work() to irq_work as
+	 * this is invoked with tk_core.seq write held.
 	 */
 	if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
 	    atomic_read(&kvm_guest_has_master_clock) != 0)
-		queue_work(system_long_wq, &pvclock_gtod_work);
-
+		irq_work_queue(&pvclock_irq_work);
 	return 0;
 }
 
@@ -8168,6 +8181,7 @@  void kvm_arch_exit(void)
 	cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+	irq_work_sync(&pvclock_irq_work);
 	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	kvm_x86_ops.hardware_enable = NULL;