
[2/4] KVM: x86: refactor PIT state inject_lock

Message ID 1454516585-28491-3-git-send-email-rkrcmar@redhat.com (mailing list archive)
State New, archived

Commit Message

Radim Krčmář Feb. 3, 2016, 4:23 p.m. UTC
Following patches would be even uglier if inject_lock didn't go away.

The patch changes the virtual wire comment to better describe our situation.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/i8254.c | 67 ++++++++++++++++++++++------------------------------
 arch/x86/kvm/i8254.h |  3 +--
 2 files changed, 29 insertions(+), 41 deletions(-)

Comments

Paolo Bonzini Feb. 3, 2016, 4:45 p.m. UTC | #1
On 03/02/2016 17:23, Radim Krčmář wrote:
> Following patches would be even uglier if inject_lock didn't go away.
> 
> The patch changes the virtual wire comment to better describe our situation.
> 
> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
> ---
>  arch/x86/kvm/i8254.c | 67 ++++++++++++++++++++++------------------------------
>  arch/x86/kvm/i8254.h |  3 +--
>  2 files changed, 29 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index de5f5018026f..a137eb381012 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -236,16 +236,13 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
>  {
>  	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
>  						 irq_ack_notifier);
> -	int value;
>  
> -	spin_lock(&ps->inject_lock);
> +	atomic_set(&ps->irq_ack, 1);

smp_mb__before_atomic();

>  	if (atomic_add_unless(&ps->pending, -1, 0))
>  		/* in this case, we had multiple outstanding pit interrupts
>  		 * that we needed to inject.  Reinject
>  		 */
>  		queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
> -	ps->irq_ack = 1;
> -	spin_unlock(&ps->inject_lock);
>  }
>  
>  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
> @@ -276,34 +273,25 @@ static void pit_do_work(struct kthread_work *work)
>  	struct kvm_vcpu *vcpu;
>  	int i;
>  	struct kvm_kpit_state *ps = &pit->pit_state;
> -	int inject = 0;
>  
> -	/* Try to inject pending interrupts when
> -	 * last one has been acked.
> +	if (!atomic_xchg(&ps->irq_ack, 0))
> +		return;
> +
> +	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
> +	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
> +
> +	/*
> +	 * Provides NMI watchdog support via Virtual Wire mode.
> +	 * The route is: PIT -> LVT0 in NMI mode.
> +	 *
> +	 * Note: Our Virtual Wire implementation does not follow
> +	 * the MP specification.  We propagate a PIT interrupt to all
> +	 * VCPUs and only when LVT0 is in NMI mode.  The interrupt can
> +	 * also be simultaneously delivered through PIC and IOAPIC.
>  	 */
> -	spin_lock(&ps->inject_lock);
> -	if (ps->irq_ack) {
> -		ps->irq_ack = 0;
> -		inject = 1;
> -	}
> -	spin_unlock(&ps->inject_lock);
> -	if (inject) {
> -		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
> -		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
> -
> -		/*
> -		 * Provides NMI watchdog support via Virtual Wire mode.
> -		 * The route is: PIT -> PIC -> LVT0 in NMI mode.
> -		 *
> -		 * Note: Our Virtual Wire implementation is simplified, only
> -		 * propagating PIT interrupts to all VCPUs when they have set
> -		 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
> -		 * VCPU0, and only if its LVT0 is in EXTINT mode.
> -		 */
> -		if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
> -			kvm_for_each_vcpu(i, vcpu, kvm)
> -				kvm_apic_nmi_wd_deliver(vcpu);
> -	}
> +	if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
> +		kvm_for_each_vcpu(i, vcpu, kvm)
> +			kvm_apic_nmi_wd_deliver(vcpu);
>  }
>  
>  static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
> @@ -323,6 +311,12 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
>  		return HRTIMER_NORESTART;
>  }
>  
> +static void kvm_pit_reset_reinject(struct kvm_pit *pit)
> +{
> +	atomic_set(&pit->pit_state.pending, 0);

smp_wmb()?

Looks safe otherwise.  (Please also add a comment before the memory
barriers to show the pairing).

Paolo
Radim Krčmář Feb. 4, 2016, 1:13 p.m. UTC | #2
2016-02-03 17:45+0100, Paolo Bonzini:
> On 03/02/2016 17:23, Radim Krčmář wrote:
>> Following patches would be even uglier if inject_lock didn't go away.
>> 
>> The patch changes the virtual wire comment to better describe our situation.
>> 
>> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
>> ---
>> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
>> @@ -236,16 +236,13 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
>>  {
>>  	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
>>  						 irq_ack_notifier);
>> -	int value;
>>  
>> -	spin_lock(&ps->inject_lock);

(Our code doesn't need a barrier between dereferencing the pointer and
 reading its contents, and this bug cannot happen on x86 anyway.)

>> +	atomic_set(&ps->irq_ack, 1);
> 
> smp_mb__before_atomic();

irq_ack has to be set before queueing pit_do_work, or we could lose the
interrupt.  atomic_add_unless implies full barriers, so I think we're ok
here.
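
For illustration, with an explicit barrier and a comment showing the
pairing, it could look roughly like this (just a sketch, not what the
patch above does, which relies on atomic_add_unless instead):

  static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
  {
  	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
  						 irq_ack_notifier);

  	atomic_set(&ps->irq_ack, 1);
  	/* irq_ack must be set before pending is read; pairs with the
  	 * increment of pending in pit_timer_fn and with the
  	 * atomic_xchg(&ps->irq_ack, 0) in pit_do_work.
  	 */
  	smp_mb();
  	if (atomic_add_unless(&ps->pending, -1, 0))
  		queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
  }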

>>  	if (atomic_add_unless(&ps->pending, -1, 0))
>>  		queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
>>  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
>> @@ -323,6 +311,12 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
>>  		return HRTIMER_NORESTART;
>>  }
>>  
>> +static void kvm_pit_reset_reinject(struct kvm_pit *pit)
>> +{
>> +	atomic_set(&pit->pit_state.pending, 0);
> 
> smp_wmb()?

I don't think we need to ensure the ordering of pending and irq_ack,
because there is no dependency between the two.  The reset is a slap out
of nowhere; I think we'd need locks to make it behave correctly.

The worst that can happen is one extra virtual wire NMI injection when
the IOAPIC interrupt is still in IRR, so the number of injections won't
match.  The race goes like this:

1. kvm_pit_ack_irq runs while pending > 0, so pit_do_work is queued and
   executed; pit_do_work sets irq_ack from 1 to 0.
2. pit_timer_fn then runs, increments pending, and queues pit_do_work.
3. Before pit_do_work gets a chance to run, kvm_pit_reset_reinject sets
   pending=0 and irq_ack=1.
4. pit_do_work executes, sets irq_ack from 1 to 0, but injects only the
   NMI, because the interrupt is already in IRR.
5. When the interrupt finally does EOI, we don't reinject it, because
   pending == 0.

Barriers can't solve this; locking would be pretty awkward, and I think
that having one interrupt more or less is ok for the delay policy. :)

|  +	atomic_set(&pit->pit_state.irq_ack, 1);

Callers of kvm_pit_reset_reinject provide barriers, so the assignments
can't be reordered into inappropriate places, but it wouldn't hurt to
add barriers here.
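
Something along these lines, if we do add one (again just a sketch; the
posted patch keeps the two plain atomic_set calls):

  static void kvm_pit_reset_reinject(struct kvm_pit *pit)
  {
  	atomic_set(&pit->pit_state.pending, 0);
  	/* Make the cleared pending visible before irq_ack is set, so a
  	 * freshly queued pit_do_work cannot see the new irq_ack together
  	 * with a stale pending count.  Callers already order these
  	 * stores, so the barrier mostly documents the intent.
  	 */
  	smp_wmb();
  	atomic_set(&pit->pit_state.irq_ack, 1);
  }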

> Looks safe otherwise.  (Please also add a comment before the memory
> barriers to show the pairing).

Thanks, I'll comment what I can.

Patch

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index de5f5018026f..a137eb381012 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -236,16 +236,13 @@  static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 						 irq_ack_notifier);
-	int value;
 
-	spin_lock(&ps->inject_lock);
+	atomic_set(&ps->irq_ack, 1);
 	if (atomic_add_unless(&ps->pending, -1, 0))
 		/* in this case, we had multiple outstanding pit interrupts
 		 * that we needed to inject.  Reinject
 		 */
 		queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
-	ps->irq_ack = 1;
-	spin_unlock(&ps->inject_lock);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -276,34 +273,25 @@  static void pit_do_work(struct kthread_work *work)
 	struct kvm_vcpu *vcpu;
 	int i;
 	struct kvm_kpit_state *ps = &pit->pit_state;
-	int inject = 0;
 
-	/* Try to inject pending interrupts when
-	 * last one has been acked.
+	if (!atomic_xchg(&ps->irq_ack, 0))
+		return;
+
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
+
+	/*
+	 * Provides NMI watchdog support via Virtual Wire mode.
+	 * The route is: PIT -> LVT0 in NMI mode.
+	 *
+	 * Note: Our Virtual Wire implementation does not follow
+	 * the MP specification.  We propagate a PIT interrupt to all
+	 * VCPUs and only when LVT0 is in NMI mode.  The interrupt can
+	 * also be simultaneously delivered through PIC and IOAPIC.
 	 */
-	spin_lock(&ps->inject_lock);
-	if (ps->irq_ack) {
-		ps->irq_ack = 0;
-		inject = 1;
-	}
-	spin_unlock(&ps->inject_lock);
-	if (inject) {
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
-
-		/*
-		 * Provides NMI watchdog support via Virtual Wire mode.
-		 * The route is: PIT -> PIC -> LVT0 in NMI mode.
-		 *
-		 * Note: Our Virtual Wire implementation is simplified, only
-		 * propagating PIT interrupts to all VCPUs when they have set
-		 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
-		 * VCPU0, and only if its LVT0 is in EXTINT mode.
-		 */
-		if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
-			kvm_for_each_vcpu(i, vcpu, kvm)
-				kvm_apic_nmi_wd_deliver(vcpu);
-	}
+	if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			kvm_apic_nmi_wd_deliver(vcpu);
 }
 
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
@@ -323,6 +311,12 @@  static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 		return HRTIMER_NORESTART;
 }
 
+static void kvm_pit_reset_reinject(struct kvm_pit *pit)
+{
+	atomic_set(&pit->pit_state.pending, 0);
+	atomic_set(&pit->pit_state.irq_ack, 1);
+}
+
 static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 {
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
@@ -345,8 +339,7 @@  static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	ps->timer.function = pit_timer_fn;
 	ps->kvm = ps->pit->kvm;
 
-	atomic_set(&ps->pending, 0);
-	ps->irq_ack = 1;
+	kvm_pit_reset_reinject(ps->pit);
 
 	/*
 	 * Do not allow the guest to program periodic timers with small
@@ -646,18 +639,15 @@  void kvm_pit_reset(struct kvm_pit *pit)
 	}
 	mutex_unlock(&pit->pit_state.lock);
 
-	atomic_set(&pit->pit_state.pending, 0);
-	pit->pit_state.irq_ack = 1;
+	kvm_pit_reset_reinject(pit);
 }
 
 static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
 {
 	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
 
-	if (!mask) {
-		atomic_set(&pit->pit_state.pending, 0);
-		pit->pit_state.irq_ack = 1;
-	}
+	if (!mask)
+		kvm_pit_reset_reinject(pit);
 }
 
 static const struct kvm_io_device_ops pit_dev_ops = {
@@ -691,7 +681,6 @@  struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
-	spin_lock_init(&pit->pit_state.inject_lock);
 
 	pid = get_pid(task_tgid(current));
 	pid_nr = pid_vnr(pid);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index c84990b42b5b..f8cf4b84f435 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -33,8 +33,7 @@  struct kvm_kpit_state {
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
-	spinlock_t inject_lock;
-	unsigned long irq_ack;
+	atomic_t irq_ack;
 	struct kvm_irq_ack_notifier irq_ack_notifier;
 };