diff mbox

Fix almost infinite loop in APIC

Message ID 20090121150700.GA10018@amt.cnet (mailing list archive)
State Accepted, archived
Headers show

Commit Message

Marcelo Tosatti Jan. 21, 2009, 3:07 p.m. UTC
On Wed, Jan 21, 2009 at 01:11:23PM +0800, Sheng Yang wrote:
> Using the ktime_to_ns() macro is better.
> 
> The remaining parts are fine with me. But please do more testing. :)
> 
> Thanks for the work!

Alexander, can you please confirm this works for you, thanks.


KVM: x86: fix LAPIC pending count calculation

Simplify the LAPIC TMCCT calculation by using the hrtimer-provided
function to query the remaining time until expiration.

Fixes host hang with nested ESX.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Alexander Graf Jan. 21, 2009, 4:01 p.m. UTC | #1
Marcelo Tosatti wrote:
> On Wed, Jan 21, 2009 at 01:11:23PM +0800, Sheng Yang wrote:
>   
>> Use ktime_to_ns() macro is better.
>>
>> The remaining parts are fine with me. But please do more test. :)
>>
>> Thanks for work!
>>     
>
> Alexander, can you please confirm this works for you, thanks.
>   

I don't seem to have hrtimer_expires_remaining in my 2.6.27 kernel.
Actually, the 2.6.27 hrtimer doesn't even have the _expires struct member.

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf Jan. 21, 2009, 4:03 p.m. UTC | #2
Alexander Graf wrote:
> Marcelo Tosatti wrote:
>   
>> On Wed, Jan 21, 2009 at 01:11:23PM +0800, Sheng Yang wrote:
>>   
>>     
>>> Use ktime_to_ns() macro is better.
>>>
>>> The remaining parts are fine with me. But please do more test. :)
>>>
>>> Thanks for work!
>>>     
>>>       
>> Alexander, can you please confirm this works for you, thanks.
>>   
>>     
>
> I don't seem to have hrtimer_expires_remaining in my 2.6.27 kernel.
> Actually, the 2.6.27 hrtimer doesn't even have the _expires struct member.
>   

Scratch that, I'm trying to work around it. It has an "expires" struct
member.

Alex
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf Jan. 21, 2009, 4:18 p.m. UTC | #3
Marcelo Tosatti wrote:
> On Wed, Jan 21, 2009 at 01:11:23PM +0800, Sheng Yang wrote:
>   
>> Use ktime_to_ns() macro is better.
>>
>> The remaining parts are fine with me. But please do more test. :)
>>
>> Thanks for work!
>>     
>
> Alexander, can you please confirm this works for you, thanks.
>   

Looks good so far. I can't really tell if it's doing the right thing,
but at least it's not crashing the host anymore ;). And the VM does come
up (somewhat).

Signed-off-by: Alexander Graf <agraf@suse.de>

>
> KVM: x86: fix LAPIC pending count calculation
>
> Simplify LAPIC TMCCT calculation by using hrtimer provided
> function to query remaining time until expiration.
>
> Fixes host hang with nested ESX.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
>
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index c019b8e..cf17ed5 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
>  }
>  EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
>  
> -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
> -{
> -	kvm_apic_timer_intr_post(vcpu, vec);
> -	/* TODO: PIT, RTC etc. */
> -}
> -EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
> -
>  void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
>  	__kvm_migrate_apic_timer(vcpu);
> diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> index 2bf32a0..82579ee 100644
> --- a/arch/x86/kvm/irq.h
> +++ b/arch/x86/kvm/irq.h
> @@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  
>  void kvm_pic_reset(struct kvm_kpic_state *s);
>  
> -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
>  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
>  void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
>  void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index afac68c..d8adc50 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -511,52 +511,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
>  
>  static u32 apic_get_tmcct(struct kvm_lapic *apic)
>  {
> -	u64 counter_passed;
> -	ktime_t passed, now;
> +	ktime_t remaining;
> +	s64 ns;
>  	u32 tmcct;
>  
>  	ASSERT(apic != NULL);
>  
> -	now = apic->timer.dev.base->get_time();
> -	tmcct = apic_get_reg(apic, APIC_TMICT);
> -
>  	/* if initial count is 0, current count should also be 0 */
> -	if (tmcct == 0)
> +	if (apic_get_reg(apic, APIC_TMICT) == 0)
>  		return 0;
>  
> -	if (unlikely(ktime_to_ns(now) <=
> -		ktime_to_ns(apic->timer.last_update))) {
> -		/* Wrap around */
> -		passed = ktime_add(( {
> -				    (ktime_t) {
> -				    .tv64 = KTIME_MAX -
> -				    (apic->timer.last_update).tv64}; }
> -				   ), now);
> -		apic_debug("time elapsed\n");
> -	} else
> -		passed = ktime_sub(now, apic->timer.last_update);
> -
> -	counter_passed = div64_u64(ktime_to_ns(passed),
> -				   (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
> -
> -	if (counter_passed > tmcct) {
> -		if (unlikely(!apic_lvtt_period(apic))) {
> -			/* one-shot timers stick at 0 until reset */
> -			tmcct = 0;
> -		} else {
> -			/*
> -			 * periodic timers reset to APIC_TMICT when they
> -			 * hit 0. The while loop simulates this happening N
> -			 * times. (counter_passed %= tmcct) would also work,
> -			 * but might be slower or not work on 32-bit??
> -			 */
> -			while (counter_passed > tmcct)
> -				counter_passed -= tmcct;
> -			tmcct -= counter_passed;
> -		}
> -	} else {
> -		tmcct -= counter_passed;
> -	}
> +	remaining = hrtimer_expires_remaining(&apic->timer.dev);
> +	if (ktime_to_ns(remaining) < 0)
> +		remaining = ktime_set(0, 0);
> +
> +	ns = ktime_to_ns(remaining) % apic->timer.period;
> +	tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
>  
>  	return tmcct;
>  }
> @@ -653,8 +623,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
>  {
>  	ktime_t now = apic->timer.dev.base->get_time();
>  
> -	apic->timer.last_update = now;
> -
>  	apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
>  		    APIC_BUS_CYCLE_NS * apic->timer.divide_count;
>  	atomic_set(&apic->timer.pending, 0);
> @@ -1110,16 +1078,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
> -{
> -	struct kvm_lapic *apic = vcpu->arch.apic;
> -
> -	if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
> -		apic->timer.last_update = ktime_add_ns(
> -				apic->timer.last_update,
> -				apic->timer.period);
> -}
> -
>  int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
>  {
>  	int vector = kvm_apic_has_interrupt(vcpu);
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 8185888..45ab6ee 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -12,7 +12,6 @@ struct kvm_lapic {
>  		atomic_t pending;
>  		s64 period;	/* unit: ns */
>  		u32 divide_count;
> -		ktime_t last_update;
>  		struct hrtimer dev;
>  	} timer;
>  	struct kvm_vcpu *vcpu;
> @@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
>  void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
>  int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
>  int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
> -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
>  
>  void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
>  void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 14e517e..db5021b 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2305,7 +2305,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
>  	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
>  	intr_vector = kvm_cpu_get_interrupt(vcpu);
>  	svm_inject_irq(svm, intr_vector);
> -	kvm_timer_intr_post(vcpu, intr_vector);
>  out:
>  	update_cr8_intercept(vcpu);
>  }
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 9b56d21..25aaf11 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3377,7 +3377,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
>  	}
>  	if (vcpu->arch.interrupt.pending) {
>  		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
> -		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
>  		if (kvm_cpu_has_interrupt(vcpu))
>  			enable_irq_window(vcpu);
>  	}
>   

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 21, 2009, 4:55 p.m. UTC | #4
On Wed, Jan 21, 2009 at 05:18:00PM +0100, Alexander Graf wrote:
> Marcelo Tosatti wrote:
> > On Wed, Jan 21, 2009 at 01:11:23PM +0800, Sheng Yang wrote:
> >   
> >> Use ktime_to_ns() macro is better.
> >>
> >> The remaining parts are fine with me. But please do more test. :)
> >>
> >> Thanks for work!
> >>     
> >
> > Alexander, can you please confirm this works for you, thanks.
> >   
> 
> Looks good so far. I can't really tell if it's doing the right thing,
> but at least it's not crashing the host anymore ;). And the VM does come
> up (somewhat).

What might be going wrong there is that accumulated interrupts are still
waiting to be injected (pending > 0). You might want to check for that in
apic_get_tmcct.

> Signed-off-by: Alexander Graf <agraf@suse.de>

Thanks

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Jan. 22, 2009, 1:08 p.m. UTC | #5
Marcelo Tosatti wrote:
> On Wed, Jan 21, 2009 at 01:11:23PM +0800, Sheng Yang wrote:
>   
>> Use ktime_to_ns() macro is better.
>>
>> The remaining parts are fine with me. But please do more test. :)
>>
>> Thanks for work!
>>     
>
> Alexander, can you please confirm this works for you, thanks.
>
>
> KVM: x86: fix LAPIC pending count calculation
>
> Simplify LAPIC TMCCT calculation by using hrtimer provided
> function to query remaining time until expiration.
>
> Fixes host hang with nested ESX.
>
>   

Applied, thanks.
Alex Williamson Jan. 23, 2009, 5:58 p.m. UTC | #6
On Thu, 2009-01-22 at 15:08 +0200, Avi Kivity wrote:
> Marcelo Tosatti wrote:
> > On Wed, Jan 21, 2009 at 01:11:23PM +0800, Sheng Yang wrote:
> >   
> >> Use ktime_to_ns() macro is better.
> >>
> >> The remaining parts are fine with me. But please do more test. :)
> >>
> >> Thanks for work!
> >>     
> >
> > Alexander, can you please confirm this works for you, thanks.
> >
> >
> > KVM: x86: fix LAPIC pending count calculation
> >
> > Simplify LAPIC TMCCT calculation by using hrtimer provided
> > function to query remaining time until expiration.
> >
> > Fixes host hang with nested ESX.
> >
> >   
> 
> Applied, thanks.

This seems to introduce an "Unknown symbol __moddi3" error in the kvm module
on a 32-bit build.

Alex
diff mbox

Patch

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c019b8e..cf17ed5 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -87,13 +87,6 @@  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
 
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-	kvm_apic_timer_intr_post(vcpu, vec);
-	/* TODO: PIT, RTC etc. */
-}
-EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
-
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	__kvm_migrate_apic_timer(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2bf32a0..82579ee 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -89,7 +89,6 @@  static inline int irqchip_in_kernel(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
 
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c..d8adc50 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -511,52 +511,22 @@  static void apic_send_ipi(struct kvm_lapic *apic)
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
-	u64 counter_passed;
-	ktime_t passed, now;
+	ktime_t remaining;
+	s64 ns;
 	u32 tmcct;
 
 	ASSERT(apic != NULL);
 
-	now = apic->timer.dev.base->get_time();
-	tmcct = apic_get_reg(apic, APIC_TMICT);
-
 	/* if initial count is 0, current count should also be 0 */
-	if (tmcct == 0)
+	if (apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
-	if (unlikely(ktime_to_ns(now) <=
-		ktime_to_ns(apic->timer.last_update))) {
-		/* Wrap around */
-		passed = ktime_add(( {
-				    (ktime_t) {
-				    .tv64 = KTIME_MAX -
-				    (apic->timer.last_update).tv64}; }
-				   ), now);
-		apic_debug("time elapsed\n");
-	} else
-		passed = ktime_sub(now, apic->timer.last_update);
-
-	counter_passed = div64_u64(ktime_to_ns(passed),
-				   (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
-
-	if (counter_passed > tmcct) {
-		if (unlikely(!apic_lvtt_period(apic))) {
-			/* one-shot timers stick at 0 until reset */
-			tmcct = 0;
-		} else {
-			/*
-			 * periodic timers reset to APIC_TMICT when they
-			 * hit 0. The while loop simulates this happening N
-			 * times. (counter_passed %= tmcct) would also work,
-			 * but might be slower or not work on 32-bit??
-			 */
-			while (counter_passed > tmcct)
-				counter_passed -= tmcct;
-			tmcct -= counter_passed;
-		}
-	} else {
-		tmcct -= counter_passed;
-	}
+	remaining = hrtimer_expires_remaining(&apic->timer.dev);
+	if (ktime_to_ns(remaining) < 0)
+		remaining = ktime_set(0, 0);
+
+	ns = ktime_to_ns(remaining) % apic->timer.period;
+	tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
 
 	return tmcct;
 }
@@ -653,8 +623,6 @@  static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now = apic->timer.dev.base->get_time();
 
-	apic->timer.last_update = now;
-
 	apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
 		    APIC_BUS_CYCLE_NS * apic->timer.divide_count;
 	atomic_set(&apic->timer.pending, 0);
@@ -1110,16 +1078,6 @@  void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 	}
 }
 
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
-		apic->timer.last_update = ktime_add_ns(
-				apic->timer.last_update,
-				apic->timer.period);
-}
-
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 8185888..45ab6ee 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,7 +12,6 @@  struct kvm_lapic {
 		atomic_t pending;
 		s64 period;	/* unit: ns */
 		u32 divide_count;
-		ktime_t last_update;
 		struct hrtimer dev;
 	} timer;
 	struct kvm_vcpu *vcpu;
@@ -42,7 +41,6 @@  void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 14e517e..db5021b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2305,7 +2305,6 @@  static void svm_intr_assist(struct kvm_vcpu *vcpu)
 	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
 	intr_vector = kvm_cpu_get_interrupt(vcpu);
 	svm_inject_irq(svm, intr_vector);
-	kvm_timer_intr_post(vcpu, intr_vector);
 out:
 	update_cr8_intercept(vcpu);
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9b56d21..25aaf11 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3377,7 +3377,6 @@  static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 	}
 	if (vcpu->arch.interrupt.pending) {
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
 		if (kvm_cpu_has_interrupt(vcpu))
 			enable_irq_window(vcpu);
 	}