diff mbox series

[v2,6/7] KVM: lapic: Refactor start_hv_timer()'s semantics and usage

Message ID 20190416174857.506-7-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: lapic: Fix a variety of timer adv issues | expand

Commit Message

Sean Christopherson April 16, 2019, 5:48 p.m. UTC
start_hv_timer() currently returns a bool, with %false interpreted as
"failure" and %true as "success".  But the reality is far more complex,
as the %false is also returned when programming was successful, but
must be canceled because there was already a pending timer interrupt.
It can also return %false when programming was successful but the timer
was marked expired because the deadline already passed, in which case
the hv timer must also be canceled to avoid double injection.

Functionally, the existing code is correct in the sense that it doesn't
doing anything visibily wrong, e.g. generate spurious interrupts or miss
an interrupt.  But it's extremely confusing and inefficient, e.g. there
are multiple extraneous calls to apic_timer_expired() that effectively
get dropped due to @timer_pending being %true.

Refactor start_hv_timer() to return an 'int' and move the handling of
the various combinations of success vs. expiration vs. periodic up a
level to restart_apic_timer(), which eliminates the needs to overload
the meaning of %false.

Cc: Wanpeng Li <wanpengli@tencent.com>
Suggested-by: Liran Alon <liran.alon@oracle.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/lapic.c | 59 ++++++++++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 19 deletions(-)

Comments

Liran Alon April 16, 2019, 6:44 p.m. UTC | #1
> On 16 Apr 2019, at 20:48, Sean Christopherson <sean.j.christopherson@intel.com> wrote:
> 
> start_hv_timer() currently returns a bool, with %false interpreted as
> "failure" and %true as "success".  But the reality is far more complex,
> as the %false is also returned when programming was successful, but
> must be canceled because there was already a pending timer interrupt.
> It can also return %false when programming was successful but the timer
> was marked expired because the deadline already passed, in which case
> the hv timer must also be canceled to avoid double injection.
> 
> Functionally, the existing code is correct in the sense that it doesn't
> doing anything visibily wrong, e.g. generate spurious interrupts or miss
> an interrupt.  But it's extremely confusing and inefficient, e.g. there
> are multiple extraneous calls to apic_timer_expired() that effectively
> get dropped due to @timer_pending being %true.
> 
> Refactor start_hv_timer() to return an 'int' and move the handling of
> the various combinations of success vs. expiration vs. periodic up a
> level to restart_apic_timer(), which eliminates the needs to overload
> the meaning of %false.
> 
> Cc: Wanpeng Li <wanpengli@tencent.com>
> Suggested-by: Liran Alon <liran.alon@oracle.com>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
> arch/x86/kvm/lapic.c | 59 ++++++++++++++++++++++++++++++--------------
> 1 file changed, 40 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index fa2dbca5ad6a..b867569107d5 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1678,41 +1678,35 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
> 	apic->lapic_timer.hv_timer_in_use = false;
> }
> 
> -static bool start_hv_timer(struct kvm_lapic *apic)
> +/*
> + * Return:
> + *  Negative errno on failure,
> + *  '1' on success and the timer's deadline has passed,
> + *  '0' on success and the timer's deadline is some time in the future.
> + */
> +static int start_hv_timer(struct kvm_lapic *apic)
> {
> 	struct kvm_timer *ktimer = &apic->lapic_timer;
> 	int r;
> 
> 	WARN_ON(preemptible());
> 	if (!kvm_x86_ops->set_hv_timer)
> -		return false;
> +		return -ENOENT;
> 
> 	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
> -		return false;
> +		return -EBUSY;
> 
> 	if (!ktimer->tscdeadline)
> -		return false;
> +		return -EINVAL;

These 2 upper “if” blocks can just be moved to start of restart_apic_timer() and early-return
in case any of them is true. 

In your code, start_hv_timer() will return r<0 which will cause restart_apic_timer() to call start_sw_timer()
which will then see one of these conditions and do nothing.
This is what I tried to warn about in the v1 email thread.

> 
> 	r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
> 	if (r < 0)
> -		return false;
> +		return r;
> 
> 	ktimer->hv_timer_in_use = true;
> 	hrtimer_cancel(&ktimer->timer);
> 
> -	/*
> -	 * Also recheck ktimer->pending, in case the sw timer triggered in
> -	 * the window.  For periodic timer, leave the hv timer running for
> -	 * simplicity, and the deadline will be recomputed on the next vmexit.
> -	 */
> -	if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
> -		if (r)
> -			apic_timer_expired(apic);
> -		return false;
> -	}
> -
> -	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
> -	return true;
> +	return r;
> }
> 
> static void start_sw_timer(struct kvm_lapic *apic)
> @@ -1734,9 +1728,36 @@ static void start_sw_timer(struct kvm_lapic *apic)
> 
> static void restart_apic_timer(struct kvm_lapic *apic)
> {
> +	int r;
> +
> 	preempt_disable();
> -	if (!start_hv_timer(apic))
> +	r = start_hv_timer(apic);
> +	if (r < 0) {
> 		start_sw_timer(apic);
> +	} else if (!apic_lvtt_period(apic)) {
> +		/*
> +		 * For a periodic timer, leave the timer running for simplicity
> +		 * so that the deadline will be recomputed on the next VM-Exit.
> +		 */
> +		if (atomic_read(&apic->lapic_timer.pending)) {
> +			/*
> +			 * Cancel the hv timer if the sw timer fired while the
> +			 * hv timer was being programmed.
> +			 */
> +			cancel_hv_timer(apic);
> +		} else if (r) {
> +			/*
> +			 * start_hv_timer() returns '1' if the timer's deadline
> +			 * has already passed.  Again, go through with the full
> +			 * programming in the periodic case as the VM-Exit is
> +			 * needed to recompute the periodic timer deadline.
> +			 */
> +			apic_timer_expired(apic);
> +			cancel_hv_timer(apic);
> +		}
> +	}
> +	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
> +				 apic->lapic_timer.hv_timer_in_use);
> 	preempt_enable();
> }
> 
> -- 
> 2.21.0
> 

I think I had a simpler patch.
I will try to send it and see what you think.

-Liran
Liran Alon April 16, 2019, 6:47 p.m. UTC | #2
> On 16 Apr 2019, at 21:44, Liran Alon <liran.alon@oracle.com> wrote:
> 
> 
> 
>> On 16 Apr 2019, at 20:48, Sean Christopherson <sean.j.christopherson@intel.com> wrote:
>> 
>> start_hv_timer() currently returns a bool, with %false interpreted as
>> "failure" and %true as "success".  But the reality is far more complex,
>> as the %false is also returned when programming was successful, but
>> must be canceled because there was already a pending timer interrupt.
>> It can also return %false when programming was successful but the timer
>> was marked expired because the deadline already passed, in which case
>> the hv timer must also be canceled to avoid double injection.
>> 
>> Functionally, the existing code is correct in the sense that it doesn't
>> doing anything visibily wrong, e.g. generate spurious interrupts or miss
>> an interrupt.  But it's extremely confusing and inefficient, e.g. there
>> are multiple extraneous calls to apic_timer_expired() that effectively
>> get dropped due to @timer_pending being %true.
>> 
>> Refactor start_hv_timer() to return an 'int' and move the handling of
>> the various combinations of success vs. expiration vs. periodic up a
>> level to restart_apic_timer(), which eliminates the needs to overload
>> the meaning of %false.
>> 
>> Cc: Wanpeng Li <wanpengli@tencent.com>
>> Suggested-by: Liran Alon <liran.alon@oracle.com>
>> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
>> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
>> ---
>> arch/x86/kvm/lapic.c | 59 ++++++++++++++++++++++++++++++--------------
>> 1 file changed, 40 insertions(+), 19 deletions(-)
>> 
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index fa2dbca5ad6a..b867569107d5 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -1678,41 +1678,35 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
>> 	apic->lapic_timer.hv_timer_in_use = false;
>> }
>> 
>> -static bool start_hv_timer(struct kvm_lapic *apic)
>> +/*
>> + * Return:
>> + *  Negative errno on failure,
>> + *  '1' on success and the timer's deadline has passed,
>> + *  '0' on success and the timer's deadline is some time in the future.
>> + */
>> +static int start_hv_timer(struct kvm_lapic *apic)
>> {
>> 	struct kvm_timer *ktimer = &apic->lapic_timer;
>> 	int r;
>> 
>> 	WARN_ON(preemptible());
>> 	if (!kvm_x86_ops->set_hv_timer)
>> -		return false;
>> +		return -ENOENT;
>> 
>> 	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
>> -		return false;
>> +		return -EBUSY;
>> 
>> 	if (!ktimer->tscdeadline)
>> -		return false;
>> +		return -EINVAL;
> 
> These 2 upper “if” blocks can just be moved to start of restart_apic_timer() and early-return
> in case any of them is true. 
> 
> In your code, start_hv_timer() will return r<0 which will cause restart_apic_timer() to call start_sw_timer()
> which will then see one of these conditions and do nothing.
> This is what I tried to warn about in the v1 email thread.
> 
>> 
>> 	r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
>> 	if (r < 0)
>> -		return false;
>> +		return r;
>> 
>> 	ktimer->hv_timer_in_use = true;
>> 	hrtimer_cancel(&ktimer->timer);
>> 
>> -	/*
>> -	 * Also recheck ktimer->pending, in case the sw timer triggered in
>> -	 * the window.  For periodic timer, leave the hv timer running for
>> -	 * simplicity, and the deadline will be recomputed on the next vmexit.
>> -	 */
>> -	if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
>> -		if (r)
>> -			apic_timer_expired(apic);
>> -		return false;
>> -	}
>> -
>> -	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
>> -	return true;
>> +	return r;
>> }
>> 
>> static void start_sw_timer(struct kvm_lapic *apic)
>> @@ -1734,9 +1728,36 @@ static void start_sw_timer(struct kvm_lapic *apic)
>> 
>> static void restart_apic_timer(struct kvm_lapic *apic)
>> {
>> +	int r;
>> +
>> 	preempt_disable();
>> -	if (!start_hv_timer(apic))
>> +	r = start_hv_timer(apic);
>> +	if (r < 0) {
>> 		start_sw_timer(apic);
>> +	} else if (!apic_lvtt_period(apic)) {
>> +		/*
>> +		 * For a periodic timer, leave the timer running for simplicity
>> +		 * so that the deadline will be recomputed on the next VM-Exit.
>> +		 */
>> +		if (atomic_read(&apic->lapic_timer.pending)) {
>> +			/*
>> +			 * Cancel the hv timer if the sw timer fired while the
>> +			 * hv timer was being programmed.
>> +			 */
>> +			cancel_hv_timer(apic);
>> +		} else if (r) {
>> +			/*
>> +			 * start_hv_timer() returns '1' if the timer's deadline
>> +			 * has already passed.  Again, go through with the full
>> +			 * programming in the periodic case as the VM-Exit is
>> +			 * needed to recompute the periodic timer deadline.
>> +			 */
>> +			apic_timer_expired(apic);
>> +			cancel_hv_timer(apic);
>> +		}
>> +	}
>> +	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
>> +				 apic->lapic_timer.hv_timer_in_use);
>> 	preempt_enable();
>> }
>> 
>> -- 
>> 2.21.0
>> 
> 
> I think I had a simpler patch.
> I will try to send it and see what you think.
> 
> -Liran

Not tested, but something like:

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fbd37e07e90d..151d88833cfe 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1531,7 +1531,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
        unsigned long flags;
        ktime_t now;

-       if (unlikely(!tscdeadline || !this_tsc_khz))
+       if (unlikely(!this_tsc_khz))
                return;

        local_irq_save(flags);
@@ -1675,13 +1675,8 @@ static bool start_hv_timer(struct kvm_lapic *apic)
        int r;

        WARN_ON(preemptible());
-       if (!kvm_x86_ops->set_hv_timer)
-               return false;

-       if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
-               return false;
-
-       if (!ktimer->tscdeadline)
+       if (!kvm_x86_ops->set_hv_timer)
                return false;

        r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
@@ -1697,9 +1692,11 @@ static bool start_hv_timer(struct kvm_lapic *apic)
         * simplicity, and the deadline will be recomputed on the next vmexit.
         */
        if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
-               if (r)
+               if (r) {
                        apic_timer_expired(apic);
-               return false;
+                       ktimer->hv_timer_in_use = false;
+               }
+               return true;
        }

        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
@@ -1708,13 +1705,9 @@ static bool start_hv_timer(struct kvm_lapic *apic)

 static void start_sw_timer(struct kvm_lapic *apic)
 {
-       struct kvm_timer *ktimer = &apic->lapic_timer;
-
        WARN_ON(preemptible());
        if (apic->lapic_timer.hv_timer_in_use)
                cancel_hv_timer(apic);
-       if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
-               return;

        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
                start_sw_period(apic);
@@ -1725,9 +1718,20 @@ static void start_sw_timer(struct kvm_lapic *apic)

 static void restart_apic_timer(struct kvm_lapic *apic)
 {
+       struct kvm_timer *ktimer = &apic->lapic_timer;
+
        preempt_disable();
+
+       if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
+               goto out;
+
+       if (!ktimer->tscdeadline)
+               goto out;
+
        if (!start_hv_timer(apic))
                start_sw_timer(apic);
+
+out:
        preempt_enable();
 }

What do you think?

-Liran
diff mbox series

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fa2dbca5ad6a..b867569107d5 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1678,41 +1678,35 @@  static void cancel_hv_timer(struct kvm_lapic *apic)
 	apic->lapic_timer.hv_timer_in_use = false;
 }
 
-static bool start_hv_timer(struct kvm_lapic *apic)
+/*
+ * Return:
+ *  Negative errno on failure,
+ *  '1' on success and the timer's deadline has passed,
+ *  '0' on success and the timer's deadline is some time in the future.
+ */
+static int start_hv_timer(struct kvm_lapic *apic)
 {
 	struct kvm_timer *ktimer = &apic->lapic_timer;
 	int r;
 
 	WARN_ON(preemptible());
 	if (!kvm_x86_ops->set_hv_timer)
-		return false;
+		return -ENOENT;
 
 	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
-		return false;
+		return -EBUSY;
 
 	if (!ktimer->tscdeadline)
-		return false;
+		return -EINVAL;
 
 	r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
 	if (r < 0)
-		return false;
+		return r;
 
 	ktimer->hv_timer_in_use = true;
 	hrtimer_cancel(&ktimer->timer);
 
-	/*
-	 * Also recheck ktimer->pending, in case the sw timer triggered in
-	 * the window.  For periodic timer, leave the hv timer running for
-	 * simplicity, and the deadline will be recomputed on the next vmexit.
-	 */
-	if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
-		if (r)
-			apic_timer_expired(apic);
-		return false;
-	}
-
-	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
-	return true;
+	return r;
 }
 
 static void start_sw_timer(struct kvm_lapic *apic)
@@ -1734,9 +1728,36 @@  static void start_sw_timer(struct kvm_lapic *apic)
 
 static void restart_apic_timer(struct kvm_lapic *apic)
 {
+	int r;
+
 	preempt_disable();
-	if (!start_hv_timer(apic))
+	r = start_hv_timer(apic);
+	if (r < 0) {
 		start_sw_timer(apic);
+	} else if (!apic_lvtt_period(apic)) {
+		/*
+		 * For a periodic timer, leave the timer running for simplicity
+		 * so that the deadline will be recomputed on the next VM-Exit.
+		 */
+		if (atomic_read(&apic->lapic_timer.pending)) {
+			/*
+			 * Cancel the hv timer if the sw timer fired while the
+			 * hv timer was being programmed.
+			 */
+			cancel_hv_timer(apic);
+		} else if (r) {
+			/*
+			 * start_hv_timer() returns '1' if the timer's deadline
+			 * has already passed.  Again, go through with the full
+			 * programming in the periodic case as the VM-Exit is
+			 * needed to recompute the periodic timer deadline.
+			 */
+			apic_timer_expired(apic);
+			cancel_hv_timer(apic);
+		}
+	}
+	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
+				 apic->lapic_timer.hv_timer_in_use);
 	preempt_enable();
 }