diff mbox

[RFC,2/2] KVM: x86: use __kvm_guest_exit

Message ID 1466065297-4644-3-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paolo Bonzini June 16, 2016, 8:21 a.m. UTC
This gains ~20 clock cycles per vmexit.  On Intel there is no need
anymore to enable the interrupts in vmx_handle_external_intr, since we
are using the "acknowledge interrupt on exit" feature.  AMD needs to do
that temporarily, and must be careful to avoid the interrupt shadow.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c |  6 ++++++
 arch/x86/kvm/vmx.c |  4 +---
 arch/x86/kvm/x86.c | 11 ++---------
 3 files changed, 9 insertions(+), 12 deletions(-)

Comments

David Matlack June 16, 2016, 4:43 p.m. UTC | #1
On Thu, Jun 16, 2016 at 1:21 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> This gains ~20 clock cycles per vmexit.  On Intel there is no need
> anymore to enable the interrupts in vmx_handle_external_intr, since we
> are using the "acknowledge interrupt on exit" feature.  AMD needs to do
> that temporarily, and must be careful to avoid the interrupt shadow.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/svm.c |  6 ++++++
>  arch/x86/kvm/vmx.c |  4 +---
>  arch/x86/kvm/x86.c | 11 ++---------
>  3 files changed, 9 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 5ff292778110..5bfdbbf1ce79 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4935,6 +4935,12 @@ out:
>  static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
>  {
>         local_irq_enable();
> +       /*
> +        * We must execute an instruction with interrupts enabled, so
> +        * the "cli" doesn't fall right on the interrupt shadow.
> +        */
> +       asm("nop");
> +       local_irq_disable();
>  }
>
>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 4e9657730bf6..a46bce9e3683 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -8544,7 +8544,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>                         "push %[sp]\n\t"
>  #endif
>                         "pushf\n\t"
> -                       "orl $0x200, (%%" _ASM_SP ")\n\t"
>                         __ASM_SIZE(push) " $%c[cs]\n\t"
>                         "call *%[entry]\n\t"
>                         :
> @@ -8557,8 +8556,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>                         [ss]"i"(__KERNEL_DS),
>                         [cs]"i"(__KERNEL_CS)
>                         );
> -       } else
> -               local_irq_enable();
> +       }

If you make the else case the same as svm_handle_external_intr, can we
avoid requiring ack-intr-on-exit?

>  }
>
>  static bool vmx_has_high_real_mode_segbase(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7e3041ef050f..cc741b68139c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6706,21 +6706,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>
>         kvm_put_guest_xcr0(vcpu);
>
> -       /* Interrupt is enabled by handle_external_intr() */
>         kvm_x86_ops->handle_external_intr(vcpu);
>
>         ++vcpu->stat.exits;
>
> -       /*
> -        * We must have an instruction between local_irq_enable() and
> -        * kvm_guest_exit(), so the timer interrupt isn't delayed by
> -        * the interrupt shadow.  The stat.exits increment will do nicely.
> -        * But we need to prevent reordering, hence this barrier():
> -        */
> -       barrier();
> -
> -       kvm_guest_exit();
> +       __kvm_guest_exit();
>
> +       local_irq_enable();
>         preempt_enable();
>
>         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> --
> 1.8.3.1
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 16, 2016, 4:47 p.m. UTC | #2
On 16/06/2016 18:43, David Matlack wrote:
> On Thu, Jun 16, 2016 at 1:21 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> This gains ~20 clock cycles per vmexit.  On Intel there is no need
>> anymore to enable the interrupts in vmx_handle_external_intr, since we
>> are using the "acknowledge interrupt on exit" feature.  AMD needs to do
>> that temporarily, and must be careful to avoid the interrupt shadow.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  arch/x86/kvm/svm.c |  6 ++++++
>>  arch/x86/kvm/vmx.c |  4 +---
>>  arch/x86/kvm/x86.c | 11 ++---------
>>  3 files changed, 9 insertions(+), 12 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 5ff292778110..5bfdbbf1ce79 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -4935,6 +4935,12 @@ out:
>>  static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
>>  {
>>         local_irq_enable();
>> +       /*
>> +        * We must execute an instruction with interrupts enabled, so
>> +        * the "cli" doesn't fall right on the interrupt shadow.
>> +        */
>> +       asm("nop");
>> +       local_irq_disable();
>>  }
>>
>>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 4e9657730bf6..a46bce9e3683 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -8544,7 +8544,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>>                         "push %[sp]\n\t"
>>  #endif
>>                         "pushf\n\t"
>> -                       "orl $0x200, (%%" _ASM_SP ")\n\t"
>>                         __ASM_SIZE(push) " $%c[cs]\n\t"
>>                         "call *%[entry]\n\t"
>>                         :
>> @@ -8557,8 +8556,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>>                         [ss]"i"(__KERNEL_DS),
>>                         [cs]"i"(__KERNEL_CS)
>>                         );
>> -       } else
>> -               local_irq_enable();
>> +       }
> 
> If you make the else case the same as svm_handle_external_intr, can we
> avoid requiring ack-intr-on-exit?

Yes, but the sti/nop/cli would be useless if ack-intr-on-exit is
available.  It's a bit ugly, so I RFCed the bold thing instead.

Are you thinking of some distros in particular that lack nested
ack-intr-on-exit?  All processors have it as far as I know.

Paolo


>>  }
>>
>>  static bool vmx_has_high_real_mode_segbase(void)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 7e3041ef050f..cc741b68139c 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6706,21 +6706,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>
>>         kvm_put_guest_xcr0(vcpu);
>>
>> -       /* Interrupt is enabled by handle_external_intr() */
>>         kvm_x86_ops->handle_external_intr(vcpu);
>>
>>         ++vcpu->stat.exits;
>>
>> -       /*
>> -        * We must have an instruction between local_irq_enable() and
>> -        * kvm_guest_exit(), so the timer interrupt isn't delayed by
>> -        * the interrupt shadow.  The stat.exits increment will do nicely.
>> -        * But we need to prevent reordering, hence this barrier():
>> -        */
>> -       barrier();
>> -
>> -       kvm_guest_exit();
>> +       __kvm_guest_exit();
>>
>> +       local_irq_enable();
>>         preempt_enable();
>>
>>         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
>> --
>> 1.8.3.1
>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Matlack June 16, 2016, 5:03 p.m. UTC | #3
On Thu, Jun 16, 2016 at 9:47 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 16/06/2016 18:43, David Matlack wrote:
>> On Thu, Jun 16, 2016 at 1:21 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> This gains ~20 clock cycles per vmexit.  On Intel there is no need
>>> anymore to enable the interrupts in vmx_handle_external_intr, since we
>>> are using the "acknowledge interrupt on exit" feature.  AMD needs to do
>>> that temporarily, and must be careful to avoid the interrupt shadow.
>>>
>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>> ---
>>>  arch/x86/kvm/svm.c |  6 ++++++
>>>  arch/x86/kvm/vmx.c |  4 +---
>>>  arch/x86/kvm/x86.c | 11 ++---------
>>>  3 files changed, 9 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index 5ff292778110..5bfdbbf1ce79 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -4935,6 +4935,12 @@ out:
>>>  static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
>>>  {
>>>         local_irq_enable();
>>> +       /*
>>> +        * We must execute an instruction with interrupts enabled, so
>>> +        * the "cli" doesn't fall right on the interrupt shadow.
>>> +        */
>>> +       asm("nop");
>>> +       local_irq_disable();
>>>  }
>>>
>>>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index 4e9657730bf6..a46bce9e3683 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -8544,7 +8544,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>>>                         "push %[sp]\n\t"
>>>  #endif
>>>                         "pushf\n\t"
>>> -                       "orl $0x200, (%%" _ASM_SP ")\n\t"
>>>                         __ASM_SIZE(push) " $%c[cs]\n\t"
>>>                         "call *%[entry]\n\t"
>>>                         :
>>> @@ -8557,8 +8556,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>>>                         [ss]"i"(__KERNEL_DS),
>>>                         [cs]"i"(__KERNEL_CS)
>>>                         );
>>> -       } else
>>> -               local_irq_enable();
>>> +       }
>>
>> If you make the else case the same as svm_handle_external_intr, can we
>> avoid requiring ack-intr-on-exit?
>
> Yes, but the sti/nop/cli would be useless if ack-intr-on-exit is
> available.  It's a bit ugly, so I RFCed the bold thing instead.

Ahh, and handle_external_intr is called on every VM-exit, not just
VM-exits caused by external interrupts. So we'd be doing the
sti/nop/cli quite often. I was thinking we never hit the else case
when the CPU supports ack-intr-on-exit.

>
> Are you thinking of some distros in particular that lack nested
> ack-intr-on-exit?  All processors have it as far as I know.

Nope, I just thought it was possible to avoid the requirement.

>
> Paolo
>
>
>>>  }
>>>
>>>  static bool vmx_has_high_real_mode_segbase(void)
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 7e3041ef050f..cc741b68139c 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -6706,21 +6706,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>>
>>>         kvm_put_guest_xcr0(vcpu);
>>>
>>> -       /* Interrupt is enabled by handle_external_intr() */
>>>         kvm_x86_ops->handle_external_intr(vcpu);
>>>
>>>         ++vcpu->stat.exits;
>>>
>>> -       /*
>>> -        * We must have an instruction between local_irq_enable() and
>>> -        * kvm_guest_exit(), so the timer interrupt isn't delayed by
>>> -        * the interrupt shadow.  The stat.exits increment will do nicely.
>>> -        * But we need to prevent reordering, hence this barrier():
>>> -        */
>>> -       barrier();
>>> -
>>> -       kvm_guest_exit();
>>> +       __kvm_guest_exit();
>>>
>>> +       local_irq_enable();
>>>         preempt_enable();
>>>
>>>         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
>>> --
>>> 1.8.3.1
>>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 16, 2016, 5:21 p.m. UTC | #4
On 16/06/2016 19:03, David Matlack wrote:
> > > If you make the else case the same as svm_handle_external_intr, can we
> > > avoid requiring ack-intr-on-exit?
> >
> > Yes, but the sti/nop/cli would be useless if ack-intr-on-exit is
> > available.  It's a bit ugly, so I RFCed the bold thing instead.
>
> Ahh, and handle_external_intr is called on every VM-exit, not just
> VM-exits caused by external interrupts. So we'd be doing the
> sti/nop/cli quite often. I was thinking we never hit the else case
> when the CPU supports ack-intr-on-exit.

Actually it's really just aesthetics, because the sti and cli are pretty
cheap.  It's the pushf/popf that kills performance for kvm_guest_exit.
I also thought of just doing a cli/sti around __kvm_guest_exit and
calling it a day.

Ubuntu 14.04 had kernel 3.13, but the latest hardware enablement kernels
are as recent as 4.4.  And the most recent released RHEL (7.2) has all
the fixes too.

Debian Jessie has 3.16.7-ckt25, and all three patches for APICv support
have been backported to 3.16.7-ckt11.  So they should be there (but I
can only check tomorrow).

Paolo

>> >
>> > Are you thinking of some distros in particular that lack nested
>> > ack-intr-on-exit?  All processors have it as far as I know.
> Nope, I just thought it was possible to avoid the requirement.
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bandan Das June 16, 2016, 10:01 p.m. UTC | #5
...
>  static bool vmx_has_high_real_mode_segbase(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7e3041ef050f..cc741b68139c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6706,21 +6706,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  
>  	kvm_put_guest_xcr0(vcpu);
>  
> -	/* Interrupt is enabled by handle_external_intr() */
>  	kvm_x86_ops->handle_external_intr(vcpu);
>  
>  	++vcpu->stat.exits;
>  
> -	/*
> -	 * We must have an instruction between local_irq_enable() and
> -	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
> -	 * the interrupt shadow.  The stat.exits increment will do nicely.
> -	 * But we need to prevent reordering, hence this barrier():
> -	 */
> -	barrier();
> -
> -	kvm_guest_exit();
> +	__kvm_guest_exit();

kvm_guest_exit has no more callers and so can be removed.

Bandan

> +	local_irq_enable();
>  	preempt_enable();
>  
>  	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 17, 2016, 5:20 a.m. UTC | #6
> >  static bool vmx_has_high_real_mode_segbase(void)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 7e3041ef050f..cc741b68139c 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -6706,21 +6706,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  
> >  	kvm_put_guest_xcr0(vcpu);
> >  
> > -	/* Interrupt is enabled by handle_external_intr() */
> >  	kvm_x86_ops->handle_external_intr(vcpu);
> >  
> >  	++vcpu->stat.exits;
> >  
> > -	/*
> > -	 * We must have an instruction between local_irq_enable() and
> > -	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
> > -	 * the interrupt shadow.  The stat.exits increment will do nicely.
> > -	 * But we need to prevent reordering, hence this barrier():
> > -	 */
> > -	barrier();
> > -
> > -	kvm_guest_exit();
> > +	__kvm_guest_exit();
> 
> kvm_guest_exit has no more callers and so can be removed.

ARM and PPC call it.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5ff292778110..5bfdbbf1ce79 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4935,6 +4935,12 @@  out:
 static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
 {
 	local_irq_enable();
+	/*
+	 * We must execute an instruction with interrupts enabled, so
+	 * the "cli" doesn't fall right on the interrupt shadow.
+	 */
+	asm("nop");
+	local_irq_disable();
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e9657730bf6..a46bce9e3683 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8544,7 +8544,6 @@  static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			"push %[sp]\n\t"
 #endif
 			"pushf\n\t"
-			"orl $0x200, (%%" _ASM_SP ")\n\t"
 			__ASM_SIZE(push) " $%c[cs]\n\t"
 			"call *%[entry]\n\t"
 			:
@@ -8557,8 +8556,7 @@  static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			[ss]"i"(__KERNEL_DS),
 			[cs]"i"(__KERNEL_CS)
 			);
-	} else
-		local_irq_enable();
+	}
 }
 
 static bool vmx_has_high_real_mode_segbase(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7e3041ef050f..cc741b68139c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6706,21 +6706,13 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	kvm_put_guest_xcr0(vcpu);
 
-	/* Interrupt is enabled by handle_external_intr() */
 	kvm_x86_ops->handle_external_intr(vcpu);
 
 	++vcpu->stat.exits;
 
-	/*
-	 * We must have an instruction between local_irq_enable() and
-	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
-	 * the interrupt shadow.  The stat.exits increment will do nicely.
-	 * But we need to prevent reordering, hence this barrier():
-	 */
-	barrier();
-
-	kvm_guest_exit();
+	__kvm_guest_exit();
 
+	local_irq_enable();
 	preempt_enable();
 
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);