
[v2] kvm: nVMX: Remove superfluous VMX instruction fault checks

Message ID 20170424152851.17976-1-jmattson@google.com
State New, archived

Commit Message

Jim Mattson April 24, 2017, 3:28 p.m. UTC
According to the Intel SDM, "Certain exceptions have priority over VM
exits. These include invalid-opcode exceptions, faults based on
privilege level*, and general-protection exceptions that are based on
checking I/O permission bits in the task-state segment (TSS)."

There is no need to check for faulting conditions that the hardware
has already checked.

One of the constraints on the VMX instructions is that they are not
allowed in real-address mode. Though the hardware checks for this
condition as well, when real-address mode is emulated, the faulting
condition does have to be checked in software.

* These include faults generated by attempts to execute, in
  virtual-8086 mode, privileged instructions that are not recognized
  in that mode.

Signed-off-by: Jim Mattson <jmattson@google.com>
---
 arch/x86/kvm/vmx.c | 57 ++++++++++++++----------------------------------------
 1 file changed, 15 insertions(+), 42 deletions(-)
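
For orientation, the net effect of the patch on handle_vmon() is roughly
the following. This is a sketch reassembled from the hunks below as a
reading aid, not verbatim kernel code; the trailing feature-control MSR
check is elided:

static int handle_vmon(struct kvm_vcpu *vcpu)
{
	int ret;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;

	/*
	 * Hardware has already raised #UD or #GP for nearly all of the
	 * VMXON preconditions (CR4.VMXE, RFLAGS.VM, CPL, CS.L). The one
	 * gap is real-address mode: when it is emulated (i.e. without
	 * unrestricted guest), hardware never sees the guest in real
	 * mode, so this check has to stay in software.
	 */
	if (!enable_unrestricted_guest &&
	    !kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	if (vmx->nested.vmxon) {
		nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
		return kvm_skip_emulated_instruction(vcpu);
	}

	/* ... VMXON_NEEDED_FEATURES check and enter_vmx_operation(),
	 * unchanged by this patch ... */
}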

Comments

David Hildenbrand April 25, 2017, 12:38 p.m. UTC | #1
On 24.04.2017 17:28, Jim Mattson wrote:
> According to the Intel SDM, "Certain exceptions have priority over VM
> exits. These include invalid-opcode exceptions, faults based on
> privilege level*, and general-protection exceptions that are based on
> checking I/O permission bits in the task-state segment (TSS)."
> 
> There is no need to check for faulting conditions that the hardware
> has already checked.
> 
> One of the constraints on the VMX instructions is that they are not
> allowed in real-address mode. Though the hardware checks for this
> condition as well, when real-address mode is emulated, the faulting
> condition does have to be checked in software.
> 
> * These include faults generated by attempts to execute, in
>   virtual-8086 mode, privileged instructions that are not recognized
>   in that mode.
> 
> Signed-off-by: Jim Mattson <jmattson@google.com>
> ---
>  arch/x86/kvm/vmx.c | 57 ++++++++++++++----------------------------------------
>  1 file changed, 15 insertions(+), 42 deletions(-)

Sounds perfectly sane to me and looks like a nice cleanup. Wonder if we
have a test to verify that the checks are actually done in HW. (I've
seen the strangest special cases related to virtualization mode in CPUs)

> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 259e9b28ccf8..e7a7edb4cdb0 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -7107,7 +7107,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
>  static int handle_vmon(struct kvm_vcpu *vcpu)
>  {
>  	int ret;
> -	struct kvm_segment cs;
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
>  		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
> @@ -7115,26 +7114,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>  	/* The Intel VMX Instruction Reference lists a bunch of bits that
>  	 * are prerequisite to running VMXON, most notably cr4.VMXE must be
>  	 * set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
> -	 * Otherwise, we should fail with #UD. We test these now:
> +	 * Otherwise, we should fail with #UD. Hardware has already tested
> +	 * most or all of these conditions, with the exception of real-address
> +	 * mode, when real-address mode is emulated.
>  	 */
> -	if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) ||
> -	    !kvm_read_cr0_bits(vcpu, X86_CR0_PE) ||
> -	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
> -		kvm_queue_exception(vcpu, UD_VECTOR);
> -		return 1;
> -	}
>  
> -	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
> -	if (is_long_mode(vcpu) && !cs.l) {
> +	if ((!enable_unrestricted_guest &&
> +	     !kvm_read_cr0_bits(vcpu, X86_CR0_PE))) {
>  		kvm_queue_exception(vcpu, UD_VECTOR);
>  		return 1;
>  	}
>  
> -	if (vmx_get_cpl(vcpu)) {
> -		kvm_inject_gp(vcpu, 0);
> -		return 1;
> -	}
> -
>  	if (vmx->nested.vmxon) {
>  		nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
>  		return kvm_skip_emulated_instruction(vcpu);
> @@ -7161,30 +7151,18 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>   * Intel's VMX Instruction Reference specifies a common set of prerequisites
>   * for running VMX instructions (except VMXON, whose prerequisites are
>   * slightly different). It also specifies what exception to inject otherwise.
> + * Note that many of these exceptions have priority over VM exits, so they
> + * don't have to be checked again here.
>   */
> -static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
> +static bool nested_vmx_check_permission(struct kvm_vcpu *vcpu)

I'm a fan of splitting such changes out. (less noise for reviewing the
actual magic).
Jim Mattson April 25, 2017, 2:10 p.m. UTC | #2
On Tue, Apr 25, 2017 at 5:38 AM, David Hildenbrand <david@redhat.com> wrote:
> On 24.04.2017 17:28, Jim Mattson wrote:
>> According to the Intel SDM, "Certain exceptions have priority over VM
>> exits. These include invalid-opcode exceptions, faults based on
>> privilege level*, and general-protection exceptions that are based on
>> checking I/O permission bits in the task-state segment (TSS)."
>>
>> There is no need to check for faulting conditions that the hardware
>> has already checked.
>>
>> One of the constraints on the VMX instructions is that they are not
>> allowed in real-address mode. Though the hardware checks for this
>> condition as well, when real-address mode is emulated, the faulting
>> condition does have to be checked in software.
>>
>> * These include faults generated by attempts to execute, in
>>   virtual-8086 mode, privileged instructions that are not recognized
>>   in that mode.
>>
>> Signed-off-by: Jim Mattson <jmattson@google.com>
>> ---
>>  arch/x86/kvm/vmx.c | 57 ++++++++++++++----------------------------------------
>>  1 file changed, 15 insertions(+), 42 deletions(-)
>
> Sounds perfectly sane to me and looks like a nice cleanup. Wonder if we
> have a test to verify that the checks are actually done in HW. (I've
> seen the strangest special cases related to virtualization mode in CPUs)

I do have a compatibility-mode test for INVEPT that I can try to
crossport to the upstream kvm-unit-tests.
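
(For readers following along: such a test could look roughly like the
sketch below, written against the kvm-unit-tests x86 helpers ASM_TRY()
and exception_vector(). The operand layout, the function name, and the
prior far-jump into a 32-bit compatibility-mode code segment are all
assumptions here, not the eventual test.)

/*
 * Sketch: execute INVEPT and verify that the guest receives #UD, i.e.
 * that the fault is delivered by hardware rather than being reflected
 * back as a VM exit. The caller is assumed to have already switched
 * into a compatibility-mode code segment, where INVEPT must #UD.
 */
struct invept_operand {
	u64 eptp;
	u64 reserved;
};

static void test_invept_compat_ud(u64 type, struct invept_operand *op)
{
	asm volatile(ASM_TRY("1f")
		     "invept %0, %1\n\t"
		     "1:"
		     : : "m" (*op), "r" (type) : "memory");
	report("INVEPT in compatibility mode raises #UD",
	       exception_vector() == UD_VECTOR);
}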

>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 259e9b28ccf8..e7a7edb4cdb0 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -7107,7 +7107,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
>>  static int handle_vmon(struct kvm_vcpu *vcpu)
>>  {
>>       int ret;
>> -     struct kvm_segment cs;
>>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>>       const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
>>               | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
>> @@ -7115,26 +7114,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>>       /* The Intel VMX Instruction Reference lists a bunch of bits that
>>        * are prerequisite to running VMXON, most notably cr4.VMXE must be
>>        * set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
>> -      * Otherwise, we should fail with #UD. We test these now:
>> +      * Otherwise, we should fail with #UD. Hardware has already tested
>> +      * most or all of these conditions, with the exception of real-address
>> +      * mode, when real-address mode is emulated.
>>        */
>> -     if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) ||
>> -         !kvm_read_cr0_bits(vcpu, X86_CR0_PE) ||
>> -         (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
>> -             kvm_queue_exception(vcpu, UD_VECTOR);
>> -             return 1;
>> -     }
>>
>> -     vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
>> -     if (is_long_mode(vcpu) && !cs.l) {
>> +     if ((!enable_unrestricted_guest &&
>> +          !kvm_read_cr0_bits(vcpu, X86_CR0_PE))) {
>>               kvm_queue_exception(vcpu, UD_VECTOR);
>>               return 1;
>>       }
>>
>> -     if (vmx_get_cpl(vcpu)) {
>> -             kvm_inject_gp(vcpu, 0);
>> -             return 1;
>> -     }
>> -
>>       if (vmx->nested.vmxon) {
>>               nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
>>               return kvm_skip_emulated_instruction(vcpu);
>> @@ -7161,30 +7151,18 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>>   * Intel's VMX Instruction Reference specifies a common set of prerequisites
>>   * for running VMX instructions (except VMXON, whose prerequisites are
>>   * slightly different). It also specifies what exception to inject otherwise.
>> + * Note that many of these exceptions have priority over VM exits, so they
>> + * don't have to be checked again here.
>>   */
>> -static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
>> +static bool nested_vmx_check_permission(struct kvm_vcpu *vcpu)
>
> I'm a fan of splitting such changes out. (less noise for reviewing the
> actual magic).

Do you mean you'd like to see separate patches for handle_vmon() and
nested_vmx_check_permission()?

>
> --
>
> Thanks,
>
> David
David Hildenbrand April 25, 2017, 4:30 p.m. UTC | #3
>>> -
>>>       if (vmx->nested.vmxon) {
>>>               nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
>>>               return kvm_skip_emulated_instruction(vcpu);
>>> @@ -7161,30 +7151,18 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>>>   * Intel's VMX Instruction Reference specifies a common set of prerequisites
>>>   * for running VMX instructions (except VMXON, whose prerequisites are
>>>   * slightly different). It also specifies what exception to inject otherwise.
>>> + * Note that many of these exceptions have priority over VM exits, so they
>>> + * don't have to be checked again here.
>>>   */
>>> -static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
>>> +static bool nested_vmx_check_permission(struct kvm_vcpu *vcpu)
>>
>> I'm a fan of splitting such changes out. (less noise for reviewing the
>> actual magic).
> 
> Do you mean you'd like to see separate patches for handle_vmon() and
> nested_vmx_check_permission()?

I was talking of the int->bool. But that is just my preference for
reviewing. And by far not worth a resend :)

> 
>>
>> --
>>
>> Thanks,
>>
>> David

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 259e9b28ccf8..e7a7edb4cdb0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7107,7 +7107,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 static int handle_vmon(struct kvm_vcpu *vcpu)
 {
 	int ret;
-	struct kvm_segment cs;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
 		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -7115,26 +7114,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 	/* The Intel VMX Instruction Reference lists a bunch of bits that
 	 * are prerequisite to running VMXON, most notably cr4.VMXE must be
 	 * set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
-	 * Otherwise, we should fail with #UD. We test these now:
+	 * Otherwise, we should fail with #UD. Hardware has already tested
+	 * most or all of these conditions, with the exception of real-address
+	 * mode, when real-address mode is emulated.
 	 */
-	if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) ||
-	    !kvm_read_cr0_bits(vcpu, X86_CR0_PE) ||
-	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
-	}
 
-	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
-	if (is_long_mode(vcpu) && !cs.l) {
+	if ((!enable_unrestricted_guest &&
+	     !kvm_read_cr0_bits(vcpu, X86_CR0_PE))) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
 	}
 
-	if (vmx_get_cpl(vcpu)) {
-		kvm_inject_gp(vcpu, 0);
-		return 1;
-	}
-
 	if (vmx->nested.vmxon) {
 		nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
 		return kvm_skip_emulated_instruction(vcpu);
@@ -7161,30 +7151,18 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
  * Intel's VMX Instruction Reference specifies a common set of prerequisites
  * for running VMX instructions (except VMXON, whose prerequisites are
  * slightly different). It also specifies what exception to inject otherwise.
+ * Note that many of these exceptions have priority over VM exits, so they
+ * don't have to be checked again here.
  */
-static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
+static bool nested_vmx_check_permission(struct kvm_vcpu *vcpu)
 {
-	struct kvm_segment cs;
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	if (!vmx->nested.vmxon) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 0;
-	}
-
-	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
-	if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
-	    (is_long_mode(vcpu) && !cs.l)) {
+	if (!to_vmx(vcpu)->nested.vmxon ||
+	    (!enable_unrestricted_guest &&
+	     !kvm_read_cr0_bits(vcpu, X86_CR0_PE))) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 0;
-	}
-
-	if (vmx_get_cpl(vcpu)) {
-		kvm_inject_gp(vcpu, 0);
-		return 0;
+		return false;
 	}
-
-	return 1;
+	return true;
 }
 
 static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
@@ -7527,7 +7505,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 		if (get_vmx_mem_address(vcpu, exit_qualification,
 				vmx_instruction_info, true, &gva))
 			return 1;
-		/* _system ok, as nested_vmx_check_permission verified cpl=0 */
+		/* _system ok, as hardware has verified cpl=0 */
 		kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
 			     &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
 	}
@@ -7660,7 +7638,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 	if (get_vmx_mem_address(vcpu, exit_qualification,
 			vmx_instruction_info, true, &vmcs_gva))
 		return 1;
-	/* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
+	/* ok to use *_system, as hardware has verified cpl=0 */
 	if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
 				 (void *)&to_vmx(vcpu)->nested.current_vmptr,
 				 sizeof(u64), &e)) {
@@ -7693,11 +7671,6 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
-	}
-
 	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);