diff mbox series

[v4,02/14] X86/nVMX: handle_vmptrld: Copy the VMCS12 directly from guest memory

Message ID 1543829467-18025-3-git-send-email-karahmed@amazon.de (mailing list archive)
State New, archived
Headers show
Series KVM/X86: Introduce a new guest mapping interface | expand

Commit Message

KarimAllah Ahmed Dec. 3, 2018, 9:30 a.m. UTC
Copy the VMCS12 directly from guest memory instead of the map->copy->unmap
sequence. This also avoids using kvm_vcpu_gpa_to_page() and kmap() which
assumes that there is a "struct page" for guest memory.

Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
---
v3 -> v4:
- Return VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID on failure (jmattson@)
v1 -> v2:
- Massage commit message a bit.
---
 arch/x86/kvm/vmx.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

Comments

David Hildenbrand Dec. 3, 2018, 12:59 p.m. UTC | #1
On 03.12.18 10:30, KarimAllah Ahmed wrote:
> Copy the VMCS12 directly from guest memory instead of the map->copy->unmap
> sequence. This also avoids using kvm_vcpu_gpa_to_page() and kmap() which
> assumes that there is a "struct page" for guest memory.
> 
> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
> ---
> v3 -> v4:
> - Return VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID on failure (jmattson@)
> v1 -> v2:
> - Massage commit message a bit.
> ---
>  arch/x86/kvm/vmx.c | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index b84f230..75817cb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -9301,20 +9301,22 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>  		return 1;
>  
>  	if (vmx->nested.current_vmptr != vmptr) {
> -		struct vmcs12 *new_vmcs12;
> -		struct page *page;
> -		page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
> -		if (is_error_page(page))
> -			return nested_vmx_failInvalid(vcpu);
> +		struct vmcs12 *new_vmcs12 = (struct vmcs12 *)__get_free_page(GFP_KERNEL);
> +
> +		if (!new_vmcs12 ||
> +		    kvm_read_guest(vcpu->kvm, vmptr, new_vmcs12,
> +				   sizeof(*new_vmcs12))) {
> +			free_page((unsigned long)new_vmcs12);
> +			return nested_vmx_failValid(vcpu,
> +						    VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
> +		}

If we fail to read, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID seems to be
the right thing to do (I remember that reading from non-existent memory
returns all 1's).

I somewhat dislike that we are converting -ENOMEM to
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID. Isn't there a way we can
report -ENOMEM to user space? (or do we really want to avoid crashing
the guest here? if so, comment + print a warning or something like that?)

>  
> -		new_vmcs12 = kmap(page);
>  		if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
>  		    (new_vmcs12->hdr.shadow_vmcs &&
>  		     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
> -			kunmap(page);
> -			kvm_release_page_clean(page);
> +			free_page((unsigned long)new_vmcs12);
>  			return nested_vmx_failValid(vcpu,
> -				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
> +						    VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
>  		}
>  
>  		nested_release_vmcs12(vcpu);
> @@ -9324,9 +9326,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>  		 * cached.
>  		 */
>  		memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
> -		kunmap(page);
> -		kvm_release_page_clean(page);
> -
> +		free_page((unsigned long)new_vmcs12);
>  		set_current_vmptr(vmx, vmptr);
>  	}
>  
>
Jim Mattson Dec. 5, 2018, 11:10 p.m. UTC | #2
On Mon, Dec 3, 2018 at 1:31 AM KarimAllah Ahmed <karahmed@amazon.de> wrote:
>
> Copy the VMCS12 directly from guest memory instead of the map->copy->unmap
> sequence. This also avoids using kvm_vcpu_gpa_to_page() and kmap() which
> assumes that there is a "struct page" for guest memory.
>
> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
> ---
> v3 -> v4:
> - Return VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID on failure (jmattson@)
> v1 -> v2:
> - Massage commit message a bit.
> ---
>  arch/x86/kvm/vmx.c | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index b84f230..75817cb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -9301,20 +9301,22 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>                 return 1;
>
>         if (vmx->nested.current_vmptr != vmptr) {
> -               struct vmcs12 *new_vmcs12;
> -               struct page *page;
> -               page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
> -               if (is_error_page(page))
> -                       return nested_vmx_failInvalid(vcpu);
> +               struct vmcs12 *new_vmcs12 = (struct vmcs12 *)__get_free_page(GFP_KERNEL);
> +
> +               if (!new_vmcs12 ||
> +                   kvm_read_guest(vcpu->kvm, vmptr, new_vmcs12,
> +                                  sizeof(*new_vmcs12))) {

Isn't this a lot slower than kmap() when there is a struct page?

> +                       free_page((unsigned long)new_vmcs12);
> +                       return nested_vmx_failValid(vcpu,
> +                                                   VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);

I agree with David that this isn't the right thing to do for -ENOMEM.
Paolo Bonzini Dec. 21, 2018, 3:20 p.m. UTC | #3
On 06/12/18 00:10, Jim Mattson wrote:
> On Mon, Dec 3, 2018 at 1:31 AM KarimAllah Ahmed <karahmed@amazon.de> wrote:
>>
>> Copy the VMCS12 directly from guest memory instead of the map->copy->unmap
>> sequence. This also avoids using kvm_vcpu_gpa_to_page() and kmap() which
>> assumes that there is a "struct page" for guest memory.
>>
>> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
>> ---
>> v3 -> v4:
>> - Return VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID on failure (jmattson@)
>> v1 -> v2:
>> - Massage commit message a bit.
>> ---
>>  arch/x86/kvm/vmx.c | 24 ++++++++++++------------
>>  1 file changed, 12 insertions(+), 12 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index b84f230..75817cb 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -9301,20 +9301,22 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>>                 return 1;
>>
>>         if (vmx->nested.current_vmptr != vmptr) {
>> -               struct vmcs12 *new_vmcs12;
>> -               struct page *page;
>> -               page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
>> -               if (is_error_page(page))
>> -                       return nested_vmx_failInvalid(vcpu);
>> +               struct vmcs12 *new_vmcs12 = (struct vmcs12 *)__get_free_page(GFP_KERNEL);
>> +
>> +               if (!new_vmcs12 ||
>> +                   kvm_read_guest(vcpu->kvm, vmptr, new_vmcs12,
>> +                                  sizeof(*new_vmcs12))) {
> 
> Isn't this a lot slower than kmap() when there is a struct page?

It wouldn't be slower if he read directly into cached_vmcs12.  However,
as it is now, it's doing two reads instead of one.  By doing this, the
ENOMEM case also disappears.

Paolo
KarimAllah Ahmed Jan. 3, 2019, 2:22 p.m. UTC | #4
On Fri, 2018-12-21 at 16:20 +0100, Paolo Bonzini wrote:
> On 06/12/18 00:10, Jim Mattson wrote:
> > 
> > On Mon, Dec 3, 2018 at 1:31 AM KarimAllah Ahmed <karahmed@amazon.de> wrote:
> > > 
> > > 
> > > Copy the VMCS12 directly from guest memory instead of the map->copy->unmap
> > > sequence. This also avoids using kvm_vcpu_gpa_to_page() and kmap() which
> > > assumes that there is a "struct page" for guest memory.
> > > 
> > > Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
> > > ---
> > > v3 -> v4:
> > > - Return VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID on failure (jmattson@)
> > > v1 -> v2:
> > > - Massage commit message a bit.
> > > ---
> > >  arch/x86/kvm/vmx.c | 24 ++++++++++++------------
> > >  1 file changed, 12 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > > index b84f230..75817cb 100644
> > > --- a/arch/x86/kvm/vmx.c
> > > +++ b/arch/x86/kvm/vmx.c
> > > @@ -9301,20 +9301,22 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
> > >                 return 1;
> > > 
> > >         if (vmx->nested.current_vmptr != vmptr) {
> > > -               struct vmcs12 *new_vmcs12;
> > > -               struct page *page;
> > > -               page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
> > > -               if (is_error_page(page))
> > > -                       return nested_vmx_failInvalid(vcpu);
> > > +               struct vmcs12 *new_vmcs12 = (struct vmcs12 *)__get_free_page(GFP_KERNEL);
> > > +
> > > +               if (!new_vmcs12 ||
> > > +                   kvm_read_guest(vcpu->kvm, vmptr, new_vmcs12,
> > > +                                  sizeof(*new_vmcs12))) {
> > 
> > Isn't this a lot slower than kmap() when there is a struct page?
> 
> It wouldn't be slower if he read directly into cached_vmcs12.  However,
> as it is now, it's doing two reads instead of one.  By doing this, the
> ENOMEM case also disappears.

I can not use "cached_vmcs12" directly here because its old contents will be 
used for "nested_release_vmcs12(..)" a few lines below (i.e. it will be flushed 
into guest memory).

I can just switch this to the new API introduced a few patches later and that 
would have the same semantics for normal use-cases as before.

> 
> Paolo



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrer: Christian Schlaeger, Ralf Herbrich
Ust-ID: DE 289 237 879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b84f230..75817cb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9301,20 +9301,22 @@  static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		return 1;
 
 	if (vmx->nested.current_vmptr != vmptr) {
-		struct vmcs12 *new_vmcs12;
-		struct page *page;
-		page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
-		if (is_error_page(page))
-			return nested_vmx_failInvalid(vcpu);
+		struct vmcs12 *new_vmcs12 = (struct vmcs12 *)__get_free_page(GFP_KERNEL);
+
+		if (!new_vmcs12 ||
+		    kvm_read_guest(vcpu->kvm, vmptr, new_vmcs12,
+				   sizeof(*new_vmcs12))) {
+			free_page((unsigned long)new_vmcs12);
+			return nested_vmx_failValid(vcpu,
+						    VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+		}
 
-		new_vmcs12 = kmap(page);
 		if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
 		    (new_vmcs12->hdr.shadow_vmcs &&
 		     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
-			kunmap(page);
-			kvm_release_page_clean(page);
+			free_page((unsigned long)new_vmcs12);
 			return nested_vmx_failValid(vcpu,
-				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+						    VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
 		}
 
 		nested_release_vmcs12(vcpu);
@@ -9324,9 +9326,7 @@  static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		 * cached.
 		 */
 		memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
-		kunmap(page);
-		kvm_release_page_clean(page);
-
+		free_page((unsigned long)new_vmcs12);
 		set_current_vmptr(vmx, vmptr);
 	}