KVM: nVMX: Fix memory corruption when using VMCS shadowing

Message ID 20160630222432.GA15431@jmattson.sea.corp.google.com (mailing list archive)
State New, archived

Commit Message

Jim Mattson June 30, 2016, 10:24 p.m. UTC
In copy_shadow_to_vmcs12, a vCPU's shadow VMCS is temporarily loaded
on a logical processor as the current VMCS.  When the copy is
complete, the logical processor's previous current VMCS must be
restored.  However, the logical processor in question may not be the
one on which the vCPU is loaded (for instance, during kvm_vm_release,
when copy_shadow_to_vmcs12 is invoked on the same logical processor
for every vCPU in a VM).  The new functions __vmptrst and __vmptrld
are introduced to save the logical processor's current VMCS before the
copy and to restore it afterwards.

Note that copy_vmcs12_to_shadow does not suffer from this problem,
since it is only called from a context where the vCPU is loaded on the
logical processor.
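
In outline, the patched copy_shadow_to_vmcs12 follows this
save/switch/restore pattern (a condensed sketch of the diff below, with
the VMREAD loop elided; per the SDM, VMPTRST stores FFFFFFFF_FFFFFFFFh
when no VMCS is current, which is why the restore is skipped for a
saved value of -1ull):

	preempt_disable();

	current_vmcs_pa = __vmptrst();	/* save this pCPU's current VMCS */
	vmcs_load(shadow_vmcs);

	/* ... VMREAD each shadowed field into vmcs12 ... */

	vmcs_clear(shadow_vmcs);
	if (current_vmcs_pa != -1ull)	/* -1ull: no VMCS was current */
		__vmptrld(current_vmcs_pa);

	preempt_enable();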

Signed-off-by: Jim Mattson <jmattson@google.com>

---
 arch/x86/include/asm/vmx.h |  1 +
 arch/x86/kvm/vmx.c         | 27 +++++++++++++++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini July 1, 2016, 5:24 a.m. UTC | #1
On 01/07/2016 00:24, Jim Mattson wrote:
> In copy_shadow_to_vmcs12, a vCPU's shadow VMCS is temporarily loaded
> on a logical processor as the current VMCS.  When the copy is
> complete, the logical processor's previous current VMCS must be
> restored.  However, the logical processor in question may not be the
> one on which the vCPU is loaded (for instance, during kvm_vm_release,
> when copy_shadow_to_vmcs12 is invoked on the same logical processor
> for every vCPU in a VM).  The new functions __vmptrst and __vmptrld
> are introduced to save the logical processor's current VMCS before the
> copy and to restore it afterwards.
> 
> Note that copy_vmcs12_to_shadow does not suffer from this problem,
> since it is only called from a context where the vCPU is loaded on the
> logical processor.

Are you sure this is enough?  This code in nested_release_vmcs12 assumes
that the L1 VMCS is still loaded after copy_shadow_to_vmcs12 returns:

        if (enable_shadow_vmcs) {
                /* copy to memory all shadowed fields in case
                   they were modified */
                copy_shadow_to_vmcs12(vmx);
                vmx->nested.sync_shadow_vmcs = false;
                vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
                                SECONDARY_EXEC_SHADOW_VMCS);
                vmcs_write64(VMCS_LINK_POINTER, -1ull);
        }

So perhaps it's the callers of nested_release_vmcs12 that have to ensure
vcpu_load/vcpu_put are called if necessary around the calls to
nested_release_vmcs12.  Only free_nested should need this.  The other
callers of nested_release_vmcs12 and copy_shadow_to_vmcs12 are vmexit
handlers and do have the problem.
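
Something along these lines in the teardown path, perhaps (an untested
sketch; vcpu_load() returns an int in this era of the tree, which a
real patch would need to check):

	/*
	 * Pin the vCPU to this pCPU so that vmx->loaded_vmcs->vmcs
	 * really is the current VMCS when copy_shadow_to_vmcs12()
	 * reloads it at the end.
	 */
	vcpu_load(vcpu);
	free_nested(to_vmx(vcpu));
	vcpu_put(vcpu);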

Paolo

> Signed-off-by: Jim Mattson <jmattson@google.com>
> 
> ---
>  arch/x86/include/asm/vmx.h |  1 +
>  arch/x86/kvm/vmx.c         | 27 +++++++++++++++++++++++----
>  2 files changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 14c63c7..2d0548a 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -443,6 +443,7 @@ enum vmcs_field {
>  #define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
>  #define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
>  #define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
> +#define ASM_VMX_VMPTRST_RAX       ".byte 0x0f, 0xc7, 0x38"
>  #define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
>  #define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
>  #define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 003618e..c79868a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1338,15 +1338,30 @@ static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
>  	loaded_vmcs->launched = 0;
>  }
>  
> -static void vmcs_load(struct vmcs *vmcs)
> +static inline u64 __vmptrst(void)
> +{
> +	u64 phys_addr;
> +
> +	asm volatile (__ex(ASM_VMX_VMPTRST_RAX)
> +			: : "a"(&phys_addr) : "cc", "memory");
> +	return phys_addr;
> +}
> +
> +static inline u8 __vmptrld(u64 phys_addr)
>  {
> -	u64 phys_addr = __pa(vmcs);
>  	u8 error;
>  
>  	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
>  			: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
>  			: "cc", "memory");
> -	if (error)
> +	return error;
> +}
> +
> +static void vmcs_load(struct vmcs *vmcs)
> +{
> +	u64 phys_addr = __pa(vmcs);
> +
> +	if (__vmptrld(phys_addr))
>  		printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
>  		       vmcs, phys_addr);
>  }
> @@ -7136,12 +7151,14 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
>  	int i;
>  	unsigned long field;
>  	u64 field_value;
> +	u64 current_vmcs_pa;
>  	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
>  	const unsigned long *fields = shadow_read_write_fields;
>  	const int num_fields = max_shadow_read_write_fields;
>  
>  	preempt_disable();
>  
> +	current_vmcs_pa = __vmptrst();
>  	vmcs_load(shadow_vmcs);
>  
>  	for (i = 0; i < num_fields; i++) {
> @@ -7167,7 +7184,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
>  	}
>  
>  	vmcs_clear(shadow_vmcs);
> -	vmcs_load(vmx->loaded_vmcs->vmcs);
> +	if (current_vmcs_pa != -1ull)
> +		__vmptrld(current_vmcs_pa);
> +
>  
>  	preempt_enable();
>  }
Paolo Bonzini July 1, 2016, 12:33 p.m. UTC | #2
On 01/07/2016 07:24, Paolo Bonzini wrote:
> So perhaps it's the callers of nested_release_vmcs12 that have to ensure
> vcpu_load/vcpu_put are called if necessary around the calls to
> nested_release_vmcs12.  Only free_nested should need this.  The other
> callers of nested_release_vmcs12 and copy_shadow_to_vmcs12 are vmexit
> handlers and do have the problem.

... do not have.

Paolo
Jim Mattson July 1, 2016, 4:17 p.m. UTC | #3
I agree. I'll have a revised proposal next week.

On Fri, Jul 1, 2016 at 5:33 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 01/07/2016 07:24, Paolo Bonzini wrote:
>> So perhaps it's the callers of nested_release_vmcs12 that have to ensure
>> vcpu_load/vcpu_put are called if necessary around the calls to
>> nested_release_vmcs12.  Only free_nested should need this.  The other
>> callers of nested_release_vmcs12 and copy_shadow_to_vmcs12 are vmexit
>> handlers and do have the problem.
>
> ... do not have.
>
> Paolo

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 14c63c7..2d0548a 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -443,6 +443,7 @@ enum vmcs_field {
 #define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
 #define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
 #define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMPTRST_RAX       ".byte 0x0f, 0xc7, 0x38"
 #define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
 #define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
 #define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 003618e..c79868a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1338,15 +1338,30 @@ static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 	loaded_vmcs->launched = 0;
 }
 
-static void vmcs_load(struct vmcs *vmcs)
+static inline u64 __vmptrst(void)
+{
+	u64 phys_addr;
+
+	asm volatile (__ex(ASM_VMX_VMPTRST_RAX)
+			: : "a"(&phys_addr) : "cc", "memory");
+	return phys_addr;
+}
+
+static inline u8 __vmptrld(u64 phys_addr)
 {
-	u64 phys_addr = __pa(vmcs);
 	u8 error;
 
 	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
 			: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
 			: "cc", "memory");
-	if (error)
+	return error;
+}
+
+static void vmcs_load(struct vmcs *vmcs)
+{
+	u64 phys_addr = __pa(vmcs);
+
+	if (__vmptrld(phys_addr))
 		printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
 		       vmcs, phys_addr);
 }
@@ -7136,12 +7151,14 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 	int i;
 	unsigned long field;
 	u64 field_value;
+	u64 current_vmcs_pa;
 	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
 	const unsigned long *fields = shadow_read_write_fields;
 	const int num_fields = max_shadow_read_write_fields;
 
 	preempt_disable();
 
+	current_vmcs_pa = __vmptrst();
 	vmcs_load(shadow_vmcs);
 
 	for (i = 0; i < num_fields; i++) {
@@ -7167,7 +7184,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 	}
 
 	vmcs_clear(shadow_vmcs);
-	vmcs_load(vmx->loaded_vmcs->vmcs);
+	if (current_vmcs_pa != -1ull)
+		__vmptrld(current_vmcs_pa);
+
 
 	preempt_enable();
 }
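
For reference, the hand-assembled ASM_VMX_VMPTRST_RAX bytes decode as
VMPTRST m64 (opcode 0f c7 /7): ModRM 0x38 is mod=00, reg=7, rm=rax,
placing the operand at [rax], mirroring ASM_VMX_VMPTRLD_RAX (0f c7 /6,
ModRM 0x30).

The two new helpers pair into a generic save/switch/restore idiom.  A
hypothetical wrapper (not part of this patch, shown only to illustrate
the intended discipline) might look like this; the caller must hold
preemption disabled so the current VMCS cannot change underneath it:

	/*
	 * Hypothetical helper: run @fn with @vmcs as the current VMCS,
	 * then restore whatever VMCS was current beforehand (a saved
	 * value of -1ull means no VMCS was current, so there is
	 * nothing to restore).  Caller must have disabled preemption.
	 */
	static void with_vmcs(struct vmcs *vmcs, void (*fn)(void *data),
			      void *data)
	{
		u64 saved_pa = __vmptrst();

		vmcs_load(vmcs);
		fn(data);
		vmcs_clear(vmcs);
		if (saved_pa != -1ull)
			__vmptrld(saved_pa);
	}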