diff mbox series

[v3] KVM: VMX: Fix crash due to uninitialized current_vmcs

Message ID 20230123162929.9773-1-alexandru.matei@uipath.com (mailing list archive)
State New, archived
Headers show
Series [v3] KVM: VMX: Fix crash due to uninitialized current_vmcs | expand

Commit Message

Alexandru Matei Jan. 23, 2023, 4:29 p.m. UTC
KVM enables 'Enlightened VMCS' and 'Enlightened MSR Bitmap' when running as
a nested hypervisor on top of Hyper-V. When MSR bitmap is updated,
evmcs_touch_msr_bitmap function uses current_vmcs per-cpu variable to mark
that the msr bitmap was changed.

vmx_vcpu_create() modifies the msr bitmap via vmx_disable_intercept_for_msr
-> vmx_msr_bitmap_l01_changed which in the end calls this function. The
function checks for current_vmcs if it is null but the check is
insufficient because current_vmcs is not initialized. Because of this, the
code might incorrectly write to the structure pointed by current_vmcs value
left by another task. Preemption is not disabled, the current task can be
preempted and moved to another CPU while current_vmcs is accessed multiple
times from evmcs_touch_msr_bitmap() which leads to crash.

The manipulation of MSR bitmaps by callers happens only for vmcs01 so the
solution is to use vmx->vmcs01.vmcs instead of current_vmcs.

BUG: kernel NULL pointer dereference, address: 0000000000000338
PGD 4e1775067 P4D 0
Oops: 0002 [#1] PREEMPT SMP NOPTI
...
RIP: 0010:vmx_msr_bitmap_l01_changed+0x39/0x50 [kvm_intel]
...
Call Trace:
 vmx_disable_intercept_for_msr+0x36/0x260 [kvm_intel]
 vmx_vcpu_create+0xe6/0x540 [kvm_intel]
 ? __vmalloc_node+0x4a/0x70
 kvm_arch_vcpu_create+0x1d1/0x2e0 [kvm]
 kvm_vm_ioctl_create_vcpu+0x178/0x430 [kvm]
 ? __handle_mm_fault+0x3cb/0x750
 kvm_vm_ioctl+0x53f/0x790 [kvm]
 ? syscall_exit_work+0x11a/0x150
 ? syscall_exit_to_user_mode+0x12/0x30
 ? do_syscall_64+0x69/0x90
 ? handle_mm_fault+0xc5/0x2a0
 __x64_sys_ioctl+0x8a/0xc0
 do_syscall_64+0x5c/0x90
 ? exc_page_fault+0x62/0x150
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Alexandru Matei <alexandru.matei@uipath.com>
---
v3:
  - pass hv_enlightened_vmcs * directly

v2:
  - pass (e)vmcs01 to evmcs_touch_msr_bitmap
  - use loaded_vmcs * instead of vcpu_vmx * to avoid
    including vmx.h which generates circular dependency

 arch/x86/kvm/vmx/hyperv.h | 14 +++++++++-----
 arch/x86/kvm/vmx/vmx.c    |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

Comments

Vitaly Kuznetsov Jan. 23, 2023, 5 p.m. UTC | #1
Alexandru Matei <alexandru.matei@uipath.com> writes:

> KVM enables 'Enlightened VMCS' and 'Enlightened MSR Bitmap' when running as
> a nested hypervisor on top of Hyper-V. When MSR bitmap is updated,
> evmcs_touch_msr_bitmap function uses current_vmcs per-cpu variable to mark
> that the msr bitmap was changed.
>
> vmx_vcpu_create() modifies the msr bitmap via vmx_disable_intercept_for_msr
> -> vmx_msr_bitmap_l01_changed which in the end calls this function. The
> function checks for current_vmcs if it is null but the check is
> insufficient because current_vmcs is not initialized. Because of this, the
> code might incorrectly write to the structure pointed by current_vmcs value
> left by another task. Preemption is not disabled, the current task can be
> preempted and moved to another CPU while current_vmcs is accessed multiple
> times from evmcs_touch_msr_bitmap() which leads to crash.
>
> The manipulation of MSR bitmaps by callers happens only for vmcs01 so the
> solution is to use vmx->vmcs01.vmcs instead of current_vmcs.
>
> BUG: kernel NULL pointer dereference, address: 0000000000000338
> PGD 4e1775067 P4D 0
> Oops: 0002 [#1] PREEMPT SMP NOPTI
> ...
> RIP: 0010:vmx_msr_bitmap_l01_changed+0x39/0x50 [kvm_intel]
> ...
> Call Trace:
>  vmx_disable_intercept_for_msr+0x36/0x260 [kvm_intel]
>  vmx_vcpu_create+0xe6/0x540 [kvm_intel]
>  ? __vmalloc_node+0x4a/0x70
>  kvm_arch_vcpu_create+0x1d1/0x2e0 [kvm]
>  kvm_vm_ioctl_create_vcpu+0x178/0x430 [kvm]
>  ? __handle_mm_fault+0x3cb/0x750
>  kvm_vm_ioctl+0x53f/0x790 [kvm]
>  ? syscall_exit_work+0x11a/0x150
>  ? syscall_exit_to_user_mode+0x12/0x30
>  ? do_syscall_64+0x69/0x90
>  ? handle_mm_fault+0xc5/0x2a0
>  __x64_sys_ioctl+0x8a/0xc0
>  do_syscall_64+0x5c/0x90
>  ? exc_page_fault+0x62/0x150
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Alexandru Matei <alexandru.matei@uipath.com>
> ---
> v3:
>   - pass hv_enlightened_vmcs * directly
>
> v2:
>   - pass (e)vmcs01 to evmcs_touch_msr_bitmap
>   - use loaded_vmcs * instead of vcpu_vmx * to avoid
>     including vmx.h which generates circular dependency
>
>  arch/x86/kvm/vmx/hyperv.h | 14 +++++++++-----
>  arch/x86/kvm/vmx/vmx.c    |  2 +-
>  2 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h
> index 571e7929d14e..4ca6606e7a3b 100644
> --- a/arch/x86/kvm/vmx/hyperv.h
> +++ b/arch/x86/kvm/vmx/hyperv.h
> @@ -190,13 +190,17 @@ static inline u16 evmcs_read16(unsigned long field)
>  	return *(u16 *)((char *)current_evmcs + offset);
>  }
>  
> -static inline void evmcs_touch_msr_bitmap(void)
> +static inline void evmcs_touch_msr_bitmap(struct hv_enlightened_vmcs *evmcs)
>  {
> -	if (unlikely(!current_evmcs))
> +	/*
> +	 * Enlightened MSR Bitmap feature is enabled only for L1, i.e.
> +	 * always operates on evmcs01
> +	 */
> +	if (WARN_ON_ONCE(!evmcs))
>  		return;
>  
> -	if (current_evmcs->hv_enlightenments_control.msr_bitmap)
> -		current_evmcs->hv_clean_fields &=
> +	if (evmcs->hv_enlightenments_control.msr_bitmap)
> +		evmcs->hv_clean_fields &=
>  			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
>  }
>  
> @@ -219,7 +223,7 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; }
>  static inline u32 evmcs_read32(unsigned long field) { return 0; }
>  static inline u16 evmcs_read16(unsigned long field) { return 0; }
>  static inline void evmcs_load(u64 phys_addr) {}
> -static inline void evmcs_touch_msr_bitmap(void) {}
> +static inline void evmcs_touch_msr_bitmap(struct hv_enlightened_vmcs *evmcs) {}
>  #endif /* IS_ENABLED(CONFIG_HYPERV) */
>  
>  #define EVMPTR_INVALID (-1ULL)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index fe5615fd8295..1d482a80bca8 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -3869,7 +3869,7 @@ static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
>  	 * bitmap has changed.
>  	 */
>  	if (static_branch_unlikely(&enable_evmcs))
> -		evmcs_touch_msr_bitmap();
> +		evmcs_touch_msr_bitmap((struct hv_enlightened_vmcs *)vmx->vmcs01.vmcs);
>  
>  	vmx->nested.force_msr_bitmap_recalc = true;
>  }

Just in case we decide to follow this path and not merge
evmcs_touch_msr_bitmap() into vmx_msr_bitmap_l01_changed():

Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Sean Christopherson Jan. 23, 2023, 7 p.m. UTC | #2
On Mon, Jan 23, 2023, Vitaly Kuznetsov wrote:
> Alexandru Matei <alexandru.matei@uipath.com> writes:
> 
> > KVM enables 'Enlightened VMCS' and 'Enlightened MSR Bitmap' when running as
> > a nested hypervisor on top of Hyper-V. When MSR bitmap is updated,
> > evmcs_touch_msr_bitmap function uses current_vmcs per-cpu variable to mark
> > that the msr bitmap was changed.

...

> > @@ -219,7 +223,7 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; }
> >  static inline u32 evmcs_read32(unsigned long field) { return 0; }
> >  static inline u16 evmcs_read16(unsigned long field) { return 0; }
> >  static inline void evmcs_load(u64 phys_addr) {}
> > -static inline void evmcs_touch_msr_bitmap(void) {}
> > +static inline void evmcs_touch_msr_bitmap(struct hv_enlightened_vmcs *evmcs) {}
> >  #endif /* IS_ENABLED(CONFIG_HYPERV) */
> >  
> >  #define EVMPTR_INVALID (-1ULL)
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index fe5615fd8295..1d482a80bca8 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -3869,7 +3869,7 @@ static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
> >  	 * bitmap has changed.
> >  	 */
> >  	if (static_branch_unlikely(&enable_evmcs))
> > -		evmcs_touch_msr_bitmap();
> > +		evmcs_touch_msr_bitmap((struct hv_enlightened_vmcs *)vmx->vmcs01.vmcs);
> >  
> >  	vmx->nested.force_msr_bitmap_recalc = true;
> >  }
> 
> Just in case we decide to follow this path and not merge
> evmcs_touch_msr_bitmap() into vmx_msr_bitmap_l01_changed():

This is the only approach that I'm outright opposed to.  The evmcs_touch_msr_bitmap()
stub is a lie in that it should never be reached with CONFIG_HYPERV=n, i.e. should
really WARN.  Ditto for the WARN_ON_ONCE() in the actual helper; if vmx->vmcs01.vmcs
is NULL then KVM is completely hosed.

KVM already consumes hv_enlightenments_control.msr_bitmap in vmx.c and in nested.c,
shoving this case into hyperv.h but leaving those in VMX proper is odd/kludgy.
Alexandru Matei Jan. 23, 2023, 8:03 p.m. UTC | #3
On 1/23/2023 9:00 PM, Sean Christopherson wrote:
> On Mon, Jan 23, 2023, Vitaly Kuznetsov wrote:
>> Alexandru Matei <alexandru.matei@uipath.com> writes:
>>
>>> KVM enables 'Enlightened VMCS' and 'Enlightened MSR Bitmap' when running as
>>> a nested hypervisor on top of Hyper-V. When MSR bitmap is updated,
>>> evmcs_touch_msr_bitmap function uses current_vmcs per-cpu variable to mark
>>> that the msr bitmap was changed.
> 
> ...
> 
>>> @@ -219,7 +223,7 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; }
>>>  static inline u32 evmcs_read32(unsigned long field) { return 0; }
>>>  static inline u16 evmcs_read16(unsigned long field) { return 0; }
>>>  static inline void evmcs_load(u64 phys_addr) {}
>>> -static inline void evmcs_touch_msr_bitmap(void) {}
>>> +static inline void evmcs_touch_msr_bitmap(struct hv_enlightened_vmcs *evmcs) {}
>>>  #endif /* IS_ENABLED(CONFIG_HYPERV) */
>>>  
>>>  #define EVMPTR_INVALID (-1ULL)
>>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>>> index fe5615fd8295..1d482a80bca8 100644
>>> --- a/arch/x86/kvm/vmx/vmx.c
>>> +++ b/arch/x86/kvm/vmx/vmx.c
>>> @@ -3869,7 +3869,7 @@ static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
>>>  	 * bitmap has changed.
>>>  	 */
>>>  	if (static_branch_unlikely(&enable_evmcs))
>>> -		evmcs_touch_msr_bitmap();
>>> +		evmcs_touch_msr_bitmap((struct hv_enlightened_vmcs *)vmx->vmcs01.vmcs);
>>>  
>>>  	vmx->nested.force_msr_bitmap_recalc = true;
>>>  }
>>
>> Just in case we decide to follow this path and not merge
>> evmcs_touch_msr_bitmap() into vmx_msr_bitmap_l01_changed():
> 
> This is the only approach that I'm outright opposed to.  The evmcs_touch_msr_bitmap()
> stub is a lie in that it should never be reached with CONFIG_HYPERV=n, i.e. should
> really WARN.  Ditto for the WARN_ON_ONCE() in the actual helper; if vmx->vmcs01.vmcs
> is NULL then KVM is completely hosed.
> 
> KVM already consumes hv_enlightenments_control.msr_bitmap in vmx.c and in nested.c,
> shoving this case into hyperv.h but leaving those in VMX proper is odd/kludgy.

Got it, I'll send a v4 with your patch.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h
index 571e7929d14e..4ca6606e7a3b 100644
--- a/arch/x86/kvm/vmx/hyperv.h
+++ b/arch/x86/kvm/vmx/hyperv.h
@@ -190,13 +190,17 @@  static inline u16 evmcs_read16(unsigned long field)
 	return *(u16 *)((char *)current_evmcs + offset);
 }
 
-static inline void evmcs_touch_msr_bitmap(void)
+static inline void evmcs_touch_msr_bitmap(struct hv_enlightened_vmcs *evmcs)
 {
-	if (unlikely(!current_evmcs))
+	/*
+	 * Enlightened MSR Bitmap feature is enabled only for L1, i.e.
+	 * always operates on evmcs01
+	 */
+	if (WARN_ON_ONCE(!evmcs))
 		return;
 
-	if (current_evmcs->hv_enlightenments_control.msr_bitmap)
-		current_evmcs->hv_clean_fields &=
+	if (evmcs->hv_enlightenments_control.msr_bitmap)
+		evmcs->hv_clean_fields &=
 			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
 }
 
@@ -219,7 +223,7 @@  static inline u64 evmcs_read64(unsigned long field) { return 0; }
 static inline u32 evmcs_read32(unsigned long field) { return 0; }
 static inline u16 evmcs_read16(unsigned long field) { return 0; }
 static inline void evmcs_load(u64 phys_addr) {}
-static inline void evmcs_touch_msr_bitmap(void) {}
+static inline void evmcs_touch_msr_bitmap(struct hv_enlightened_vmcs *evmcs) {}
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
 #define EVMPTR_INVALID (-1ULL)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fe5615fd8295..1d482a80bca8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3869,7 +3869,7 @@  static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
 	 * bitmap has changed.
 	 */
 	if (static_branch_unlikely(&enable_evmcs))
-		evmcs_touch_msr_bitmap();
+		evmcs_touch_msr_bitmap((struct hv_enlightened_vmcs *)vmx->vmcs01.vmcs);
 
 	vmx->nested.force_msr_bitmap_recalc = true;
 }