
x86/kvm/mmu: fix switch between root and guest MMUs

Message ID 20190222164501.13605-1-vkuznets@redhat.com (mailing list archive)
State New, archived
Series: x86/kvm/mmu: fix switch between root and guest MMUs

Commit Message

Vitaly Kuznetsov Feb. 22, 2019, 4:45 p.m. UTC
Commit 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu") brought one subtle
change: previously, when switching back from L2 to L1, we were resetting
MMU hooks (like mmu->get_cr3()) in kvm_init_mmu() called from
nested_vmx_load_cr3(); now we do that in nested_ept_uninit_mmu_context()
when we re-target the vcpu->arch.mmu pointer.
The change itself looks logical: if nested_ept_init_mmu_context() changes
something, then nested_ept_uninit_mmu_context() restores it. There is,
however, one catch: the following call chain:

 nested_vmx_load_cr3()
  kvm_mmu_new_cr3()
    __kvm_mmu_new_cr3()
      fast_cr3_switch()
        cached_root_available()

now happens with the MMU hooks pointing to the new MMU (the root MMU in
our case) while previously it happened with the old one.
cached_root_available() tries to stash the current root, but it is
incorrect to read the current CR3 with mmu->get_cr3(); we need to use
old_mmu->get_cr3(), where old_mmu, when switching from L2 to L1, is
guest_mmu. (BTW, in the shadow page tables case this is a non-issue
because we don't switch MMUs.)

While we could have tried to detect that we're switching between MMUs and
call the right ->get_cr3() from cached_root_available(), that seems
overly complicated. Instead, just stash the corresponding CR3 when
setting root_hpa and make cached_root_available() use the stashed value.
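
To illustrate the mismatch, here is a minimal, self-contained userspace
sketch (not the kernel code; all names and values are illustrative):

/*
 * A cached root pairs a CR3 with a root_hpa.  Reading "the current CR3"
 * through the re-targeted MMU's hook stashes a mismatched pair; using
 * the CR3 recorded when the root was set keeps the pair consistent.
 */
#include <stdio.h>

typedef unsigned long gpa_t;
typedef unsigned long hpa_t;

struct mmu {
	gpa_t (*get_cr3)(void);
	hpa_t root_hpa;
	gpa_t root_cr3;		/* what this patch adds */
};

/* models the CR3 visible through the hook before the new CR3 is loaded */
static gpa_t stale_cr3(void) { return 0x2000; }

int main(void)
{
	/* the root MMU's root was built while CR3 was 0x1000 */
	struct mmu root_mmu = {
		.get_cr3  = stale_cr3,
		.root_hpa = 0xabc000,
		.root_cr3 = 0x1000,
	};
	struct mmu *mmu = &root_mmu;	/* after nested_ept_uninit_mmu_context() */

	/* buggy stash: CR3 0x2000 paired with a root built for CR3 0x1000 */
	printf("buggy: cr3=%#lx hpa=%#lx\n", mmu->get_cr3(), mmu->root_hpa);
	/* fixed stash: the (cr3, hpa) pair stays consistent */
	printf("fixed: cr3=%#lx hpa=%#lx\n", mmu->root_cr3, mmu->root_hpa);
	return 0;
}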

Fixes: 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c              | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini Feb. 22, 2019, 5:12 p.m. UTC | #1
On 22/02/19 17:45, Vitaly Kuznetsov wrote:
> Commit 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu") brought one subtle
> change: previously, when switching back from L2 to L1, we were resetting
> MMU hooks (like mmu->get_cr3()) in kvm_init_mmu() called from
> nested_vmx_load_cr3(); now we do that in nested_ept_uninit_mmu_context()
> when we re-target the vcpu->arch.mmu pointer.
> The change itself looks logical: if nested_ept_init_mmu_context() changes
> something, then nested_ept_uninit_mmu_context() restores it. There is,
> however, one catch: the following call chain:
> 
>  nested_vmx_load_cr3()
>   kvm_mmu_new_cr3()
>     __kvm_mmu_new_cr3()
>       fast_cr3_switch()
>         cached_root_available()
> 
> now happens with the MMU hooks pointing to the new MMU (the root MMU in
> our case) while previously it happened with the old one.
> cached_root_available() tries to stash the current root, but it is
> incorrect to read the current CR3 with mmu->get_cr3(); we need to use
> old_mmu->get_cr3(), where old_mmu, when switching from L2 to L1, is
> guest_mmu. (BTW, in the shadow page tables case this is a non-issue
> because we don't switch MMUs.)
> 
> While we could have tried to detect that we're switching between MMUs and
> call the right ->get_cr3() from cached_root_available(), that seems
> overly complicated. Instead, just stash the corresponding CR3 when
> setting root_hpa and make cached_root_available() use the stashed value.
> 
> Fixes: 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu")
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Is the bug latent until the other patch?  Or are they completely
separate issues?

Thanks,

Paolo

Vitaly Kuznetsov Feb. 22, 2019, 6:54 p.m. UTC | #2
Paolo Bonzini <pbonzini@redhat.com> writes:

> Is the bug latent until the other patch?  Or are they completely
> separate issues?
>

This one was reported as https://bugs.launchpad.net/qemu/+bug/1813165
(and I still don't completely understand why only SMM is broken - or is
it?). While working on it I noticed that fast_cr3_switch() actually
doesn't work (even after this patch, when we stop putting incorrect
values in the cache via cached_root_available()). So in the end they
seem to be fairly independent.

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index be87f71ccaa5..180373360e34 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -398,6 +398,7 @@ struct kvm_mmu {
 	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			   u64 *spte, const void *pte);
 	hpa_t root_hpa;
+	gpa_t root_cr3;
 	union kvm_mmu_role mmu_role;
 	u8 root_level;
 	u8 shadow_root_level;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 90ba1a1755a4..f2d1d230d5b8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3555,6 +3555,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 							   &invalid_list);
 			mmu->root_hpa = INVALID_PAGE;
 		}
+		mmu->root_cr3 = 0;
 	}
 
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -3610,6 +3611,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 		vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
 	} else
 		BUG();
+	vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
 
 	return 0;
 }
@@ -3618,10 +3620,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_page *sp;
 	u64 pdptr, pm_mask;
-	gfn_t root_gfn;
+	gfn_t root_gfn, root_cr3;
 	int i;
 
-	root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT;
+	root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
+	root_gfn = root_cr3 >> PAGE_SHIFT;
 
 	if (mmu_check_root(vcpu, root_gfn))
 		return 1;
@@ -3646,7 +3649,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		vcpu->arch.mmu->root_hpa = root;
-		return 0;
+		goto set_root_cr3;
 	}
 
 	/*
@@ -3712,6 +3715,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
 	}
 
+set_root_cr3:
+	vcpu->arch.mmu->root_cr3 = root_cr3;
+
 	return 0;
 }
 
@@ -4163,7 +4169,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 	struct kvm_mmu_root_info root;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
 
-	root.cr3 = mmu->get_cr3(vcpu);
+	root.cr3 = mmu->root_cr3;
 	root.hpa = mmu->root_hpa;
 
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
@@ -4176,6 +4182,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 	}
 
 	mmu->root_hpa = root.hpa;
+	mmu->root_cr3 = root.cr3;
 
 	return i < KVM_MMU_NUM_PREV_ROOTS;
 }
@@ -5517,11 +5524,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 
 	vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.root_mmu.root_cr3 = 0;
 	vcpu->arch.root_mmu.translate_gpa = translate_gpa;
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
 		vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 
 	vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.guest_mmu.root_cr3 = 0;
 	vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
 		vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;