
[13/15] KVM: SVM: Add and use svm_register_cache_reset()

Message ID: 20211108124407.12187-14-jiangshanlai@gmail.com
State: New, archived
Series: KVM: X86: Fix and clean up for register caches

Commit Message

Lai Jiangshan Nov. 8, 2021, 12:44 p.m. UTC
From: Lai Jiangshan <laijs@linux.alibaba.com>

It resets all the appropriate register cache bits, as VMX does.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/svm/svm.c |  3 +--
 arch/x86/kvm/svm/svm.h | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini Nov. 18, 2021, 3:37 p.m. UTC | #1
On 11/8/21 13:44, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> It resets all the appropriate register cache bits, as VMX does.
> 
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> ---
>   arch/x86/kvm/svm/svm.c |  3 +--
>   arch/x86/kvm/svm/svm.h | 26 ++++++++++++++++++++++++++
>   2 files changed, 27 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index b7da66935e72..ba9cfddd2875 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3969,8 +3969,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>   
>   	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
>   	vmcb_mark_all_clean(svm->vmcb);
> -
> -	kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
> +	svm_register_cache_reset(vcpu);
>   
>   	/*
>   	 * We need to handle MC intercepts here before the vcpu has a chance to
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 0d7bbe548ac3..1cf5d5e2d0cd 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -274,6 +274,32 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
>           return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
>   }
>   
> +static inline void svm_register_cache_reset(struct kvm_vcpu *vcpu)
> +{
> +/*
> + * SVM_REGS_AVAIL_SET - The set of registers that will be updated in cache on
> + *			demand.  Other registers not listed here are synced to
> + *			the cache immediately after VM-Exit.
> + *
> + * SVM_REGS_DIRTY_SET - The set of registers that might be outdated in
> + *			architecture. Other registers not listed here are synced
> + *			to the architecture immediately when modifying.
> + *
> + *			Special case: VCPU_EXREG_CR3 should be in this set by
> + *			definition, but KVM_REQ_LOAD_MMU_PGD is always
> + *			requested when the cache vcpu->arch.cr3 is changed and
> + *			svm_load_mmu_pgd() always syncs the new CR3 value into
> + *			the architecture.  So the dirty information of
> + *			VCPU_EXREG_CR3 is not used which means VCPU_EXREG_CR3
> + *			isn't required to be put in this set.
> + */
> +#define SVM_REGS_AVAIL_SET	(1 << VCPU_EXREG_PDPTR)
> +#define SVM_REGS_DIRTY_SET	(0)
> +
> +	vcpu->arch.regs_avail &= ~SVM_REGS_AVAIL_SET;
> +	vcpu->arch.regs_dirty &= ~SVM_REGS_DIRTY_SET;
> +}

I think touching regs_dirty is confusing here, so I'd go with this:

         vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;

         /*
          * SVM does not use vcpu->arch.regs_dirty.  The only register that
          * might be out of date in the VMCB is CR3, but KVM_REQ_LOAD_MMU_PGD
          * is always requested when the cache vcpu->arch.cr3 is changed and
          * svm_load_mmu_pgd() always syncs the new CR3 value into the VMCB.
          */

(VMX instead needs VCPU_EXREG_CR3 mostly because it does not want to
update it unconditionally on exit).
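
For illustration, that on-demand read looks roughly like the sketch below,
paraphrasing the VCPU_EXREG_CR3 case of vmx_cache_reg(); the helper name is
made up, and the real code has an extra guard for CR3-load exiting:

	/* Hypothetical helper; a simplified sketch of vmx_cache_reg(VCPU_EXREG_CR3). */
	static void vmx_refresh_guest_cr3(struct kvm_vcpu *vcpu)
	{
		/* CR3 stays in the VMCS after VM-Exit; fetch it only when asked for. */
		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
		kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
	}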

Paolo
Lai Jiangshan Nov. 18, 2021, 4:28 p.m. UTC | #2
On 2021/11/18 23:37, Paolo Bonzini wrote:
> On 11/8/21 13:44, Lai Jiangshan wrote:
>> From: Lai Jiangshan <laijs@linux.alibaba.com>
>>
>> It resets all the appropriate register cache bits, as VMX does.
>>
>> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
>> ---
>>   arch/x86/kvm/svm/svm.c |  3 +--
>>   arch/x86/kvm/svm/svm.h | 26 ++++++++++++++++++++++++++
>>   2 files changed, 27 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index b7da66935e72..ba9cfddd2875 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -3969,8 +3969,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>>       svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
>>       vmcb_mark_all_clean(svm->vmcb);
>> -
>> -    kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
>> +    svm_register_cache_reset(vcpu);
>>       /*
>>        * We need to handle MC intercepts here before the vcpu has a chance to
>> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
>> index 0d7bbe548ac3..1cf5d5e2d0cd 100644
>> --- a/arch/x86/kvm/svm/svm.h
>> +++ b/arch/x86/kvm/svm/svm.h
>> @@ -274,6 +274,32 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
>>           return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
>>   }
>> +static inline void svm_register_cache_reset(struct kvm_vcpu *vcpu)
>> +{
>> +/*
>> + * SVM_REGS_AVAIL_SET - The set of registers that will be updated in cache on
>> + *            demand.  Other registers not listed here are synced to
>> + *            the cache immediately after VM-Exit.
>> + *
>> + * SVM_REGS_DIRTY_SET - The set of registers that might be outdated in
>> + *            architecture. Other registers not listed here are synced
>> + *            to the architecture immediately when modifying.
>> + *
>> + *            Special case: VCPU_EXREG_CR3 should be in this set by
>> + *            definition, but KVM_REQ_LOAD_MMU_PGD is always
>> + *            requested when the cache vcpu->arch.cr3 is changed and
>> + *            svm_load_mmu_pgd() always syncs the new CR3 value into
>> + *            the architecture.  So the dirty information of
>> + *            VCPU_EXREG_CR3 is not used which means VCPU_EXREG_CR3
>> + *            isn't required to be put in this set.
>> + */
>> +#define SVM_REGS_AVAIL_SET    (1 << VCPU_EXREG_PDPTR)
>> +#define SVM_REGS_DIRTY_SET    (0)
>> +
>> +    vcpu->arch.regs_avail &= ~SVM_REGS_AVAIL_SET;
>> +    vcpu->arch.regs_dirty &= ~SVM_REGS_DIRTY_SET;
>> +}
> 
> I think touching regs_dirty is confusing here, so I'd go with this:

It makes the code the same shape as VMX by clearing all the SVM_REGS_DIRTY_SET
bits.  Since SVM_REGS_DIRTY_SET is (0), the compiler removes that line of code
entirely, so regs_dirty is never actually touched.
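
Spelled out with the two macros from the patch, the expansion is just:

	vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);	/* SVM_REGS_AVAIL_SET */
	vcpu->arch.regs_dirty &= ~0;	/* ~SVM_REGS_DIRTY_SET == ~0: a no-op, optimized out */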

My intent for patches 12 and 13 was to use VMX_REGS_DIRTY_SET and
SVM_REGS_DIRTY_SET to keep the two implementations similar.  If that is
confusing, I would like to reconsider.  SVM_REGS_DIRTY_SET is indeed special
on SVM: VCPU_EXREG_CR3 belongs in it by definition, but the patch leaves it
out purely as an optimization, so that the compiler can eliminate that line
of code.

I'm OK with not queueing patches 12, 13 and 14.

> 
>          vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;
> 
>          /*
>           * SVM does not use vcpu->arch.regs_dirty.  The only register that
>           * might be out of date in the VMCB is CR3, but KVM_REQ_LOAD_MMU_PGD
>           * is always requested when the cache vcpu->arch.cr3 is changed and
>           * svm_load_mmu_pgd() always syncs the new CR3 value into the VMCB.
>           */
> 
> (VMX instead needs VCPU_EXREG_CR3 mostly because it does not want to
> update it unconditionally on exit).
> 
> Paolo
Paolo Bonzini Nov. 18, 2021, 5:54 p.m. UTC | #3
On 11/18/21 17:28, Lai Jiangshan wrote:
> My intent for patches 12 and 13 was to use VMX_REGS_DIRTY_SET and
> SVM_REGS_DIRTY_SET to keep the two implementations similar.  If that is
> confusing, I would like to reconsider.  SVM_REGS_DIRTY_SET is indeed special
> on SVM: VCPU_EXREG_CR3 belongs in it by definition, but the patch leaves it
> out purely as an optimization, so that the compiler can eliminate that line
> of code.

I think this is where we disagree.  In my opinion it is enough to
document that CR3 _can_ be out of date, but it doesn't have to be marked
dirty because its dirty bit is effectively KVM_REQ_LOAD_MMU_PGD.
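
Roughly, the existing flow is the sketch below (shadow-paging and SME details
are elided, and "new_cr3" is just a placeholder):

	/* Cache side: whoever changes the cached CR3 also raises the request. */
	vcpu->arch.cr3 = new_cr3;
	kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);

	/*
	 * VMCB side: before the next entry the request is serviced and ends up
	 * in svm_load_mmu_pgd(), which unconditionally writes the value back.
	 */
	svm->vmcb->save.cr3 = new_cr3;
	vmcb_mark_dirty(svm->vmcb, VMCB_CR);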

For VMX, it is important to clear VCPU_EXREG_CR3 because the combination
"avail=0, dirty=1" is nonsensical:

	av d
	0  0    in VMCS
	0  1    *INVALID*
	1  0	in vcpu->arch
	1  1	in vcpu->arch, needs store

But on SVM, VCPU_EXREG_CR3 is always available.
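
The helpers in kvm_cache_regs.h enforce that pairing; roughly:

	static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
						     enum kvm_reg reg)
	{
		return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
	}

	/*
	 * Marking a register dirty also marks it available, so the
	 * "avail=0, dirty=1" combination cannot be produced through the
	 * normal accessors.
	 */
	static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
						   enum kvm_reg reg)
	{
		__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
		__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
	}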

Thinking more about it, it makes more sense for VMX to reset _all_
bits of dirty to 0, just like it was before your change, but doing
so even earlier in vmx_vcpu_run.

I appreciate that VMX_REGS_LAZY_LOAD_SET is useful for documentation,
but it's also important that the values in avail/dirty make sense as
a pair.

So here is what I would do:

diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 6e6d0d01f18d..ac3d3bd662f4 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -43,6 +43,13 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
  BUILD_KVM_GPR_ACCESSORS(r15, R15)
  #endif
  
+/*
+ * avail  dirty
+ * 0	  0	  register in VMCS/VMCB
+ * 0	  1	  *INVALID*
+ * 1	  0	  register in vcpu->arch
+ * 1	  1	  register in vcpu->arch, needs to be stored back
+ */
  static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
  					     enum kvm_reg reg)
  {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6fce61fc98e3..72ae67e214b5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6635,6 +6635,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
  		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
  	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
  		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+	vcpu->arch.regs_dirty = 0;
  
  	cr3 = __get_current_cr3_fast();
  	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
@@ -6729,7 +6730,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
  	loadsegment(es, __USER_DS);
  #endif
  
-	vmx_register_cache_reset(vcpu);
+	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
  
  	pt_guest_exit(vmx);
  
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 4df2ac24ffc1..f978699480e3 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -473,19 +473,21 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
  BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
  BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
  
-static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
-				  | (1 << VCPU_EXREG_RFLAGS)
-				  | (1 << VCPU_EXREG_PDPTR)
-				  | (1 << VCPU_EXREG_SEGMENTS)
-				  | (1 << VCPU_EXREG_CR0)
-				  | (1 << VCPU_EXREG_CR3)
-				  | (1 << VCPU_EXREG_CR4)
-				  | (1 << VCPU_EXREG_EXIT_INFO_1)
-				  | (1 << VCPU_EXREG_EXIT_INFO_2));
-	vcpu->arch.regs_dirty = 0;
-}
+/*
+ * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
+ * cache on demand.  Other registers not listed here are synced to
+ * the cache immediately after VM-Exit.
+ */
+#define VMX_REGS_LAZY_LOAD_SET	((1 << VCPU_REGS_RIP) |         \
+				(1 << VCPU_REGS_RSP) |          \
+				(1 << VCPU_EXREG_RFLAGS) |      \
+				(1 << VCPU_EXREG_PDPTR) |       \
+				(1 << VCPU_EXREG_SEGMENTS) |    \
+				(1 << VCPU_EXREG_CR0) |         \
+				(1 << VCPU_EXREG_CR3) |         \
+				(1 << VCPU_EXREG_CR4) |         \
+				(1 << VCPU_EXREG_EXIT_INFO_1) | \
+				(1 << VCPU_EXREG_EXIT_INFO_2))
  
  static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
  {

and likewise for SVM:

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index eb2a2609cae8..4b22aa7d55d0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3944,6 +3944,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
  		vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
  		vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
  	}
+	vcpu->arch.regs_dirty = 0;
  
  	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
  		kvm_before_interrupt(vcpu);
@@ -3978,7 +3978,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
  		vcpu->arch.apf.host_apf_flags =
  			kvm_read_and_reset_apf_flags();
  
-	kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
+	vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;
  
  	/*
  	 * We need to handle MC intercepts here before the vcpu has a chance to
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 32769d227860..b3c3c3098216 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -321,6 +321,16 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
          return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
  }
  
+/*
+ * Only the PDPTRs are loaded on demand into the shadow MMU.  All other
+ * fields are synchronized in handle_exit, because accessing the VMCB is cheap.
+ *
+ * CR3 might be out of date in the VMCB but it is not marked dirty; instead,
+ * KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3
+ * is changed.  svm_load_mmu_pgd() then syncs the new CR3 value into the VMCB.
+ */
+#define SVM_REGS_LAZY_LOAD_SET	(1 << VCPU_EXREG_PDPTR)
+
  static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
  {
  	return container_of(vcpu, struct vcpu_svm, vcpu);
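
(For reference, a sketch of the on-demand path behind SVM_REGS_LAZY_LOAD_SET,
paraphrasing kvm_pdptr_read() and svm_cache_reg(); exact signatures and the
NPT checks may differ:)

	static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
	{
		/* PDPTR not cached yet: ask the vendor code to (re)load it. */
		if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
			static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_PDPTR);

		return vcpu->arch.walk_mmu->pdptrs[index];
	}

	static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
	{
		if (reg == VCPU_EXREG_PDPTR)
			/* Re-reads the PDPTEs from guest memory and marks them available. */
			load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
	}
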
Lai Jiangshan Nov. 19, 2021, 12:49 a.m. UTC | #4
On 2021/11/19 01:54, Paolo Bonzini wrote:
> On 11/18/21 17:28, Lai Jiangshan wrote:
>> My intent for patches 12 and 13 was to use VMX_REGS_DIRTY_SET and
>> SVM_REGS_DIRTY_SET to keep the two implementations similar.  If that is
>> confusing, I would like to reconsider.  SVM_REGS_DIRTY_SET is indeed special
>> on SVM: VCPU_EXREG_CR3 belongs in it by definition, but the patch leaves it
>> out purely as an optimization, so that the compiler can eliminate that line
>> of code.
> 
> I think this is where we disagree.  In my opinion it is enough to
> document that CR3 _can_ be out of date, but it doesn't have to be marked
> dirty because its dirty bit is effectively KVM_REQ_LOAD_MMU_PGD.
> 
> For VMX, it is important to clear VCPU_EXREG_CR3 because the combination
> "avail=0, dirty=1" is nonsensical:
> 
>      av d
>      0  0    in VMCS
>      0  1    *INVALID*
>      1  0    in vcpu->arch
>      1  1    in vcpu->arch, needs store
> 
> But on SVM, VCPU_EXREG_CR3 is always available.
> 
> Thinking more about it, it makes more sense for VMX to reset _all_
> bits of dirty to 0, just like it was before your change, but doing
> so even earlier in vmx_vcpu_run.
> 
> I appreciate that VMX_REGS_LAZY_LOAD_SET is useful for documentation,
> but it's also important that the values in avail/dirty make sense as
> a pair.
> 
> So here is what I would do:

Reviewed-by: Lai Jiangshan <laijs@linux.alibaba.com>


> 
> diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
> index 6e6d0d01f18d..ac3d3bd662f4 100644
> --- a/arch/x86/kvm/kvm_cache_regs.h
> +++ b/arch/x86/kvm/kvm_cache_regs.h
> @@ -43,6 +43,13 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
>   BUILD_KVM_GPR_ACCESSORS(r15, R15)
>   #endif
> 
> +/*
> + * avail  dirty
> + * 0      0      register in VMCS/VMCB
> + * 0      1      *INVALID*
> + * 1      0      register in vcpu->arch
> + * 1      1      register in vcpu->arch, needs to be stored back
> + */
>   static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
>                            enum kvm_reg reg)
>   {
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 6fce61fc98e3..72ae67e214b5 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6635,6 +6635,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>           vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
>       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
>           vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
> +    vcpu->arch.regs_dirty = 0;
> 
>       cr3 = __get_current_cr3_fast();
>       if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
> @@ -6729,7 +6730,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>       loadsegment(es, __USER_DS);
>   #endif
> 
> -    vmx_register_cache_reset(vcpu);
> +    vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
> 
>       pt_guest_exit(vmx);
> 
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 4df2ac24ffc1..f978699480e3 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -473,19 +473,21 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
>   BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
>   BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
> 
> -static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu)
> -{
> -    vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
> -                  | (1 << VCPU_EXREG_RFLAGS)
> -                  | (1 << VCPU_EXREG_PDPTR)
> -                  | (1 << VCPU_EXREG_SEGMENTS)
> -                  | (1 << VCPU_EXREG_CR0)
> -                  | (1 << VCPU_EXREG_CR3)
> -                  | (1 << VCPU_EXREG_CR4)
> -                  | (1 << VCPU_EXREG_EXIT_INFO_1)
> -                  | (1 << VCPU_EXREG_EXIT_INFO_2));
> -    vcpu->arch.regs_dirty = 0;
> -}
> +/*
> + * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
> + * cache on demand.  Other registers not listed here are synced to
> + * the cache immediately after VM-Exit.
> + */
> +#define VMX_REGS_LAZY_LOAD_SET    ((1 << VCPU_REGS_RIP) |         \
> +                (1 << VCPU_REGS_RSP) |          \
> +                (1 << VCPU_EXREG_RFLAGS) |      \
> +                (1 << VCPU_EXREG_PDPTR) |       \
> +                (1 << VCPU_EXREG_SEGMENTS) |    \
> +                (1 << VCPU_EXREG_CR0) |         \
> +                (1 << VCPU_EXREG_CR3) |         \
> +                (1 << VCPU_EXREG_CR4) |         \
> +                (1 << VCPU_EXREG_EXIT_INFO_1) | \
> +                (1 << VCPU_EXREG_EXIT_INFO_2))
> 
>   static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
>   {
> 
> and likewise for SVM:
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index eb2a2609cae8..4b22aa7d55d0 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3944,6 +3944,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>           vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
>           vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
>       }
> +    vcpu->arch.regs_dirty = 0;
> 
>       if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
>           kvm_before_interrupt(vcpu);
> @@ -3978,7 +3978,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>           vcpu->arch.apf.host_apf_flags =
>               kvm_read_and_reset_apf_flags();
> 
> -    kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
> +    vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;
> 
>       /*
>        * We need to handle MC intercepts here before the vcpu has a chance to
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 32769d227860..b3c3c3098216 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -321,6 +321,16 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
>           return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
>   }
> 
> +/*
> + * Only the PDPTRs are loaded on demand into the shadow MMU.  All other
> + * fields are synchronized in handle_exit, because accessing the VMCB is cheap.
> + *
> + * CR3 might be out of date in the VMCB but it is not marked dirty; instead,
> + * KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3
> + * is changed.  svm_load_mmu_pgd() then syncs the new CR3 value into the VMCB.
> + */
> +#define SVM_REGS_LAZY_LOAD_SET    (1 << VCPU_EXREG_PDPTR)
> +
>   static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
>   {
>       return container_of(vcpu, struct vcpu_svm, vcpu);
>

Patch

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b7da66935e72..ba9cfddd2875 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3969,8 +3969,7 @@  static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
 	vmcb_mark_all_clean(svm->vmcb);
-
-	kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
+	svm_register_cache_reset(vcpu);
 
 	/*
 	 * We need to handle MC intercepts here before the vcpu has a chance to
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 0d7bbe548ac3..1cf5d5e2d0cd 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -274,6 +274,32 @@  static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
         return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
 }
 
+static inline void svm_register_cache_reset(struct kvm_vcpu *vcpu)
+{
+/*
+ * SVM_REGS_AVAIL_SET - The set of registers that will be updated in cache on
+ *			demand.  Other registers not listed here are synced to
+ *			the cache immediately after VM-Exit.
+ *
+ * SVM_REGS_DIRTY_SET - The set of registers that might be outdated in
+ *			architecture. Other registers not listed here are synced
+ *			to the architecture immediately when modifying.
+ *
+ *			Special case: VCPU_EXREG_CR3 should be in this set by
+ *			definition, but KVM_REQ_LOAD_MMU_PGD is always
+ *			requested when the cache vcpu->arch.cr3 is changed and
+ *			svm_load_mmu_pgd() always syncs the new CR3 value into
+ *			the architecture.  So the dirty information of
+ *			VCPU_EXREG_CR3 is not used which means VCPU_EXREG_CR3
+ *			isn't required to be put in this set.
+ */
+#define SVM_REGS_AVAIL_SET	(1 << VCPU_EXREG_PDPTR)
+#define SVM_REGS_DIRTY_SET	(0)
+
+	vcpu->arch.regs_avail &= ~SVM_REGS_AVAIL_SET;
+	vcpu->arch.regs_dirty &= ~SVM_REGS_DIRTY_SET;
+}
+
 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 {
 	return container_of(vcpu, struct vcpu_svm, vcpu);