diff mbox series

[RFC,v5,045/104] KVM: x86/tdp_mmu: make REMOVED_SPTE include shadow_initial value

Message ID 6614d2a2bc34441ed598830392b425fdf8e5ca52.1646422845.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM TDX basic feature support | expand

Commit Message

Isaku Yamahata March 4, 2022, 7:49 p.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

TDP MMU uses REMOVED_SPTE = 0x5a0ULL as special constant to indicate the
intermediate value to indicate one thread is operating on it and the value
should be semi-arbitrary value.  For TDX (more correctly to use #VE), the
value should include suppress #VE value which is shadow_init_value.

Define SHADOW_REMOVED_SPTE as shadow_init_value | REMOVED_SPTE, and replace
REMOVED_SPTE with SHADOW_REMOVED_SPTE to use suppress #VE bit properly for
TDX.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/mmu/spte.h    | 14 ++++++++++++--
 arch/x86/kvm/mmu/tdp_mmu.c | 23 ++++++++++++++++-------
 2 files changed, 28 insertions(+), 9 deletions(-)

Comments

Paolo Bonzini April 5, 2022, 2:22 p.m. UTC | #1
On 3/4/22 20:49, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> TDP MMU uses REMOVED_SPTE = 0x5a0ULL as special constant to indicate the
> intermediate value to indicate one thread is operating on it and the value
> should be semi-arbitrary value.  For TDX (more correctly to use #VE), the
> value should include suppress #VE value which is shadow_init_value.
> 
> Define SHADOW_REMOVED_SPTE as shadow_init_value | REMOVED_SPTE, and replace
> REMOVED_SPTE with SHADOW_REMOVED_SPTE to use suppress #VE bit properly for
> TDX.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>   arch/x86/kvm/mmu/spte.h    | 14 ++++++++++++--
>   arch/x86/kvm/mmu/tdp_mmu.c | 23 ++++++++++++++++-------
>   2 files changed, 28 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index bde843bce878..e88f796724b4 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -194,7 +194,9 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
>    * If a thread running without exclusive control of the MMU lock must perform a
>    * multi-part operation on an SPTE, it can set the SPTE to REMOVED_SPTE as a
>    * non-present intermediate value. Other threads which encounter this value
> - * should not modify the SPTE.
> + * should not modify the SPTE.  When TDX is enabled, shadow_init_value, which
> + * is "suppress #VE" bit set, is also set to removed SPTE, because TDX module
> + * always enables "EPT violation #VE".
>    *
>    * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on
>    * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF
> @@ -207,9 +209,17 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
>   /* Removed SPTEs must not be misconstrued as shadow present PTEs. */
>   static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));
>   
> +/*
> + * See above comment around REMOVED_SPTE.  SHADOW_REMOVED_SPTE is the actual
> + * intermediate value set to the removed SPET.  When TDX is enabled, it sets
> + * the "suppress #VE" bit, otherwise it's REMOVED_SPTE.
> + */
> +extern u64 __read_mostly shadow_init_value;
> +#define SHADOW_REMOVED_SPTE	(shadow_init_value | REMOVED_SPTE)

Please rename the existing REMOVED_SPTE to REMOVED_SPTE_MASK, and call 
this simply REMOVED_SPTE.  This also makes the patch smaller.

Paolo

>   }
>   
>   /*
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index ebd0a02620e8..b6ec2f112c26 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -338,7 +338,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
>   			 * value to the removed SPTE value.
>   			 */
>   			for (;;) {
> -				old_child_spte = xchg(sptep, REMOVED_SPTE);
> +				old_child_spte = xchg(sptep, SHADOW_REMOVED_SPTE);
>   				if (!is_removed_spte(old_child_spte))
>   					break;
>   				cpu_relax();
> @@ -365,10 +365,10 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
>   			 * the two branches consistent and simplifies
>   			 * the function.
>   			 */
> -			WRITE_ONCE(*sptep, REMOVED_SPTE);
> +			WRITE_ONCE(*sptep, SHADOW_REMOVED_SPTE);
>   		}
>   		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
> -				    old_child_spte, REMOVED_SPTE, level,
> +				    old_child_spte, SHADOW_REMOVED_SPTE, level,
>   				    shared);
>   	}
>   
> @@ -537,7 +537,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
>   	 * immediately installing a present entry in its place
>   	 * before the TLBs are flushed.
>   	 */
> -	if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
> +	if (!tdp_mmu_set_spte_atomic(kvm, iter, SHADOW_REMOVED_SPTE))
>   		return false;
>   
>   	kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
> @@ -550,8 +550,16 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
>   	 * special removed SPTE value. No bookkeeping is needed
>   	 * here since the SPTE is going from non-present
>   	 * to non-present.
> +	 *
> +	 * Set non-present value to shadow_init_value, rather than 0.
> +	 * It is because when TDX is enabled, TDX module always
> +	 * enables "EPT-violation #VE", so KVM needs to set
> +	 * "suppress #VE" bit in EPT table entries, in order to get
> +	 * real EPT violation, rather than TDVMCALL.  KVM sets
> +	 * shadow_init_value (which sets "suppress #VE" bit) so it
> +	 * can be set when EPT table entries are zapped.
>   	 */
> -	WRITE_ONCE(*rcu_dereference(iter->sptep), 0);
> +	WRITE_ONCE(*rcu_dereference(iter->sptep), shadow_init_value);
>   
>   	return true;
>   }
> @@ -748,7 +756,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>   			continue;
>   
>   		if (!shared) {
> -			tdp_mmu_set_spte(kvm, &iter, 0);
> +			/* see comments in tdp_mmu_zap_spte_atomic() */
> +			tdp_mmu_set_spte(kvm, &iter, shadow_init_value);
>   			flush = true;
>   		} else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
>   			/*
> @@ -1135,7 +1144,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
>   	 * invariant that the PFN of a present * leaf SPTE can never change.
>   	 * See __handle_changed_spte().
>   	 */
> -	tdp_mmu_set_spte(kvm, iter, 0);
> +	tdp_mmu_set_spte(kvm, iter, shadow_init_value);
>   
>   	if (!pte_write(range->pte)) {
>   		new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
Huang, Kai April 6, 2022, 11:30 p.m. UTC | #2
On Fri, 2022-03-04 at 11:49 -0800, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> TDP MMU uses REMOVED_SPTE = 0x5a0ULL as special constant to indicate the
> intermediate value to indicate one thread is operating on it and the value
> should be semi-arbitrary value.  For TDX (more correctly to use #VE), the
> value should include suppress #VE value which is shadow_init_value.
> 
> Define SHADOW_REMOVED_SPTE as shadow_init_value | REMOVED_SPTE, and replace
> REMOVED_SPTE with SHADOW_REMOVED_SPTE to use suppress #VE bit properly for
> TDX.

Like we discussed, this patch should be merged with patch "KVM: x86/mmu: Allow
non-zero init value for shadow PTE".
Sean Christopherson April 6, 2022, 11:35 p.m. UTC | #3
On Tue, Apr 05, 2022, Paolo Bonzini wrote:
> On 3/4/22 20:49, isaku.yamahata@intel.com wrote:
> > @@ -207,9 +209,17 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
> >   /* Removed SPTEs must not be misconstrued as shadow present PTEs. */
> >   static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));
> > +/*
> > + * See above comment around REMOVED_SPTE.  SHADOW_REMOVED_SPTE is the actual
> > + * intermediate value set to the removed SPET.  When TDX is enabled, it sets
> > + * the "suppress #VE" bit, otherwise it's REMOVED_SPTE.
> > + */
> > +extern u64 __read_mostly shadow_init_value;
> > +#define SHADOW_REMOVED_SPTE	(shadow_init_value | REMOVED_SPTE)
> 
> Please rename the existing REMOVED_SPTE to REMOVED_SPTE_MASK, and call this
> simply REMOVED_SPTE.  This also makes the patch smaller.

Can we name it either __REMOVE_SPTE or REMOVED_SPTE_VAL?  It's most definitely
not a mask, it's a full value, e.g. spte |= REMOVED_SPTE_MASK is completely wrong.

Other than that, 100% agree with avoiding churn.
Paolo Bonzini April 7, 2022, 1:52 p.m. UTC | #4
On 4/7/22 01:35, Sean Christopherson wrote:
>> Please rename the existing REMOVED_SPTE to REMOVED_SPTE_MASK, and call this
>> simply REMOVED_SPTE.  This also makes the patch smaller.
> Can we name it either __REMOVE_SPTE or REMOVED_SPTE_VAL?  It's most definitely
> not a mask, it's a full value, e.g. spte |= REMOVED_SPTE_MASK is completely wrong.

REMOVED_SPTE_VAL is fine.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index bde843bce878..e88f796724b4 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -194,7 +194,9 @@  extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
  * If a thread running without exclusive control of the MMU lock must perform a
  * multi-part operation on an SPTE, it can set the SPTE to REMOVED_SPTE as a
  * non-present intermediate value. Other threads which encounter this value
- * should not modify the SPTE.
+ * should not modify the SPTE.  When TDX is enabled, shadow_init_value, which
+ * is "suppress #VE" bit set, is also set to removed SPTE, because TDX module
+ * always enables "EPT violation #VE".
  *
  * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on
  * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF
@@ -207,9 +209,17 @@  extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
 /* Removed SPTEs must not be misconstrued as shadow present PTEs. */
 static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));
 
+/*
+ * See above comment around REMOVED_SPTE.  SHADOW_REMOVED_SPTE is the actual
+ * intermediate value set to the removed SPET.  When TDX is enabled, it sets
+ * the "suppress #VE" bit, otherwise it's REMOVED_SPTE.
+ */
+extern u64 __read_mostly shadow_init_value;
+#define SHADOW_REMOVED_SPTE	(shadow_init_value | REMOVED_SPTE)
+
 static inline bool is_removed_spte(u64 spte)
 {
-	return spte == REMOVED_SPTE;
+	return spte == SHADOW_REMOVED_SPTE;
 }
 
 /*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index ebd0a02620e8..b6ec2f112c26 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -338,7 +338,7 @@  static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
 			 * value to the removed SPTE value.
 			 */
 			for (;;) {
-				old_child_spte = xchg(sptep, REMOVED_SPTE);
+				old_child_spte = xchg(sptep, SHADOW_REMOVED_SPTE);
 				if (!is_removed_spte(old_child_spte))
 					break;
 				cpu_relax();
@@ -365,10 +365,10 @@  static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
 			 * the two branches consistent and simplifies
 			 * the function.
 			 */
-			WRITE_ONCE(*sptep, REMOVED_SPTE);
+			WRITE_ONCE(*sptep, SHADOW_REMOVED_SPTE);
 		}
 		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
-				    old_child_spte, REMOVED_SPTE, level,
+				    old_child_spte, SHADOW_REMOVED_SPTE, level,
 				    shared);
 	}
 
@@ -537,7 +537,7 @@  static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 	 * immediately installing a present entry in its place
 	 * before the TLBs are flushed.
 	 */
-	if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
+	if (!tdp_mmu_set_spte_atomic(kvm, iter, SHADOW_REMOVED_SPTE))
 		return false;
 
 	kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
@@ -550,8 +550,16 @@  static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 	 * special removed SPTE value. No bookkeeping is needed
 	 * here since the SPTE is going from non-present
 	 * to non-present.
+	 *
+	 * Set non-present value to shadow_init_value, rather than 0.
+	 * It is because when TDX is enabled, TDX module always
+	 * enables "EPT-violation #VE", so KVM needs to set
+	 * "suppress #VE" bit in EPT table entries, in order to get
+	 * real EPT violation, rather than TDVMCALL.  KVM sets
+	 * shadow_init_value (which sets "suppress #VE" bit) so it
+	 * can be set when EPT table entries are zapped.
 	 */
-	WRITE_ONCE(*rcu_dereference(iter->sptep), 0);
+	WRITE_ONCE(*rcu_dereference(iter->sptep), shadow_init_value);
 
 	return true;
 }
@@ -748,7 +756,8 @@  static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			continue;
 
 		if (!shared) {
-			tdp_mmu_set_spte(kvm, &iter, 0);
+			/* see comments in tdp_mmu_zap_spte_atomic() */
+			tdp_mmu_set_spte(kvm, &iter, shadow_init_value);
 			flush = true;
 		} else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
 			/*
@@ -1135,7 +1144,7 @@  static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
 	 * invariant that the PFN of a present * leaf SPTE can never change.
 	 * See __handle_changed_spte().
 	 */
-	tdp_mmu_set_spte(kvm, iter, 0);
+	tdp_mmu_set_spte(kvm, iter, shadow_init_value);
 
 	if (!pte_write(range->pte)) {
 		new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,