Message ID | 6614d2a2bc34441ed598830392b425fdf8e5ca52.1646422845.git.isaku.yamahata@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM TDX basic feature support | expand |
On 3/4/22 20:49, isaku.yamahata@intel.com wrote: > From: Isaku Yamahata <isaku.yamahata@intel.com> > > TDP MMU uses REMOVED_SPTE = 0x5a0ULL as special constant to indicate the > intermediate value to indicate one thread is operating on it and the value > should be semi-arbitrary value. For TDX (more correctly to use #VE), the > value should include suppress #VE value which is shadow_init_value. > > Define SHADOW_REMOVED_SPTE as shadow_init_value | REMOVED_SPTE, and replace > REMOVED_SPTE with SHADOW_REMOVED_SPTE to use suppress #VE bit properly for > TDX. > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > --- > arch/x86/kvm/mmu/spte.h | 14 ++++++++++++-- > arch/x86/kvm/mmu/tdp_mmu.c | 23 ++++++++++++++++------- > 2 files changed, 28 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h > index bde843bce878..e88f796724b4 100644 > --- a/arch/x86/kvm/mmu/spte.h > +++ b/arch/x86/kvm/mmu/spte.h > @@ -194,7 +194,9 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; > * If a thread running without exclusive control of the MMU lock must perform a > * multi-part operation on an SPTE, it can set the SPTE to REMOVED_SPTE as a > * non-present intermediate value. Other threads which encounter this value > - * should not modify the SPTE. > + * should not modify the SPTE. When TDX is enabled, shadow_init_value, which > + * is "suppress #VE" bit set, is also set to removed SPTE, because TDX module > + * always enables "EPT violation #VE". > * > * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on > * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF > @@ -207,9 +209,17 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; > /* Removed SPTEs must not be misconstrued as shadow present PTEs. */ > static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK)); > > +/* > + * See above comment around REMOVED_SPTE. 
SHADOW_REMOVED_SPTE is the actual > + * intermediate value set to the removed SPET. When TDX is enabled, it sets > + * the "suppress #VE" bit, otherwise it's REMOVED_SPTE. > + */ > +extern u64 __read_mostly shadow_init_value; > +#define SHADOW_REMOVED_SPTE (shadow_init_value | REMOVED_SPTE) Please rename the existing REMOVED_SPTE to REMOVED_SPTE_MASK, and call this simply REMOVED_SPTE. This also makes the patch smaller. Paolo > } > > /* > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c > index ebd0a02620e8..b6ec2f112c26 100644 > --- a/arch/x86/kvm/mmu/tdp_mmu.c > +++ b/arch/x86/kvm/mmu/tdp_mmu.c > @@ -338,7 +338,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, > * value to the removed SPTE value. > */ > for (;;) { > - old_child_spte = xchg(sptep, REMOVED_SPTE); > + old_child_spte = xchg(sptep, SHADOW_REMOVED_SPTE); > if (!is_removed_spte(old_child_spte)) > break; > cpu_relax(); > @@ -365,10 +365,10 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, > * the two branches consistent and simplifies > * the function. > */ > - WRITE_ONCE(*sptep, REMOVED_SPTE); > + WRITE_ONCE(*sptep, SHADOW_REMOVED_SPTE); > } > handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn, > - old_child_spte, REMOVED_SPTE, level, > + old_child_spte, SHADOW_REMOVED_SPTE, level, > shared); > } > > @@ -537,7 +537,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, > * immediately installing a present entry in its place > * before the TLBs are flushed. > */ > - if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE)) > + if (!tdp_mmu_set_spte_atomic(kvm, iter, SHADOW_REMOVED_SPTE)) > return false; > > kvm_flush_remote_tlbs_with_address(kvm, iter->gfn, > @@ -550,8 +550,16 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, > * special removed SPTE value. No bookkeeping is needed > * here since the SPTE is going from non-present > * to non-present. 
> + * > + * Set non-present value to shadow_init_value, rather than 0. > + * It is because when TDX is enabled, TDX module always > + * enables "EPT-violation #VE", so KVM needs to set > + * "suppress #VE" bit in EPT table entries, in order to get > + * real EPT violation, rather than TDVMCALL. KVM sets > + * shadow_init_value (which sets "suppress #VE" bit) so it > + * can be set when EPT table entries are zapped. > */ > - WRITE_ONCE(*rcu_dereference(iter->sptep), 0); > + WRITE_ONCE(*rcu_dereference(iter->sptep), shadow_init_value); > > return true; > } > @@ -748,7 +756,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, > continue; > > if (!shared) { > - tdp_mmu_set_spte(kvm, &iter, 0); > + /* see comments in tdp_mmu_zap_spte_atomic() */ > + tdp_mmu_set_spte(kvm, &iter, shadow_init_value); > flush = true; > } else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) { > /* > @@ -1135,7 +1144,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, > * invariant that the PFN of a present * leaf SPTE can never change. > * See __handle_changed_spte(). > */ > - tdp_mmu_set_spte(kvm, iter, 0); > + tdp_mmu_set_spte(kvm, iter, shadow_init_value); > > if (!pte_write(range->pte)) { > new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
On Fri, 2022-03-04 at 11:49 -0800, isaku.yamahata@intel.com wrote: > From: Isaku Yamahata <isaku.yamahata@intel.com> > > TDP MMU uses REMOVED_SPTE = 0x5a0ULL as special constant to indicate the > intermediate value to indicate one thread is operating on it and the value > should be semi-arbitrary value. For TDX (more correctly to use #VE), the > value should include suppress #VE value which is shadow_init_value. > > Define SHADOW_REMOVED_SPTE as shadow_init_value | REMOVED_SPTE, and replace > REMOVED_SPTE with SHADOW_REMOVED_SPTE to use suppress #VE bit properly for > TDX. Like we discussed, this patch should be merged with patch "KVM: x86/mmu: Allow non-zero init value for shadow PTE".
On Tue, Apr 05, 2022, Paolo Bonzini wrote: > On 3/4/22 20:49, isaku.yamahata@intel.com wrote: > > @@ -207,9 +209,17 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; > > /* Removed SPTEs must not be misconstrued as shadow present PTEs. */ > > static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK)); > > +/* > > + * See above comment around REMOVED_SPTE. SHADOW_REMOVED_SPTE is the actual > > + * intermediate value set to the removed SPTE. When TDX is enabled, it sets > > + * the "suppress #VE" bit, otherwise it's REMOVED_SPTE. > > + */ > > +extern u64 __read_mostly shadow_init_value; > > +#define SHADOW_REMOVED_SPTE (shadow_init_value | REMOVED_SPTE) > > Please rename the existing REMOVED_SPTE to REMOVED_SPTE_MASK, and call this > simply REMOVED_SPTE. This also makes the patch smaller. Can we name it either __REMOVED_SPTE or REMOVED_SPTE_VAL? It's most definitely not a mask, it's a full value, e.g. spte |= REMOVED_SPTE_MASK is completely wrong. Other than that, 100% agree with avoiding churn.
On 4/7/22 01:35, Sean Christopherson wrote: >> Please rename the existing REMOVED_SPTE to REMOVED_SPTE_MASK, and call this >> simply REMOVED_SPTE. This also makes the patch smaller. > Can we name it either __REMOVED_SPTE or REMOVED_SPTE_VAL? It's most definitely > not a mask, it's a full value, e.g. spte |= REMOVED_SPTE_MASK is completely wrong. REMOVED_SPTE_VAL is fine. Paolo
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index bde843bce878..e88f796724b4 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -194,7 +194,9 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; * If a thread running without exclusive control of the MMU lock must perform a * multi-part operation on an SPTE, it can set the SPTE to REMOVED_SPTE as a * non-present intermediate value. Other threads which encounter this value - * should not modify the SPTE. + * should not modify the SPTE. When TDX is enabled, shadow_init_value, which + * is "suppress #VE" bit set, is also set to removed SPTE, because TDX module + * always enables "EPT violation #VE". * * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF @@ -207,9 +209,17 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; /* Removed SPTEs must not be misconstrued as shadow present PTEs. */ static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK)); +/* + * See above comment around REMOVED_SPTE. SHADOW_REMOVED_SPTE is the actual + * intermediate value set to the removed SPET. When TDX is enabled, it sets + * the "suppress #VE" bit, otherwise it's REMOVED_SPTE. + */ +extern u64 __read_mostly shadow_init_value; +#define SHADOW_REMOVED_SPTE (shadow_init_value | REMOVED_SPTE) + static inline bool is_removed_spte(u64 spte) { - return spte == REMOVED_SPTE; + return spte == SHADOW_REMOVED_SPTE; } /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index ebd0a02620e8..b6ec2f112c26 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -338,7 +338,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, * value to the removed SPTE value. 
*/ for (;;) { - old_child_spte = xchg(sptep, REMOVED_SPTE); + old_child_spte = xchg(sptep, SHADOW_REMOVED_SPTE); if (!is_removed_spte(old_child_spte)) break; cpu_relax(); @@ -365,10 +365,10 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, * the two branches consistent and simplifies * the function. */ - WRITE_ONCE(*sptep, REMOVED_SPTE); + WRITE_ONCE(*sptep, SHADOW_REMOVED_SPTE); } handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn, - old_child_spte, REMOVED_SPTE, level, + old_child_spte, SHADOW_REMOVED_SPTE, level, shared); } @@ -537,7 +537,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, * immediately installing a present entry in its place * before the TLBs are flushed. */ - if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE)) + if (!tdp_mmu_set_spte_atomic(kvm, iter, SHADOW_REMOVED_SPTE)) return false; kvm_flush_remote_tlbs_with_address(kvm, iter->gfn, @@ -550,8 +550,16 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, * special removed SPTE value. No bookkeeping is needed * here since the SPTE is going from non-present * to non-present. + * + * Set non-present value to shadow_init_value, rather than 0. + * It is because when TDX is enabled, TDX module always + * enables "EPT-violation #VE", so KVM needs to set + * "suppress #VE" bit in EPT table entries, in order to get + * real EPT violation, rather than TDVMCALL. KVM sets + * shadow_init_value (which sets "suppress #VE" bit) so it + * can be set when EPT table entries are zapped. 
*/ - WRITE_ONCE(*rcu_dereference(iter->sptep), 0); + WRITE_ONCE(*rcu_dereference(iter->sptep), shadow_init_value); return true; } @@ -748,7 +756,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, continue; if (!shared) { - tdp_mmu_set_spte(kvm, &iter, 0); + /* see comments in tdp_mmu_zap_spte_atomic() */ + tdp_mmu_set_spte(kvm, &iter, shadow_init_value); flush = true; } else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) { /* @@ -1135,7 +1144,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, * invariant that the PFN of a present * leaf SPTE can never change. * See __handle_changed_spte(). */ - tdp_mmu_set_spte(kvm, iter, 0); + tdp_mmu_set_spte(kvm, iter, shadow_init_value); if (!pte_write(range->pte)) { new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,