Message ID: 8907eadab058300fa6ba7943036ad638b41ee0ef.1646422845.git.isaku.yamahata@intel.com (mailing list archive)
State: New, archived
Series: KVM TDX basic feature support
On 3/4/22 20:49, isaku.yamahata@intel.com wrote:
>  static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
> +					  bool private_spte,
>  					  u64 old_spte, u64 new_spte, int level)
>  {
>  	bool pfn_changed;
>  	struct kvm_memory_slot *slot;
>
> +	/*
> +	 * TDX doesn't support live migration. Never mark private page as
> +	 * dirty in log-dirty bitmap, since it's not possible for userspace
> +	 * hypervisor to live migrate private page anyway.
> +	 */
> +	if (private_spte)
> +		return;

This should not be needed; patch 35 will block it anyway because
kvm_slot_dirty_track_enabled() will return false.

> @@ -1215,7 +1227,23 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
>  	 * into this helper allow blocking; it'd be dead, wasteful code.
>  	 */
>  	for_each_tdp_mmu_root(kvm, root, range->slot->as_id) {
> -		tdp_root_for_each_leaf_pte(iter, root, range->start, range->end)
> +		/*
> +		 * For TDX shared mapping, set GFN shared bit to the range,
> +		 * so the handler() doesn't need to set it, to avoid duplicated
> +		 * code in multiple handler()s.
> +		 */
> +		gfn_t start;
> +		gfn_t end;
> +
> +		if (is_private_sp(root)) {
> +			start = kvm_gfn_private(kvm, range->start);
> +			end = kvm_gfn_private(kvm, range->end);
> +		} else {
> +			start = kvm_gfn_shared(kvm, range->start);
> +			end = kvm_gfn_shared(kvm, range->end);
> +		}

I think this could be a separate function kvm_gfn_for_root(kvm, root, ...).

> @@ -1237,6 +1265,15 @@ static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
>  	if (!is_accessed_spte(iter->old_spte))
>  		return false;
>
> +	/*
> +	 * First TDX generation doesn't support clearing A bit for private
> +	 * mapping, since there's no secure EPT API to support it. However
> +	 * it's a legitimate request for TDX guest, so just return w/o a
> +	 * WARN().
> +	 */
> +	if (is_private_spte(iter->sptep))
> +		return false;

Please add instead a "bool only_shared" argument to
kvm_tdp_mmu_handle_gfn, since you can check "only_shared &&
is_private_sp(root)" just once (instead of checking it once per PTE).

>  	new_spte = iter->old_spte;
>
>  	if (spte_ad_enabled(new_spte)) {
> @@ -1281,6 +1318,13 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
>  	/* Huge pages aren't expected to be modified without first being zapped. */
>  	WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
>
> +	/*
> +	 * .change_pte() callback should not happen for private page, because
> +	 * for now TDX private pages are pinned during VM's life time.
> +	 */
> +	if (WARN_ON(is_private_spte(iter->sptep)))
> +		return false;
> +
>  	if (iter->level != PG_LEVEL_4K ||
>  	    !is_shadow_present_pte(iter->old_spte))
>  		return false;
> @@ -1332,6 +1376,16 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>  	u64 new_spte;
>  	bool spte_set = false;
>
> +	/*
> +	 * First TDX generation doesn't support write protecting private
> +	 * mappings, since there's no secure EPT API to support it. It
> +	 * is a bug to reach here for TDX guest.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return spte_set;

Isn't this function unreachable even for the shared address range? If
so, this WARN should be in kvm_tdp_mmu_wrprot_slot, and also it should
check if !kvm_arch_dirty_log_supported(kvm).

> +	start = kvm_gfn_shared(kvm, start);
> +	end = kvm_gfn_shared(kvm, end);

... and then these two lines are unnecessary.
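Concretely, the two kvm_tdp_mmu_handle_gfn suggestions above could look
something like this (an untested sketch against the quoted code;
kvm_gfn_for_root() and only_shared are only the proposed names, while
is_private_sp(), kvm_gfn_private() and kvm_gfn_shared() are helpers this
series introduces):

/* Return the GFN adjusted for the alias (private or shared) this root maps. */
static gfn_t kvm_gfn_for_root(struct kvm *kvm, struct kvm_mmu_page *root,
			      gfn_t gfn)
{
	return is_private_sp(root) ? kvm_gfn_private(kvm, gfn) :
				     kvm_gfn_shared(kvm, gfn);
}

static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
						   struct kvm_gfn_range *range,
						   tdp_handler_t handler,
						   bool only_shared)
{
	struct kvm_mmu_page *root;
	struct tdp_iter iter;
	bool ret = false;

	rcu_read_lock();

	/*
	 * Don't support rescheduling, none of the MMU notifiers that funnel
	 * into this helper allow blocking; it'd be dead, wasteful code.
	 */
	for_each_tdp_mmu_root(kvm, root, range->slot->as_id) {
		gfn_t start, end;

		/* Check the root once, instead of is_private_spte() per PTE. */
		if (only_shared && is_private_sp(root))
			continue;

		start = kvm_gfn_for_root(kvm, root, range->start);
		end = kvm_gfn_for_root(kvm, root, range->end);

		tdp_root_for_each_leaf_pte(iter, root, start, end)
			ret |= handler(kvm, &iter, range);
	}

	rcu_read_unlock();

	return ret;
}

The aging path would then pass only_shared=true, and age_gfn_range() could
drop its per-PTE is_private_spte() check entirely.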
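Likewise, a rough sketch of hoisting the write-protection check into the
slot-level entry point (the per-root loop is elided since it is unchanged;
kvm_arch_dirty_log_supported() is the helper referenced above, added
elsewhere in the series):

bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
			     const struct kvm_memory_slot *slot, int min_level)
{
	struct kvm_mmu_page *root;
	bool spte_set = false;

	/*
	 * Write protection is only used for dirty logging, which the first
	 * TDX generation doesn't support, so bail before walking any root.
	 */
	if (WARN_ON(!kvm_arch_dirty_log_supported(kvm)))
		return false;

	/* ... existing per-root wrprot_gfn_range() loop, unchanged ... */

	return spte_set;
}

With the check here, wrprot_gfn_range() needs neither the WARN nor the
kvm_gfn_shared() adjustments on its start/end.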
>  	rcu_read_lock();
>
>  	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
> @@ -1398,6 +1452,16 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>  	u64 new_spte;
>  	bool spte_set = false;
>
> +	/*
> +	 * First TDX generation doesn't support clearing dirty bit,
> +	 * since there's no secure EPT API to support it. It is a
> +	 * bug to reach here for TDX guest.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return spte_set;

Same here, can you check it in kvm_tdp_mmu_clear_dirty_slot?

> +	start = kvm_gfn_shared(kvm, start);
> +	end = kvm_gfn_shared(kvm, end);

Same here.

>  	rcu_read_lock();
>
>  	tdp_root_for_each_leaf_pte(iter, root, start, end) {
> @@ -1467,6 +1531,15 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
>  	struct tdp_iter iter;
>  	u64 new_spte;
>
> +	/*
> +	 * First TDX generation doesn't support clearing dirty bit,
> +	 * since there's no secure EPT API to support it. It is a
> +	 * bug to reach here for TDX guest.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return;

IIRC this is reachable from userspace, so WARN_ON is not possible.
But, again please check

	if (!kvm_arch_dirty_log_supported(kvm))
		return;

in kvm_tdp_mmu_clear_dirty_pt_masked.

> +	gfn = kvm_gfn_shared(kvm, gfn);

Also unnecessary, I think.

>  	rcu_read_lock();
>
>  	tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
> @@ -1530,6 +1603,16 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
>  	struct tdp_iter iter;
>  	kvm_pfn_t pfn;
>
> +	/*
> +	 * This should only be reachable in case of log-dirty, which TD
> +	 * private mapping doesn't support so far. Give a WARN() if it
> +	 * hits private mapping.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return;
> +	start = kvm_gfn_shared(kvm, start);
> +	end = kvm_gfn_shared(kvm, end);

I think this is not a big deal and you can leave it as is.
Alternatively, please move the WARN to
kvm_tdp_mmu_zap_collapsible_sptes(). It is also only reachable if you
can set KVM_MEM_LOG_DIRTY_PAGES in the first place.

Paolo

>  	rcu_read_lock();
>
>  	tdp_root_for_each_pte(iter, root, start, end) {
> @@ -1543,8 +1626,9 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
>
>  		pfn = spte_to_pfn(iter.old_spte);
>  		if (kvm_is_reserved_pfn(pfn) ||
> -		    iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
> -							    pfn, PG_LEVEL_NUM))
> +		    iter.level >= kvm_mmu_max_mapping_level(kvm, slot,
> +				tdp_iter_gfn_unalias(kvm, &iter), pfn,
> +				PG_LEVEL_NUM))
>  			continue;
>
>  		/* Note, a successful atomic zap also does a remote TLB flush. */
> @@ -1590,6 +1674,14 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
>
>  	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
>
> +	/*
> +	 * First TDX generation doesn't support write protecting private
> +	 * mappings, since there's no secure EPT API to support it. It
> +	 * is a bug to reach here for TDX guest.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return spte_set;
> +
>  	rcu_read_lock();
>
>  	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
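For the dirty-log paths called out above, the requested checks would look
something like this (again only a sketch; the surrounding functions are
otherwise unchanged):

	/*
	 * In kvm_tdp_mmu_clear_dirty_slot(): reaching here without dirty-log
	 * support would be a KVM bug, so a WARN is appropriate.
	 */
	if (WARN_ON(!kvm_arch_dirty_log_supported(kvm)))
		return false;

	/*
	 * In kvm_tdp_mmu_clear_dirty_pt_masked(): reachable from userspace,
	 * per the review, so return quietly without a WARN_ON().
	 */
	if (!kvm_arch_dirty_log_supported(kvm))
		return;

The same test, with a WARN, would also cover
kvm_tdp_mmu_zap_collapsible_sptes() if the WARN moves there as suggested.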
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index acba2590b51e..1949f81027a0 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -257,11 +257,20 @@ static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
 }
 
 static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
+					  bool private_spte,
 					  u64 old_spte, u64 new_spte, int level)
 {
 	bool pfn_changed;
 	struct kvm_memory_slot *slot;
 
+	/*
+	 * TDX doesn't support live migration. Never mark private page as
+	 * dirty in log-dirty bitmap, since it's not possible for userspace
+	 * hypervisor to live migrate private page anyway.
+	 */
+	if (private_spte)
+		return;
+
 	if (level > PG_LEVEL_4K)
 		return;
 
@@ -269,6 +278,8 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
 
 	if ((!is_writable_pte(old_spte) || pfn_changed) &&
 	    is_writable_pte(new_spte)) {
+		/* For memory slot operations, use GFN without aliasing */
+		gfn = kvm_gfn_unalias(kvm, gfn);
 		slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
 		mark_page_dirty_in_slot(kvm, slot, gfn);
 	}
@@ -547,7 +558,7 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 	__handle_changed_spte(kvm, as_id, gfn, private_spte,
 			      old_spte, new_spte, level, shared);
 	handle_changed_spte_acc_track(old_spte, new_spte, level);
-	handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
+	handle_changed_spte_dirty_log(kvm, as_id, gfn, private_spte, old_spte,
 				      new_spte, level);
 }
 
@@ -678,6 +689,7 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 					      iter->level);
 	if (record_dirty_log)
 		handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
+					      is_private_spte(iter->sptep),
 					      iter->old_spte, new_spte,
 					      iter->level);
 }
@@ -1215,7 +1227,23 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
 	 * into this helper allow blocking; it'd be dead, wasteful code.
 	 */
 	for_each_tdp_mmu_root(kvm, root, range->slot->as_id) {
-		tdp_root_for_each_leaf_pte(iter, root, range->start, range->end)
+		/*
+		 * For TDX shared mapping, set GFN shared bit to the range,
+		 * so the handler() doesn't need to set it, to avoid duplicated
+		 * code in multiple handler()s.
+		 */
+		gfn_t start;
+		gfn_t end;
+
+		if (is_private_sp(root)) {
+			start = kvm_gfn_private(kvm, range->start);
+			end = kvm_gfn_private(kvm, range->end);
+		} else {
+			start = kvm_gfn_shared(kvm, range->start);
+			end = kvm_gfn_shared(kvm, range->end);
+		}
+
+		tdp_root_for_each_leaf_pte(iter, root, start, end)
 			ret |= handler(kvm, &iter, range);
 	}
 
@@ -1237,6 +1265,15 @@ static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
 	if (!is_accessed_spte(iter->old_spte))
 		return false;
 
+	/*
+	 * First TDX generation doesn't support clearing A bit for private
+	 * mapping, since there's no secure EPT API to support it. However
+	 * it's a legitimate request for TDX guest, so just return w/o a
+	 * WARN().
+	 */
+	if (is_private_spte(iter->sptep))
+		return false;
+
 	new_spte = iter->old_spte;
 
 	if (spte_ad_enabled(new_spte)) {
@@ -1281,6 +1318,13 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
 	/* Huge pages aren't expected to be modified without first being zapped. */
 	WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
 
+	/*
+	 * .change_pte() callback should not happen for private page, because
+	 * for now TDX private pages are pinned during VM's life time.
+	 */
+	if (WARN_ON(is_private_spte(iter->sptep)))
+		return false;
+
 	if (iter->level != PG_LEVEL_4K ||
 	    !is_shadow_present_pte(iter->old_spte))
 		return false;
@@ -1332,6 +1376,16 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 	u64 new_spte;
 	bool spte_set = false;
 
+	/*
+	 * First TDX generation doesn't support write protecting private
+	 * mappings, since there's no secure EPT API to support it. It
+	 * is a bug to reach here for TDX guest.
+	 */
+	if (WARN_ON(is_private_sp(root)))
+		return spte_set;
+	start = kvm_gfn_shared(kvm, start);
+	end = kvm_gfn_shared(kvm, end);
+
 	rcu_read_lock();
 
 	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
@@ -1398,6 +1452,16 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 	u64 new_spte;
 	bool spte_set = false;
 
+	/*
+	 * First TDX generation doesn't support clearing dirty bit,
+	 * since there's no secure EPT API to support it. It is a
+	 * bug to reach here for TDX guest.
+	 */
+	if (WARN_ON(is_private_sp(root)))
+		return spte_set;
+	start = kvm_gfn_shared(kvm, start);
+	end = kvm_gfn_shared(kvm, end);
+
 	rcu_read_lock();
 
 	tdp_root_for_each_leaf_pte(iter, root, start, end) {
@@ -1467,6 +1531,15 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 	struct tdp_iter iter;
 	u64 new_spte;
 
+	/*
+	 * First TDX generation doesn't support clearing dirty bit,
+	 * since there's no secure EPT API to support it. It is a
+	 * bug to reach here for TDX guest.
+	 */
+	if (WARN_ON(is_private_sp(root)))
+		return;
+	gfn = kvm_gfn_shared(kvm, gfn);
+
 	rcu_read_lock();
 
 	tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
@@ -1530,6 +1603,16 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 	struct tdp_iter iter;
 	kvm_pfn_t pfn;
 
+	/*
+	 * This should only be reachable in case of log-dirty, which TD
+	 * private mapping doesn't support so far. Give a WARN() if it
+	 * hits private mapping.
+	 */
+	if (WARN_ON(is_private_sp(root)))
+		return;
+	start = kvm_gfn_shared(kvm, start);
+	end = kvm_gfn_shared(kvm, end);
+
 	rcu_read_lock();
 
 	tdp_root_for_each_pte(iter, root, start, end) {
@@ -1543,8 +1626,9 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 
 		pfn = spte_to_pfn(iter.old_spte);
 		if (kvm_is_reserved_pfn(pfn) ||
-		    iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
-							    pfn, PG_LEVEL_NUM))
+		    iter.level >= kvm_mmu_max_mapping_level(kvm, slot,
+							    tdp_iter_gfn_unalias(kvm, &iter), pfn,
+							    PG_LEVEL_NUM))
 			continue;
 
 		/* Note, a successful atomic zap also does a remote TLB flush. */
@@ -1590,6 +1674,14 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
 
 	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
 
+	/*
+	 * First TDX generation doesn't support write protecting private
+	 * mappings, since there's no secure EPT API to support it. It
+	 * is a bug to reach here for TDX guest.
+	 */
+	if (WARN_ON(is_private_sp(root)))
+		return spte_set;
+
 	rcu_read_lock();
 
 	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,