Message ID | 20220723012325.1715714-4-seanjc@google.com (mailing list archive)
---|---
State | New, archived
Series | KVM: x86: Apply NX mitigation more precisely
On Sat, Jul 23, 2022 at 01:23:22AM +0000, Sean Christopherson wrote:
> Set nx_huge_page_disallowed in TDP MMU shadow pages before making the SP
> visible to other readers, i.e. before setting its SPTE.  This will allow
> KVM to query the flag when determining if a shadow page can be replaced
> by a NX huge page without violating the rules of the mitigation.

It took me a minute to figure out why the same change isn't needed in the
shadow MMU (it always holds the write-lock, so it's impossible for another
CPU to see an SP without a correct nx_huge_page_disallowed). If you send a
v2, can you add a short blurb to that effect here?

Otherwise,

Reviewed-by: David Matlack <dmatlack@google.com>

>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/mmu/mmu.c          | 12 +++++-------
>  arch/x86/kvm/mmu/mmu_internal.h |  5 ++---
>  arch/x86/kvm/mmu/tdp_mmu.c      | 30 +++++++++++++++++-------------
>  3 files changed, 24 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 493cdf1c29ff..e9252e7cd5a2 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -802,8 +802,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
>  		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
>  }
>
> -static void untrack_possible_nx_huge_page(struct kvm *kvm,
> -					  struct kvm_mmu_page *sp)
> +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	if (list_empty(&sp->possible_nx_huge_page_link))
>  		return;
> @@ -812,15 +811,14 @@ static void untrack_possible_nx_huge_page(struct kvm *kvm,
>  	list_del_init(&sp->possible_nx_huge_page_link);
>  }
>
> -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> +static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	sp->nx_huge_page_disallowed = false;
>
>  	untrack_possible_nx_huge_page(kvm, sp);
>  }
>
> -static void track_possible_nx_huge_page(struct kvm *kvm,
> -					struct kvm_mmu_page *sp)
> +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	if (!list_empty(&sp->possible_nx_huge_page_link))
>  		return;
> @@ -830,8 +828,8 @@ static void track_possible_nx_huge_page(struct kvm *kvm,
>  		      &kvm->arch.possible_nx_huge_pages);
>  }
>
> -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> -			  bool nx_huge_page_possible)
> +static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> +				 bool nx_huge_page_possible)
>  {
>  	sp->nx_huge_page_disallowed = true;
>
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 83644a0167ab..2a887d08b722 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -336,8 +336,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
>
>  void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
>
> -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> -			  bool nx_huge_page_possible);
> -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
> +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
> +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
>
>  #endif /* __KVM_X86_MMU_INTERNAL_H */
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index a30983947fee..626c40ec2af9 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -392,8 +392,10 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
>  		lockdep_assert_held_write(&kvm->mmu_lock);
>
>  	list_del(&sp->link);
> -	if (sp->nx_huge_page_disallowed)
> -		unaccount_nx_huge_page(kvm, sp);
> +	if (sp->nx_huge_page_disallowed) {
> +		sp->nx_huge_page_disallowed = false;
> +		untrack_possible_nx_huge_page(kvm, sp);
> +	}
>
>  	if (shared)
>  		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> @@ -1111,16 +1113,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
>   * @kvm: kvm instance
>   * @iter: a tdp_iter instance currently on the SPTE that should be set
>   * @sp: The new TDP page table to install.
> - * @account_nx: True if this page table is being installed to split a
> - *              non-executable huge page.
>   * @shared: This operation is running under the MMU lock in read mode.
>   *
>   * Returns: 0 if the new page table was installed. Non-0 if the page table
>   * could not be installed (e.g. the atomic compare-exchange failed).
>   */
>  static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
> -			   struct kvm_mmu_page *sp, bool account_nx,
> -			   bool shared)
> +			   struct kvm_mmu_page *sp, bool shared)
>  {
>  	u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled());
>  	int ret = 0;
> @@ -1135,8 +1134,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
>
>  	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
>  	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
> -	if (account_nx)
> -		account_nx_huge_page(kvm, sp, true);
>  	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
>
>  	return 0;
> @@ -1149,6 +1146,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
>  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  {
>  	struct kvm_mmu *mmu = vcpu->arch.mmu;
> +	struct kvm *kvm = vcpu->kvm;
>  	struct tdp_iter iter;
>  	struct kvm_mmu_page *sp;
>  	int ret;
> @@ -1185,9 +1183,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  		}
>
>  		if (!is_shadow_present_pte(iter.old_spte)) {
> -			bool account_nx = fault->huge_page_disallowed &&
> -					  fault->req_level >= iter.level;
> -
>  			/*
>  			 * If SPTE has been frozen by another thread, just
>  			 * give up and retry, avoiding unnecessary page table
> @@ -1199,10 +1194,19 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  			sp = tdp_mmu_alloc_sp(vcpu);
>  			tdp_mmu_init_child_sp(sp, &iter);
>
> -			if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) {
> +			sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
> +
> +			if (tdp_mmu_link_sp(kvm, &iter, sp, true)) {
>  				tdp_mmu_free_sp(sp);
>  				break;
>  			}
> +
> +			if (fault->huge_page_disallowed &&
> +			    fault->req_level >= iter.level) {
> +				spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> +				track_possible_nx_huge_page(kvm, sp);
> +				spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> +			}
>  		}
>  	}
>
> @@ -1490,7 +1494,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
>  	 * correctness standpoint since the translation will be the same either
>  	 * way.
>  	 */
> -	ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared);
> +	ret = tdp_mmu_link_sp(kvm, iter, sp, shared);
>  	if (ret)
>  		goto out;
>
> --
> 2.37.1.359.gd136c6c3e2-goog
>
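
The ordering argument in the quoted commit message boils down to a publish-after-initialize pattern: the flag must be written while the SP is still private, before the SPTE makes it reachable by lock-free readers. Below is a minimal standalone C sketch of that pattern, not KVM code: all names (demo_sp, demo_slot, etc.) are hypothetical, and the explicit acquire/release atomics merely stand in for whatever synchronization the TDP MMU already provides around SPTE installation.

```c
/*
 * Minimal sketch, not KVM code: initialize a flag in a structure
 * *before* publishing the pointer that makes the structure visible,
 * so a lock-free reader that finds the pointer never sees a stale
 * flag.  All names here are hypothetical.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_sp {
	bool nx_disallowed;	/* analogous to nx_huge_page_disallowed */
};

/* analogous to the SPTE slot that publishes the new page table */
static _Atomic(struct demo_sp *) demo_slot;

static void demo_link_sp(struct demo_sp *sp, bool nx_disallowed)
{
	/* 1. Fully initialize the SP while it is still private. */
	sp->nx_disallowed = nx_disallowed;

	/* 2. Only then publish it; release orders the flag store first. */
	atomic_store_explicit(&demo_slot, sp, memory_order_release);
}

static bool demo_reader_can_use_huge_page(void)
{
	/* Acquire pairs with the release store in demo_link_sp(). */
	struct demo_sp *sp = atomic_load_explicit(&demo_slot,
						  memory_order_acquire);

	/* No SP installed: nothing forbids a huge page here. */
	if (!sp)
		return true;

	/* Safe: the flag was written before the pointer became visible. */
	return !sp->nx_disallowed;
}

int main(void)
{
	struct demo_sp *sp = calloc(1, sizeof(*sp));

	if (!sp)
		return 1;

	demo_link_sp(sp, true);
	printf("huge page allowed: %d\n", demo_reader_can_use_huge_page());
	free(sp);
	return 0;
}
```

This also illustrates David's point above: when every writer and reader serializes on a single write-lock (as in the shadow MMU), the publish order cannot be observed and the extra care is unnecessary; it only matters once readers can run concurrently, as they can under the TDP MMU's read-lock paths.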