@@ -1063,7 +1063,7 @@ struct kvm_arch {
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
- struct list_head lpage_disallowed_mmu_pages;
+ struct list_head possible_nx_huge_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
/*
@@ -1219,8 +1219,8 @@ struct kvm_arch {
* - tdp_mmu_roots (above)
* - tdp_mmu_pages (above)
* - the link field of struct kvm_mmu_pages used by the TDP MMU
- * - lpage_disallowed_mmu_pages
- * - the lpage_disallowed_link field of struct kvm_mmu_pages used
+ * - possible_nx_huge_pages;
+ * - the possible_nx_huge_page_link field of struct kvm_mmu_pages used
* by the TDP MMU
* It is acceptable, but not necessary, to acquire this lock when
* the thread holds the MMU lock in write mode.
@@ -809,15 +809,43 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
-void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+static void untrack_possible_nx_huge_page(struct kvm *kvm,
+ struct kvm_mmu_page *sp)
{
- if (sp->lpage_disallowed)
+ if (list_empty(&sp->possible_nx_huge_page_link))
+ return;
+
+ --kvm->stat.nx_lpage_splits;
+ list_del_init(&sp->possible_nx_huge_page_link);
+}
+
+void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ sp->nx_huge_page_disallowed = false;
+
+ untrack_possible_nx_huge_page(kvm, sp);
+}
+
+static void track_possible_nx_huge_page(struct kvm *kvm,
+ struct kvm_mmu_page *sp)
+{
+ if (!list_empty(&sp->possible_nx_huge_page_link))
return;
++kvm->stat.nx_lpage_splits;
- list_add_tail(&sp->lpage_disallowed_link,
- &kvm->arch.lpage_disallowed_mmu_pages);
- sp->lpage_disallowed = true;
+ list_add_tail(&sp->possible_nx_huge_page_link,
+ &kvm->arch.possible_nx_huge_pages);
+}
+
+void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ bool nx_huge_page_possible)
+{
+ sp->nx_huge_page_disallowed = true;
+
+ if (!nx_huge_page_possible)
+ untrack_possible_nx_huge_page(kvm, sp);
+ else
+ track_possible_nx_huge_page(kvm, sp);
}
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -837,13 +865,6 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
-void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
- --kvm->stat.nx_lpage_splits;
- sp->lpage_disallowed = false;
- list_del(&sp->lpage_disallowed_link);
-}
-
static struct kvm_memory_slot *
gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
@@ -1713,6 +1734,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
sp->gfns = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+ INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
+
/*
* active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
* depends on valid pages being added to the head of the list. See
@@ -2352,8 +2375,8 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
zapped_root = !is_obsolete_sp(kvm, sp);
}
- if (sp->lpage_disallowed)
- unaccount_huge_nx_page(kvm, sp);
+ if (sp->nx_huge_page_disallowed)
+ unaccount_nx_huge_page(kvm, sp);
sp->role.invalid = 1;
@@ -2931,9 +2954,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
it.level - 1, true, ACC_ALL);
link_shadow_page(vcpu, it.sptep, sp);
- if (fault->is_tdp && fault->huge_page_disallowed &&
- fault->req_level >= it.level)
- account_huge_nx_page(vcpu->kvm, sp);
+ if (fault->is_tdp && fault->huge_page_disallowed)
+ account_nx_huge_page(vcpu->kvm, sp,
+ fault->req_level >= it.level);
}
if (WARN_ON_ONCE(it.level != fault->goal_level))
@@ -5717,7 +5740,7 @@ int kvm_mmu_init_vm(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
- INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
r = kvm_mmu_init_tdp_mmu(kvm);
@@ -6328,23 +6351,25 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
for ( ; to_zap; --to_zap) {
- if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages))
+ if (list_empty(&kvm->arch.possible_nx_huge_pages))
break;
/*
* We use a separate list instead of just using active_mmu_pages
- * because the number of lpage_disallowed pages is expected to
- * be relatively small compared to the total.
+ * because the number of shadow pages that can be replaced with
+ * an NX huge page is expected to be relatively small compared
+ * to the total number of shadow pages. And because the TDP MMU
+ * doesn't use active_mmu_pages.
*/
- sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
+ sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
struct kvm_mmu_page,
- lpage_disallowed_link);
- WARN_ON_ONCE(!sp->lpage_disallowed);
+ possible_nx_huge_page_link);
+ WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
if (is_tdp_mmu_page(sp)) {
flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
} else {
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
- WARN_ON_ONCE(sp->lpage_disallowed);
+ WARN_ON_ONCE(sp->nx_huge_page_disallowed);
}
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
@@ -43,7 +43,13 @@ struct kvm_mmu_page {
bool tdp_mmu_page;
bool unsync;
u8 mmu_valid_gen;
- bool lpage_disallowed; /* Can't be replaced by an equiv large page */
+
+ /*
+ * The shadow page can't be replaced by an equivalent huge page
+ * because it is being used to map an executable page in the guest
+ * and the NX huge page mitigation is enabled.
+ */
+ bool nx_huge_page_disallowed;
/*
* The following two entries are used to key the shadow page in the
@@ -73,7 +79,14 @@ struct kvm_mmu_page {
};
};
- struct list_head lpage_disallowed_link;
+ /*
+ * Used to track shadow pages that, if zapped, would allow KVM to create
+ * an NX huge page. A shadow page will have nx_huge_page_disallowed
+ * set but not be on the list if a huge page is disallowed for other
+ * reasons, e.g. because KVM is shadowing a PTE at the same gfn, the
+ * memslot isn't properly aligned, etc...
+ */
+ struct list_head possible_nx_huge_page_link;
#ifdef CONFIG_X86_32
/*
* Used out of the mmu-lock to avoid reading spte values while an
@@ -168,7 +181,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
-void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
-void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ bool nx_huge_page_possible);
+void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
#endif /* __KVM_X86_MMU_INTERNAL_H */
@@ -708,9 +708,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
sp = kvm_mmu_get_page(vcpu, base_gfn, fault->addr,
it.level - 1, true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
- if (fault->huge_page_disallowed &&
- fault->req_level >= it.level)
- account_huge_nx_page(vcpu->kvm, sp);
+ if (fault->huge_page_disallowed)
+ account_nx_huge_page(vcpu->kvm, sp,
+ fault->req_level >= it.level);
}
}
@@ -284,6 +284,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
gfn_t gfn, union kvm_mmu_page_role role)
{
+ INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
+
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
sp->role = role;
@@ -390,8 +392,8 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
lockdep_assert_held_write(&kvm->mmu_lock);
list_del(&sp->link);
- if (sp->lpage_disallowed)
- unaccount_huge_nx_page(kvm, sp);
+ if (sp->nx_huge_page_disallowed)
+ unaccount_nx_huge_page(kvm, sp);
if (shared)
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
@@ -1125,7 +1127,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
if (account_nx)
- account_huge_nx_page(kvm, sp);
+ account_nx_huge_page(kvm, sp, true);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
return 0;
Tag shadow pages that cannot be replaced with an NX huge page even if zapping the page would not allow KVM to create a huge page, e.g. because something else prevents creating a huge page. This will allow a future patch to more precisely apply the mitigation by checking if an existing shadow page can be replaced by an NX huge page.

Currently, KVM assumes that any existing shadow page encountered cannot be replaced by an NX huge page (if the mitigation is enabled), which prevents KVM from replacing no-longer-necessary shadow pages with huge pages, e.g. after disabling dirty logging, zapping from the mmu_notifier due to page migration, etc...

Failure to tag shadow pages appropriately could theoretically lead to false negatives, e.g. if a fetch fault requests a small page and thus isn't tracked, and a read/write fault later requests a huge page, KVM will not reject the huge page as it should.

To avoid yet another flag, initialize the list_head and use list_empty() to determine whether or not a page is on the list of NX huge pages that should be recovered.

Opportunistically rename most of the variables/functions involved to provide consistency, e.g. lpage vs huge page and NX huge vs huge NX, and clarity, e.g. to make it obvious the flag applies only to the NX huge page mitigation, not to any condition that prevents creating a huge page.

Fixes: 5bcaf3e1715f ("KVM: x86/mmu: Account NX huge page disallowed iff huge page was requested")
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm_host.h |  6 +--
 arch/x86/kvm/mmu/mmu.c          | 75 ++++++++++++++++++++++-----------
 arch/x86/kvm/mmu/mmu_internal.h | 22 ++++++++--
 arch/x86/kvm/mmu/paging_tmpl.h  |  6 +--
 arch/x86/kvm/mmu/tdp_mmu.c      |  8 ++--
 5 files changed, 79 insertions(+), 38 deletions(-)
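As an editorial aside: the list_empty()-as-membership-flag idiom the patch relies on only works if nodes are removed with list_del_init(), which re-points the entry at itself, rather than plain list_del(). The sketch below is a minimal, self-contained userspace illustration of that idiom; it is not KVM code, the struct and helper names are invented for the example, and the list helpers are simplified stand-ins for the kernel's <linux/list.h>.

#include <assert.h>
#include <stdbool.h>

/* Simplified stand-ins for the kernel's struct list_head and list helpers. */
struct list_head {
	struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *h)
{
	h->next = h->prev = h;
}

static bool list_empty(const struct list_head *h)
{
	/* A node points at itself iff it is not linked into any list. */
	return h->next == h;
}

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

/* Unlike list_del(), re-initialize the node so list_empty(node) is true again. */
static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	INIT_LIST_HEAD(n);
}

/* Hypothetical page struct; only the embedded link matters for the idiom. */
struct page {
	struct list_head link;
};

static struct list_head tracked = { &tracked, &tracked };

/* Idempotent track/untrack keyed purely off list membership, no extra flag. */
static void track(struct page *p)
{
	if (!list_empty(&p->link))
		return;
	list_add_tail(&p->link, &tracked);
}

static void untrack(struct page *p)
{
	if (list_empty(&p->link))
		return;
	list_del_init(&p->link);
}

int main(void)
{
	struct page p;

	INIT_LIST_HEAD(&p.link);	/* must init before the first list_empty() check */
	assert(list_empty(&p.link));

	track(&p);
	track(&p);			/* second call is a no-op */
	assert(!list_empty(&p.link));

	untrack(&p);
	assert(list_empty(&p.link));	/* only true because of list_del_init() */
	return 0;
}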