[RFC,10/37] KVM: MMU: Move struct kvm_page_fault to common code

Message ID: 20221208193857.4090582-11-dmatlack@google.com (mailing list archive)
State: Handled Elsewhere
Series: KVM: Refactor the KVM/x86 TDP MMU into common code

Commit Message

David Matlack Dec. 8, 2022, 7:38 p.m. UTC
Move struct kvm_page_fault to common code. This will be used in a future
commit to move the TDP MMU to common code.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/include/asm/kvm/mmu_types.h | 20 +++++++
 arch/x86/kvm/mmu/mmu.c               | 19 +++----
 arch/x86/kvm/mmu/mmu_internal.h      | 79 ++++++----------------------
 arch/x86/kvm/mmu/mmutrace.h          |  2 +-
 arch/x86/kvm/mmu/paging_tmpl.h       | 14 ++---
 arch/x86/kvm/mmu/tdp_mmu.c           |  6 +--
 include/kvm/mmu_types.h              | 44 ++++++++++++++++
 7 files changed, 100 insertions(+), 84 deletions(-)
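
In outline, the split this patch creates looks like the condensed sketch below (field lists abbreviated and grouped here for brevity; the diff that follows has the authoritative definitions):

	/* include/kvm/mmu_types.h: arch-neutral fault state, usable by common code */
	struct kvm_page_fault {
		const gpa_t addr;		/* raw faulting address */
		const bool prefetch;
		const bool write, exec;		/* cause of the fault */
		gfn_t gfn;			/* shifted addr, or guest-walk result */
		struct kvm_memory_slot *slot;	/* may be NULL */
		u8 max_level, req_level, goal_level;
		unsigned long mmu_seq;		/* kvm->mmu_invalidate_seq snapshot */
		kvm_pfn_t pfn;
		hva_t hva;
		bool map_writable;
		struct kvm_page_fault_arch arch;	/* x86-only state, below */
	};

	/* arch/x86/include/asm/kvm/mmu_types.h: x86-specific fault state */
	struct kvm_page_fault_arch {
		const u32 error_code;
		const bool present, rsvd, user;	/* from the PFERR_* bits */
		const bool is_tdp;
		const bool nx_huge_page_workaround_enabled;
		bool huge_page_disallowed;
	};

x86-only call sites change mechanically, e.g. fault->rsvd becomes fault->arch.rsvd.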

Comments

Ben Gardon Dec. 12, 2022, 6:24 p.m. UTC | #1
On Thu, Dec 8, 2022 at 11:39 AM David Matlack <dmatlack@google.com> wrote:
>
> Move struct kvm_page_fault to common code. This will be used in a future
> commit to move the TDP MMU to common code.
>
> No functional change intended.
>
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
>  arch/x86/include/asm/kvm/mmu_types.h | 20 +++++++
>  arch/x86/kvm/mmu/mmu.c               | 19 +++----
>  arch/x86/kvm/mmu/mmu_internal.h      | 79 ++++++----------------------
>  arch/x86/kvm/mmu/mmutrace.h          |  2 +-
>  arch/x86/kvm/mmu/paging_tmpl.h       | 14 ++---
>  arch/x86/kvm/mmu/tdp_mmu.c           |  6 +--
>  include/kvm/mmu_types.h              | 44 ++++++++++++++++
>  7 files changed, 100 insertions(+), 84 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm/mmu_types.h b/arch/x86/include/asm/kvm/mmu_types.h
> index affcb520b482..59d1be85f4b7 100644
> --- a/arch/x86/include/asm/kvm/mmu_types.h
> +++ b/arch/x86/include/asm/kvm/mmu_types.h
> @@ -5,6 +5,7 @@
>  #include <linux/bitmap.h>
>  #include <linux/list.h>
>  #include <linux/types.h>
> +#include <linux/kvm_types.h>
>
>  /*
>   * This is a subset of the overall kvm_cpu_role to minimize the size of
> @@ -115,4 +116,23 @@ struct kvm_mmu_page_arch {
>         atomic_t write_flooding_count;
>  };
>
> +struct kvm_page_fault_arch {
> +       const u32 error_code;
> +
> +       /* x86-specific error code bits */
> +       const bool present;
> +       const bool rsvd;
> +       const bool user;
> +
> +       /* Derived from mmu and global state.  */
> +       const bool is_tdp;
> +       const bool nx_huge_page_workaround_enabled;
> +
> +       /*
> +        * Whether a >4KB mapping can be created or is forbidden due to NX
> +        * hugepages.
> +        */
> +       bool huge_page_disallowed;
> +};
> +
>  #endif /* !__ASM_KVM_MMU_TYPES_H */
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e47f35878ab5..0593d4a60139 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3092,7 +3092,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>         struct kvm_memory_slot *slot = fault->slot;
>         kvm_pfn_t mask;
>
> -       fault->huge_page_disallowed = fault->exec && fault->nx_huge_page_workaround_enabled;
> +       fault->arch.huge_page_disallowed =
> +               fault->exec && fault->arch.nx_huge_page_workaround_enabled;
>
>         if (unlikely(fault->max_level == PG_LEVEL_4K))
>                 return;
> @@ -3109,7 +3110,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>          */
>         fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
>                                                      fault->gfn, fault->max_level);
> -       if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
> +       if (fault->req_level == PG_LEVEL_4K || fault->arch.huge_page_disallowed)
>                 return;
>
>         /*
> @@ -3158,7 +3159,7 @@ static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>                  * We cannot overwrite existing page tables with an NX
>                  * large page, as the leaf could be executable.
>                  */
> -               if (fault->nx_huge_page_workaround_enabled)
> +               if (fault->arch.nx_huge_page_workaround_enabled)
>                         disallowed_hugepage_adjust(fault, *it.sptep, it.level);
>
>                 base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
> @@ -3170,7 +3171,7 @@ static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>                         continue;
>
>                 link_shadow_page(vcpu, it.sptep, sp);
> -               if (fault->huge_page_disallowed)
> +               if (fault->arch.huge_page_disallowed)
>                         account_nx_huge_page(vcpu->kvm, sp,
>                                              fault->req_level >= it.level);
>         }
> @@ -3221,7 +3222,7 @@ static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
>                                    struct kvm_page_fault *fault,
>                                    unsigned int access)
>  {
> -       gva_t gva = fault->is_tdp ? 0 : fault->addr;
> +       gva_t gva = fault->arch.is_tdp ? 0 : fault->addr;
>
>         vcpu_cache_mmio_info(vcpu, gva, fault->gfn,
>                              access & shadow_mmio_access_mask);
> @@ -3255,7 +3256,7 @@ static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
>          * generation number.  Refreshing the MMIO generation needs to go down
>          * the slow path.  Note, EPT Misconfigs do NOT set the PRESENT flag!
>          */
> -       if (fault->rsvd)
> +       if (fault->arch.rsvd)
>                 return false;
>
>         /*
> @@ -3273,7 +3274,7 @@ static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
>          *    SPTE is MMU-writable (determined later), the fault can be fixed
>          *    by setting the Writable bit, which can be done out of mmu_lock.
>          */
> -       if (!fault->present)
> +       if (!fault->arch.present)
>                 return !kvm_ad_enabled();
>
>         /*
> @@ -4119,10 +4120,10 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
>  static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
>                                          struct kvm_page_fault *fault)
>  {
> -       if (unlikely(fault->rsvd))
> +       if (unlikely(fault->arch.rsvd))
>                 return false;
>
> -       if (!fault->present || !fault->write)
> +       if (!fault->arch.present || !fault->write)
>                 return false;
>
>         /*
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index af2ae4887e35..4abb80a3bd01 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -77,60 +77,6 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
>         return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
>  }
>
> -struct kvm_page_fault {
> -       /* arguments to kvm_mmu_do_page_fault.  */
> -       const gpa_t addr;
> -       const u32 error_code;
> -       const bool prefetch;
> -
> -       /* Derived from error_code.  */
> -       const bool exec;
> -       const bool write;
> -       const bool present;
> -       const bool rsvd;
> -       const bool user;
> -
> -       /* Derived from mmu and global state.  */
> -       const bool is_tdp;
> -       const bool nx_huge_page_workaround_enabled;
> -
> -       /*
> -        * Whether a >4KB mapping can be created or is forbidden due to NX
> -        * hugepages.
> -        */
> -       bool huge_page_disallowed;
> -
> -       /*
> -        * Maximum page size that can be created for this fault; input to
> -        * FNAME(fetch), direct_map() and kvm_tdp_mmu_map().
> -        */
> -       u8 max_level;
> -
> -       /*
> -        * Page size that can be created based on the max_level and the
> -        * page size used by the host mapping.
> -        */
> -       u8 req_level;
> -
> -       /*
> -        * Page size that will be created based on the req_level and
> -        * huge_page_disallowed.
> -        */
> -       u8 goal_level;
> -
> -       /* Shifted addr, or result of guest page table walk if addr is a gva.  */
> -       gfn_t gfn;
> -
> -       /* The memslot containing gfn. May be NULL. */
> -       struct kvm_memory_slot *slot;
> -
> -       /* Outputs of kvm_faultin_pfn.  */
> -       unsigned long mmu_seq;
> -       kvm_pfn_t pfn;
> -       hva_t hva;
> -       bool map_writable;
> -};
> -
>  int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
>
>  /*
> @@ -164,22 +110,27 @@ enum {
>  static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>                                         u32 err, bool prefetch)
>  {
> +       bool is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault);
>         struct kvm_page_fault fault = {
>                 .addr = cr2_or_gpa,
> -               .error_code = err,
> -               .exec = err & PFERR_FETCH_MASK,
> -               .write = err & PFERR_WRITE_MASK,
> -               .present = err & PFERR_PRESENT_MASK,
> -               .rsvd = err & PFERR_RSVD_MASK,
> -               .user = err & PFERR_USER_MASK,
>                 .prefetch = prefetch,
> -               .is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
> -               .nx_huge_page_workaround_enabled =
> -                       is_nx_huge_page_enabled(vcpu->kvm),
> +
> +               .write = err & PFERR_WRITE_MASK,
> +               .exec = err & PFERR_FETCH_MASK,
>
>                 .max_level = KVM_MAX_HUGEPAGE_LEVEL,
>                 .req_level = PG_LEVEL_4K,
>                 .goal_level = PG_LEVEL_4K,
> +
> +               .arch = {
> +                       .error_code = err,
> +                       .present = err & PFERR_PRESENT_MASK,
> +                       .rsvd = err & PFERR_RSVD_MASK,
> +                       .user = err & PFERR_USER_MASK,
> +                       .is_tdp = is_tdp,
> +                       .nx_huge_page_workaround_enabled =
> +                               is_nx_huge_page_enabled(vcpu->kvm),
> +               },
>         };
>         int r;
>
> @@ -196,7 +147,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>         if (!prefetch)
>                 vcpu->stat.pf_taken++;
>
> -       if (IS_ENABLED(CONFIG_RETPOLINE) && fault.is_tdp)
> +       if (IS_ENABLED(CONFIG_RETPOLINE) && fault.arch.is_tdp)
>                 r = kvm_tdp_page_fault(vcpu, &fault);
>         else
>                 r = vcpu->arch.mmu->page_fault(vcpu, &fault);
> diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
> index 335f26dabdf3..b01767acf073 100644
> --- a/arch/x86/kvm/mmu/mmutrace.h
> +++ b/arch/x86/kvm/mmu/mmutrace.h
> @@ -270,7 +270,7 @@ TRACE_EVENT(
>         TP_fast_assign(
>                 __entry->vcpu_id = vcpu->vcpu_id;
>                 __entry->cr2_or_gpa = fault->addr;
> -               __entry->error_code = fault->error_code;
> +               __entry->error_code = fault->arch.error_code;
>                 __entry->sptep = sptep;
>                 __entry->old_spte = old_spte;
>                 __entry->new_spte = *sptep;
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 18bb92b70a01..daf9c7731edc 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -698,7 +698,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>                  * We cannot overwrite existing page tables with an NX
>                  * large page, as the leaf could be executable.
>                  */
> -               if (fault->nx_huge_page_workaround_enabled)
> +               if (fault->arch.nx_huge_page_workaround_enabled)
>                         disallowed_hugepage_adjust(fault, *it.sptep, it.level);
>
>                 base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
> @@ -713,7 +713,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>                         continue;
>
>                 link_shadow_page(vcpu, it.sptep, sp);
> -               if (fault->huge_page_disallowed)
> +               if (fault->arch.huge_page_disallowed)
>                         account_nx_huge_page(vcpu->kvm, sp,
>                                              fault->req_level >= it.level);
>         }
> @@ -793,8 +793,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>         int r;
>         bool is_self_change_mapping;
>
> -       pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
> -       WARN_ON_ONCE(fault->is_tdp);
> +       pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->arch.error_code);
> +       WARN_ON_ONCE(fault->arch.is_tdp);
>
>         /*
>          * Look up the guest pte for the faulting address.
> @@ -802,7 +802,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>          * The bit needs to be cleared before walking guest page tables.
>          */
>         r = FNAME(walk_addr)(&walker, vcpu, fault->addr,
> -                            fault->error_code & ~PFERR_RSVD_MASK);
> +                            fault->arch.error_code & ~PFERR_RSVD_MASK);
>
>         /*
>          * The page is not mapped by the guest.  Let the guest handle it.
> @@ -830,7 +830,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>         vcpu->arch.write_fault_to_shadow_pgtable = false;
>
>         is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
> -             &walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable);
> +             &walker, fault->arch.user, &vcpu->arch.write_fault_to_shadow_pgtable);
>
>         if (is_self_change_mapping)
>                 fault->max_level = PG_LEVEL_4K;
> @@ -846,7 +846,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>          * we will cache the incorrect access into mmio spte.
>          */
>         if (fault->write && !(walker.pte_access & ACC_WRITE_MASK) &&
> -           !is_cr0_wp(vcpu->arch.mmu) && !fault->user && fault->slot) {
> +           !is_cr0_wp(vcpu->arch.mmu) && !fault->arch.user && fault->slot) {
>                 walker.pte_access |= ACC_WRITE_MASK;
>                 walker.pte_access &= ~ACC_USER_MASK;
>
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 66231c7ed31e..4940413d3767 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1156,7 +1156,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>         tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
>                 int r;
>
> -               if (fault->nx_huge_page_workaround_enabled)
> +               if (fault->arch.nx_huge_page_workaround_enabled)
>                         disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
>
>                 if (iter.level == fault->goal_level)
> @@ -1181,7 +1181,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>                 sp = tdp_mmu_alloc_sp(vcpu);
>                 tdp_mmu_init_child_sp(sp, &iter);
>
> -               sp->arch.nx_huge_page_disallowed = fault->huge_page_disallowed;
> +               sp->arch.nx_huge_page_disallowed = fault->arch.huge_page_disallowed;
>
>                 if (is_shadow_present_pte(iter.old_spte))
>                         r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
> @@ -1197,7 +1197,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>                         goto retry;
>                 }
>
> -               if (fault->huge_page_disallowed &&
> +               if (fault->arch.huge_page_disallowed &&
>                     fault->req_level >= iter.level) {
>                         spin_lock(&kvm->arch.tdp_mmu_pages_lock);
>                         track_possible_nx_huge_page(kvm, sp);
> diff --git a/include/kvm/mmu_types.h b/include/kvm/mmu_types.h
> index a9da33d4baa8..9f0ca920bf68 100644
> --- a/include/kvm/mmu_types.h
> +++ b/include/kvm/mmu_types.h
> @@ -66,4 +66,48 @@ struct kvm_mmu_page {
>         struct kvm_mmu_page_arch arch;
>  };
>
> +struct kvm_page_fault {
> +       /* The raw faulting address. */
> +       const gpa_t addr;
> +
> +       /* Whether the fault was synthesized to prefetch a mapping. */
> +       const bool prefetch;
> +
> +       /* Information about the cause of the fault. */
> +       const bool write;
> +       const bool exec;
> +
> +       /* Shifted addr, or result of guest page table walk if shadow paging. */
> +       gfn_t gfn;

Is this redundant to have in common code? If we're not doing common
shadow paging, then this is just addr shifted. Would this be better
placed in the arch specific struct?

> +
> +       /* The memslot that contains @gfn. May be NULL. */
> +       struct kvm_memory_slot *slot;
> +
> +       /* Maximum page size that can be created for this fault. */
> +       u8 max_level;
> +
> +       /*
> +        * Page size that can be created based on the max_level and the page
> +        * size used by the host mapping.
> +        */
> +       u8 req_level;
> +
> +       /* Final page size that will be created. */
> +       u8 goal_level;
> +
> +       /*
> +        * The value of kvm->mmu_invalidate_seq before fetching the host
> +        * mapping. Used to verify that the host mapping has not changed
> +        * after grabbing the MMU lock.
> +        */
> +       unsigned long mmu_seq;

Should this be ifdef'ed with  KVM_ARCH_WANT_MMU_NOTIFIER?

> +
> +       /* Information about the host mapping. */
> +       kvm_pfn_t pfn;
> +       hva_t hva;
> +       bool map_writable;
> +
> +       struct kvm_page_fault_arch arch;
> +};
> +
>  #endif /* !__KVM_MMU_TYPES_H */
> --
> 2.39.0.rc1.256.g54fd8350bd-goog
>
Paolo Bonzini Dec. 12, 2022, 10:27 p.m. UTC | #2
On 12/8/22 20:38, David Matlack wrote:
> +
> +	/* Derived from mmu and global state.  */
> +	const bool is_tdp;

I think this could stay in the architecture-independent part.

Paolo
David Matlack Dec. 12, 2022, 10:30 p.m. UTC | #3
On Mon, Dec 12, 2022 at 10:24:31AM -0800, Ben Gardon wrote:
> On Thu, Dec 8, 2022 at 11:39 AM David Matlack <dmatlack@google.com> wrote:
> >
> > Move struct kvm_page_fault to common code. This will be used in a future
> > commit to move the TDP MMU to common code.
> >
> > No functional change intended.
> >
> > Signed-off-by: David Matlack <dmatlack@google.com>
> > ---
[...]
> > diff --git a/include/kvm/mmu_types.h b/include/kvm/mmu_types.h
> > index a9da33d4baa8..9f0ca920bf68 100644
> > --- a/include/kvm/mmu_types.h
> > +++ b/include/kvm/mmu_types.h
> > @@ -66,4 +66,48 @@ struct kvm_mmu_page {
> >         struct kvm_mmu_page_arch arch;
> >  };
> >
> > +struct kvm_page_fault {
> > +       /* The raw faulting address. */
> > +       const gpa_t addr;
> > +
> > +       /* Whether the fault was synthesized to prefetch a mapping. */
> > +       const bool prefetch;
> > +
> > +       /* Information about the cause of the fault. */
> > +       const bool write;
> > +       const bool exec;
> > +
> > +       /* Shifted addr, or result of guest page table walk if shadow paging. */
> > +       gfn_t gfn;
> 
> Is this redundant to have in common code? If we're not doing common
> shadow paging, then this is just addr shifted. Would this be better
> placed in the arch specific struct?

Yes it's redundant but it is actually used by the TDP MMU, unlike @addr.
So if anything I would rather move @addr to kvm_page_fault_arch.
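
For reference, a rough sketch of how @gfn is produced and consumed today (paraphrasing the current x86 code outside this diff, so the exact lines may differ):

	/* TDP faults: gfn is just the shifted GPA. */
	fault->gfn = fault->addr >> PAGE_SHIFT;

	/* Shadow paging: gfn comes out of the guest page-table walk. */
	fault->gfn = walker.gfn;

	/* The TDP MMU then operates purely on gfn, e.g. in kvm_tdp_mmu_map(): */
	tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
		...
	}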

> 
> > +
> > +       /* The memslot that contains @gfn. May be NULL. */
> > +       struct kvm_memory_slot *slot;
> > +
> > +       /* Maximum page size that can be created for this fault. */
> > +       u8 max_level;
> > +
> > +       /*
> > +        * Page size that can be created based on the max_level and the page
> > +        * size used by the host mapping.
> > +        */
> > +       u8 req_level;
> > +
> > +       /* Final page size that will be created. */
> > +       u8 goal_level;
> > +
> > +       /*
> > +        * The value of kvm->mmu_invalidate_seq before fetching the host
> > +        * mapping. Used to verify that the host mapping has not changed
> > +        * after grabbing the MMU lock.
> > +        */
> > +       unsigned long mmu_seq;
> 
> Should this be ifdef'ed with  KVM_ARCH_WANT_MMU_NOTIFIER?

I'll have to take a closer look, but probably yes.
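
For context, the pattern @mmu_seq supports looks roughly like this (paraphrasing the current x86 fault path from memory, so treat the helper names as illustrative):

	/* Before resolving the host mapping (kvm_faultin_pfn): */
	fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
	smp_rmb();
	fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, ...);

	/* After taking mmu_lock: retry if an invalidation raced with the fault. */
	if (mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva))
		goto out_unlock;

If I remember the kvm_host.h ifdefs correctly, kvm->mmu_invalidate_seq and
mmu_invalidate_retry_hva() are only defined under CONFIG_MMU_NOTIFIER &&
KVM_ARCH_WANT_MMU_NOTIFIER, so guarding the field the same way would keep the
common struct consistent.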

> 
> > +
> > +       /* Information about the host mapping. */
> > +       kvm_pfn_t pfn;
> > +       hva_t hva;
> > +       bool map_writable;
> > +
> > +       struct kvm_page_fault_arch arch;
> > +};
> > +
> >  #endif /* !__KVM_MMU_TYPES_H */
> > --
> > 2.39.0.rc1.256.g54fd8350bd-goog
> >
David Matlack Jan. 9, 2023, 6:55 p.m. UTC | #4
On Mon, Dec 12, 2022 at 2:27 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 12/8/22 20:38, David Matlack wrote:
> > +
> > +     /* Derived from mmu and global state.  */
> > +     const bool is_tdp;
>
> I think this could stay in the architecture-independent part.

I agree but until there's a use case for accessing it in common code
I'm inclined to leave it in x86's kvm_page_fault_arch.

Patch

diff --git a/arch/x86/include/asm/kvm/mmu_types.h b/arch/x86/include/asm/kvm/mmu_types.h
index affcb520b482..59d1be85f4b7 100644
--- a/arch/x86/include/asm/kvm/mmu_types.h
+++ b/arch/x86/include/asm/kvm/mmu_types.h
@@ -5,6 +5,7 @@ 
 #include <linux/bitmap.h>
 #include <linux/list.h>
 #include <linux/types.h>
+#include <linux/kvm_types.h>
 
 /*
  * This is a subset of the overall kvm_cpu_role to minimize the size of
@@ -115,4 +116,23 @@  struct kvm_mmu_page_arch {
 	atomic_t write_flooding_count;
 };
 
+struct kvm_page_fault_arch {
+	const u32 error_code;
+
+	/* x86-specific error code bits */
+	const bool present;
+	const bool rsvd;
+	const bool user;
+
+	/* Derived from mmu and global state.  */
+	const bool is_tdp;
+	const bool nx_huge_page_workaround_enabled;
+
+	/*
+	 * Whether a >4KB mapping can be created or is forbidden due to NX
+	 * hugepages.
+	 */
+	bool huge_page_disallowed;
+};
+
 #endif /* !__ASM_KVM_MMU_TYPES_H */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e47f35878ab5..0593d4a60139 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3092,7 +3092,8 @@  void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	struct kvm_memory_slot *slot = fault->slot;
 	kvm_pfn_t mask;
 
-	fault->huge_page_disallowed = fault->exec && fault->nx_huge_page_workaround_enabled;
+	fault->arch.huge_page_disallowed =
+		fault->exec && fault->arch.nx_huge_page_workaround_enabled;
 
 	if (unlikely(fault->max_level == PG_LEVEL_4K))
 		return;
@@ -3109,7 +3110,7 @@  void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	 */
 	fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
 						     fault->gfn, fault->max_level);
-	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
+	if (fault->req_level == PG_LEVEL_4K || fault->arch.huge_page_disallowed)
 		return;
 
 	/*
@@ -3158,7 +3159,7 @@  static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * We cannot overwrite existing page tables with an NX
 		 * large page, as the leaf could be executable.
 		 */
-		if (fault->nx_huge_page_workaround_enabled)
+		if (fault->arch.nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(fault, *it.sptep, it.level);
 
 		base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
@@ -3170,7 +3171,7 @@  static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			continue;
 
 		link_shadow_page(vcpu, it.sptep, sp);
-		if (fault->huge_page_disallowed)
+		if (fault->arch.huge_page_disallowed)
 			account_nx_huge_page(vcpu->kvm, sp,
 					     fault->req_level >= it.level);
 	}
@@ -3221,7 +3222,7 @@  static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
 				   struct kvm_page_fault *fault,
 				   unsigned int access)
 {
-	gva_t gva = fault->is_tdp ? 0 : fault->addr;
+	gva_t gva = fault->arch.is_tdp ? 0 : fault->addr;
 
 	vcpu_cache_mmio_info(vcpu, gva, fault->gfn,
 			     access & shadow_mmio_access_mask);
@@ -3255,7 +3256,7 @@  static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
 	 * generation number.  Refreshing the MMIO generation needs to go down
 	 * the slow path.  Note, EPT Misconfigs do NOT set the PRESENT flag!
 	 */
-	if (fault->rsvd)
+	if (fault->arch.rsvd)
 		return false;
 
 	/*
@@ -3273,7 +3274,7 @@  static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
 	 *    SPTE is MMU-writable (determined later), the fault can be fixed
 	 *    by setting the Writable bit, which can be done out of mmu_lock.
 	 */
-	if (!fault->present)
+	if (!fault->arch.present)
 		return !kvm_ad_enabled();
 
 	/*
@@ -4119,10 +4120,10 @@  static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
 					 struct kvm_page_fault *fault)
 {
-	if (unlikely(fault->rsvd))
+	if (unlikely(fault->arch.rsvd))
 		return false;
 
-	if (!fault->present || !fault->write)
+	if (!fault->arch.present || !fault->write)
 		return false;
 
 	/*
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index af2ae4887e35..4abb80a3bd01 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -77,60 +77,6 @@  static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
 	return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
 }
 
-struct kvm_page_fault {
-	/* arguments to kvm_mmu_do_page_fault.  */
-	const gpa_t addr;
-	const u32 error_code;
-	const bool prefetch;
-
-	/* Derived from error_code.  */
-	const bool exec;
-	const bool write;
-	const bool present;
-	const bool rsvd;
-	const bool user;
-
-	/* Derived from mmu and global state.  */
-	const bool is_tdp;
-	const bool nx_huge_page_workaround_enabled;
-
-	/*
-	 * Whether a >4KB mapping can be created or is forbidden due to NX
-	 * hugepages.
-	 */
-	bool huge_page_disallowed;
-
-	/*
-	 * Maximum page size that can be created for this fault; input to
-	 * FNAME(fetch), direct_map() and kvm_tdp_mmu_map().
-	 */
-	u8 max_level;
-
-	/*
-	 * Page size that can be created based on the max_level and the
-	 * page size used by the host mapping.
-	 */
-	u8 req_level;
-
-	/*
-	 * Page size that will be created based on the req_level and
-	 * huge_page_disallowed.
-	 */
-	u8 goal_level;
-
-	/* Shifted addr, or result of guest page table walk if addr is a gva.  */
-	gfn_t gfn;
-
-	/* The memslot containing gfn. May be NULL. */
-	struct kvm_memory_slot *slot;
-
-	/* Outputs of kvm_faultin_pfn.  */
-	unsigned long mmu_seq;
-	kvm_pfn_t pfn;
-	hva_t hva;
-	bool map_writable;
-};
-
 int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 
 /*
@@ -164,22 +110,27 @@  enum {
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 					u32 err, bool prefetch)
 {
+	bool is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault);
 	struct kvm_page_fault fault = {
 		.addr = cr2_or_gpa,
-		.error_code = err,
-		.exec = err & PFERR_FETCH_MASK,
-		.write = err & PFERR_WRITE_MASK,
-		.present = err & PFERR_PRESENT_MASK,
-		.rsvd = err & PFERR_RSVD_MASK,
-		.user = err & PFERR_USER_MASK,
 		.prefetch = prefetch,
-		.is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
-		.nx_huge_page_workaround_enabled =
-			is_nx_huge_page_enabled(vcpu->kvm),
+
+		.write = err & PFERR_WRITE_MASK,
+		.exec = err & PFERR_FETCH_MASK,
 
 		.max_level = KVM_MAX_HUGEPAGE_LEVEL,
 		.req_level = PG_LEVEL_4K,
 		.goal_level = PG_LEVEL_4K,
+
+		.arch = {
+			.error_code = err,
+			.present = err & PFERR_PRESENT_MASK,
+			.rsvd = err & PFERR_RSVD_MASK,
+			.user = err & PFERR_USER_MASK,
+			.is_tdp = is_tdp,
+			.nx_huge_page_workaround_enabled =
+				is_nx_huge_page_enabled(vcpu->kvm),
+		},
 	};
 	int r;
 
@@ -196,7 +147,7 @@  static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	if (!prefetch)
 		vcpu->stat.pf_taken++;
 
-	if (IS_ENABLED(CONFIG_RETPOLINE) && fault.is_tdp)
+	if (IS_ENABLED(CONFIG_RETPOLINE) && fault.arch.is_tdp)
 		r = kvm_tdp_page_fault(vcpu, &fault);
 	else
 		r = vcpu->arch.mmu->page_fault(vcpu, &fault);
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index 335f26dabdf3..b01767acf073 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -270,7 +270,7 @@  TRACE_EVENT(
 	TP_fast_assign(
 		__entry->vcpu_id = vcpu->vcpu_id;
 		__entry->cr2_or_gpa = fault->addr;
-		__entry->error_code = fault->error_code;
+		__entry->error_code = fault->arch.error_code;
 		__entry->sptep = sptep;
 		__entry->old_spte = old_spte;
 		__entry->new_spte = *sptep;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 18bb92b70a01..daf9c7731edc 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -698,7 +698,7 @@  static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 		 * We cannot overwrite existing page tables with an NX
 		 * large page, as the leaf could be executable.
 		 */
-		if (fault->nx_huge_page_workaround_enabled)
+		if (fault->arch.nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(fault, *it.sptep, it.level);
 
 		base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
@@ -713,7 +713,7 @@  static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			continue;
 
 		link_shadow_page(vcpu, it.sptep, sp);
-		if (fault->huge_page_disallowed)
+		if (fault->arch.huge_page_disallowed)
 			account_nx_huge_page(vcpu->kvm, sp,
 					     fault->req_level >= it.level);
 	}
@@ -793,8 +793,8 @@  static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	int r;
 	bool is_self_change_mapping;
 
-	pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
-	WARN_ON_ONCE(fault->is_tdp);
+	pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->arch.error_code);
+	WARN_ON_ONCE(fault->arch.is_tdp);
 
 	/*
 	 * Look up the guest pte for the faulting address.
@@ -802,7 +802,7 @@  static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	 * The bit needs to be cleared before walking guest page tables.
 	 */
 	r = FNAME(walk_addr)(&walker, vcpu, fault->addr,
-			     fault->error_code & ~PFERR_RSVD_MASK);
+			     fault->arch.error_code & ~PFERR_RSVD_MASK);
 
 	/*
 	 * The page is not mapped by the guest.  Let the guest handle it.
@@ -830,7 +830,7 @@  static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	vcpu->arch.write_fault_to_shadow_pgtable = false;
 
 	is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
-	      &walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable);
+	      &walker, fault->arch.user, &vcpu->arch.write_fault_to_shadow_pgtable);
 
 	if (is_self_change_mapping)
 		fault->max_level = PG_LEVEL_4K;
@@ -846,7 +846,7 @@  static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	 * we will cache the incorrect access into mmio spte.
 	 */
 	if (fault->write && !(walker.pte_access & ACC_WRITE_MASK) &&
-	    !is_cr0_wp(vcpu->arch.mmu) && !fault->user && fault->slot) {
+	    !is_cr0_wp(vcpu->arch.mmu) && !fault->arch.user && fault->slot) {
 		walker.pte_access |= ACC_WRITE_MASK;
 		walker.pte_access &= ~ACC_USER_MASK;
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 66231c7ed31e..4940413d3767 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1156,7 +1156,7 @@  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
 		int r;
 
-		if (fault->nx_huge_page_workaround_enabled)
+		if (fault->arch.nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
 
 		if (iter.level == fault->goal_level)
@@ -1181,7 +1181,7 @@  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		sp = tdp_mmu_alloc_sp(vcpu);
 		tdp_mmu_init_child_sp(sp, &iter);
 
-		sp->arch.nx_huge_page_disallowed = fault->huge_page_disallowed;
+		sp->arch.nx_huge_page_disallowed = fault->arch.huge_page_disallowed;
 
 		if (is_shadow_present_pte(iter.old_spte))
 			r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
@@ -1197,7 +1197,7 @@  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			goto retry;
 		}
 
-		if (fault->huge_page_disallowed &&
+		if (fault->arch.huge_page_disallowed &&
 		    fault->req_level >= iter.level) {
 			spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 			track_possible_nx_huge_page(kvm, sp);
diff --git a/include/kvm/mmu_types.h b/include/kvm/mmu_types.h
index a9da33d4baa8..9f0ca920bf68 100644
--- a/include/kvm/mmu_types.h
+++ b/include/kvm/mmu_types.h
@@ -66,4 +66,48 @@  struct kvm_mmu_page {
 	struct kvm_mmu_page_arch arch;
 };
 
+struct kvm_page_fault {
+	/* The raw faulting address. */
+	const gpa_t addr;
+
+	/* Whether the fault was synthesized to prefetch a mapping. */
+	const bool prefetch;
+
+	/* Information about the cause of the fault. */
+	const bool write;
+	const bool exec;
+
+	/* Shifted addr, or result of guest page table walk if shadow paging. */
+	gfn_t gfn;
+
+	/* The memslot that contains @gfn. May be NULL. */
+	struct kvm_memory_slot *slot;
+
+	/* Maximum page size that can be created for this fault. */
+	u8 max_level;
+
+	/*
+	 * Page size that can be created based on the max_level and the page
+	 * size used by the host mapping.
+	 */
+	u8 req_level;
+
+	/* Final page size that will be created. */
+	u8 goal_level;
+
+	/*
+	 * The value of kvm->mmu_invalidate_seq before fetching the host
+	 * mapping. Used to verify that the host mapping has not changed
+	 * after grabbing the MMU lock.
+	 */
+	unsigned long mmu_seq;
+
+	/* Information about the host mapping. */
+	kvm_pfn_t pfn;
+	hva_t hva;
+	bool map_writable;
+
+	struct kvm_page_fault_arch arch;
+};
+
 #endif /* !__KVM_MMU_TYPES_H */