Message ID | ddf1d98420f562707b11e12c416cce8fdb986bb1.1712785629.git.isaku.yamahata@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: Guest Memory Pre-Population API | expand |
>This patch makes the emulation_type always set irrelevant to the return >code. kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(), >and references the value only when PF_RET_EMULATE is returned. Therefore, >this adjustment doesn't affect functionality. This is benign. But what's the benefit of doing this? >+static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, >+ u64 err, bool prefetch, int *emulation_type) > { > struct kvm_page_fault fault = { > .addr = cr2_or_gpa, >@@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn); > } > >- /* >- * Async #PF "faults", a.k.a. prefetch faults, are not faults from the >- * guest perspective and have already been counted at the time of the >- * original fault. >- */ >- if (!prefetch) >- vcpu->stat.pf_taken++; >- > if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp) > r = kvm_tdp_page_fault(vcpu, &fault); > else >@@ -333,12 +325,30 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > > if (r == RET_PF_EMULATE && fault.is_private) { > kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); >- return -EFAULT; >+ r = -EFAULT; > } > > if (fault.write_fault_to_shadow_pgtable && emulation_type) > *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; > >+ return r; >+} >+ >+static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, >+ u64 err, bool prefetch, int *emulation_type) >+{ >+ int r; >+ >+ /* >+ * Async #PF "faults", a.k.a. prefetch faults, are not faults from the >+ * guest perspective and have already been counted at the time of the >+ * original fault. >+ */ >+ if (!prefetch) >+ vcpu->stat.pf_taken++; >+ >+ r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, emulation_type); bail out if r < 0? >+ > /* > * Similar to above, prefetch faults aren't truly spurious, and the > * async #PF path doesn't do emulation. Do count faults that are fixed >-- >2.43.2 > >
On Wed, 2024-04-10 at 15:07 -0700, isaku.yamahata@intel.com wrote: > From: Isaku Yamahata <isaku.yamahata@intel.com> > > Extract out __kvm_mmu_do_page_fault() from kvm_mmu_do_page_fault(). The > inner function is to initialize struct kvm_page_fault and to call the fault > handler, and the outer function handles updating stats and converting > return code. KVM_MAP_MEMORY will call the KVM page fault handler. > > This patch makes the emulation_type always set irrelevant to the return a comma would help parse this better ^ > code. > kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(), Not technically correct, there are other callers that pass NULL for emulation_type. > and references the value only when PF_RET_EMULATE is returned. Therefore, > this adjustment doesn't affect functionality. Is there a problem with dropping the argument then? > > No functional change intended. Can we not use the "intended"? It sounds like hedging for excuses. > > Suggested-by: Sean Christopherson <seanjc@google.com> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > --- > v2: > - Newly introduced. (Sean) > --- > arch/x86/kvm/mmu/mmu_internal.h | 32 +++++++++++++++++++++----------- > 1 file changed, 21 insertions(+), 11 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h > index e68a60974cf4..9baae6c223ee 100644 > --- a/arch/x86/kvm/mmu/mmu_internal.h > +++ b/arch/x86/kvm/mmu/mmu_internal.h > @@ -287,8 +287,8 @@ static inline void > kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, > fault->is_private); > } > > -static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t > cr2_or_gpa, > - u64 err, bool prefetch, int > *emulation_type) > +static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t > cr2_or_gpa, > + u64 err, bool prefetch, int > *emulation_type) > { > struct kvm_page_fault fault = { > .addr = cr2_or_gpa, > @@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu > *vcpu, gpa_t cr2_or_gpa, > fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn); > } > > - /* > - * Async #PF "faults", a.k.a. prefetch faults, are not faults from the > - * guest perspective and have already been counted at the time of the > - * original fault. > - */ > - if (!prefetch) > - vcpu->stat.pf_taken++; > - > if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp) > r = kvm_tdp_page_fault(vcpu, &fault); > else > @@ -333,12 +325,30 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu > *vcpu, gpa_t cr2_or_gpa, > > if (r == RET_PF_EMULATE && fault.is_private) { > kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); > - return -EFAULT; > + r = -EFAULT; > } > > if (fault.write_fault_to_shadow_pgtable && emulation_type) > *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; > > + return r; > +} > + > +static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t > cr2_or_gpa, > + u64 err, bool prefetch, int > *emulation_type) > +{ > + int r; > + > + /* > + * Async #PF "faults", a.k.a. prefetch faults, are not faults from the > + * guest perspective and have already been counted at the time of the > + * original fault. > + */ > + if (!prefetch) > + vcpu->stat.pf_taken++; From the name, it makes sense to not count KVM_MAP_MEMORY as a pf_taken. But kvm_arch_async_page_ready() increments it as well. Which makes it more like a "faulted-in" count. I think the code in this patch is ok. > + > + r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, > emulation_type); > + > /* > * Similar to above, prefetch faults aren't truly spurious, and the > * async #PF path doesn't do emulation. Do count faults that are > fixed
On Tue, Apr 16, 2024 at 04:22:35PM +0800, Chao Gao <chao.gao@intel.com> wrote: > > >This patch makes the emulation_type always set irrelevant to the return > >code. kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(), > >and references the value only when PF_RET_EMULATE is returned. Therefore, > >this adjustment doesn't affect functionality. > > This is benign. But what's the benefit of doing this? To avoid increment vcpu->stat. Because originally this was VM ioctl, I wanted to avoid touch vCPU stat. Now it's vCPU ioctl, it's fine to increment them. Probably we can drop this patch and use kvm_mmu_do_page_fault(). > > >+static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > >+ u64 err, bool prefetch, int *emulation_type) > > { > > struct kvm_page_fault fault = { > > .addr = cr2_or_gpa, > >@@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > > fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn); > > } > > > >- /* > >- * Async #PF "faults", a.k.a. prefetch faults, are not faults from the > >- * guest perspective and have already been counted at the time of the > >- * original fault. > >- */ > >- if (!prefetch) > >- vcpu->stat.pf_taken++; > >- > > if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp) > > r = kvm_tdp_page_fault(vcpu, &fault); > > else > >@@ -333,12 +325,30 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > > > > if (r == RET_PF_EMULATE && fault.is_private) { > > kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); > >- return -EFAULT; > >+ r = -EFAULT; > > } > > > > if (fault.write_fault_to_shadow_pgtable && emulation_type) > > *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; > > > >+ return r; > >+} > >+ > >+static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > >+ u64 err, bool prefetch, int *emulation_type) > >+{ > >+ int r; > >+ > >+ /* > >+ * Async #PF "faults", a.k.a. prefetch faults, are not faults from the > >+ * guest perspective and have already been counted at the time of the > >+ * original fault. > >+ */ > >+ if (!prefetch) > >+ vcpu->stat.pf_taken++; > >+ > >+ r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, emulation_type); > > bail out if r < 0? The following if clauses checks RET_PF_xxx > 0.
On Tue, Apr 16, 2024 at 02:36:31PM +0000, "Edgecombe, Rick P" <rick.p.edgecombe@intel.com> wrote: > On Wed, 2024-04-10 at 15:07 -0700, isaku.yamahata@intel.com wrote: > > From: Isaku Yamahata <isaku.yamahata@intel.com> > > > > Extract out __kvm_mmu_do_page_fault() from kvm_mmu_do_page_fault(). The > > inner function is to initialize struct kvm_page_fault and to call the fault > > handler, and the outer function handles updating stats and converting > > return code. KVM_MAP_MEMORY will call the KVM page fault handler. > > > > This patch makes the emulation_type always set irrelevant to the return > a comma would help parse this better ^ > > code. > > > kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(), > > Not technically correct, there are other callers that pass NULL for > emulation_type. > > > and references the value only when PF_RET_EMULATE is returned. Therefore, > > this adjustment doesn't affect functionality. > > Is there a problem with dropping the argument then? > > > > > No functional change intended. > > Can we not use the "intended"? It sounds like hedging for excuses. Thanks for review. As Chao pointed out, this patch is unnecessary. I'll use kvm_mmu_do_page_fault() directly with updating vcpu->stat. https://lore.kernel.org/all/20240416234334.GA3039520@ls.amr.corp.intel.com/
On Wed, Apr 17, 2024 at 1:52 AM Isaku Yamahata <isaku.yamahata@intel.com> wrote: > As Chao pointed out, this patch is unnecessary. I'll use > kvm_mmu_do_page_fault() directly with updating vcpu->stat. Actually I prefer to have this patch. pf_* stats do not make sense for pre-population, and updating them confuses things because pre-population (outside TDX) has the purpose of avoiding page faults. Paolo
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index e68a60974cf4..9baae6c223ee 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -287,8 +287,8 @@ static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, fault->is_private); } -static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - u64 err, bool prefetch, int *emulation_type) +static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + u64 err, bool prefetch, int *emulation_type) { struct kvm_page_fault fault = { .addr = cr2_or_gpa, @@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn); } - /* - * Async #PF "faults", a.k.a. prefetch faults, are not faults from the - * guest perspective and have already been counted at the time of the - * original fault. - */ - if (!prefetch) - vcpu->stat.pf_taken++; - if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp) r = kvm_tdp_page_fault(vcpu, &fault); else @@ -333,12 +325,30 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (r == RET_PF_EMULATE && fault.is_private) { kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); - return -EFAULT; + r = -EFAULT; } if (fault.write_fault_to_shadow_pgtable && emulation_type) *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; + return r; +} + +static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + u64 err, bool prefetch, int *emulation_type) +{ + int r; + + /* + * Async #PF "faults", a.k.a. prefetch faults, are not faults from the + * guest perspective and have already been counted at the time of the + * original fault. + */ + if (!prefetch) + vcpu->stat.pf_taken++; + + r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, emulation_type); + /* * Similar to above, prefetch faults aren't truly spurious, and the * async #PF path doesn't do emulation. Do count faults that are fixed