Message ID | 20240228024147.41573-2-seanjc@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: x86/mmu: Page fault and MMIO cleanups | expand |
On 2/28/2024 10:41 AM, Sean Christopherson wrote: > Exit to userspace with -EFAULT / KVM_EXIT_MEMORY_FAULT if a private fault > triggers emulation of any kind, as KVM doesn't currently support emulating > access to guest private memory. Practically speaking, private faults and > emulation are already mutually exclusive, but there are edge cases upon > edge cases where KVM can return RET_PF_EMULATE, and adding one last check > to harden against weird, unexpected combinations is inexpensive. > > Suggested-by: Yan Zhao <yan.y.zhao@intel.com> > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- > arch/x86/kvm/mmu/mmu.c | 8 -------- > arch/x86/kvm/mmu/mmu_internal.h | 13 +++++++++++++ > 2 files changed, 13 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index e4cc7f764980..e2fd74e06ff8 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -4309,14 +4309,6 @@ static inline u8 kvm_max_level_for_order(int order) > return PG_LEVEL_4K; > } > > -static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, > - struct kvm_page_fault *fault) > -{ > - kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, > - PAGE_SIZE, fault->write, fault->exec, > - fault->is_private); > -} > - > static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, > struct kvm_page_fault *fault) > { > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h > index 0669a8a668ca..0eea6c5a824d 100644 > --- a/arch/x86/kvm/mmu/mmu_internal.h > +++ b/arch/x86/kvm/mmu/mmu_internal.h > @@ -279,6 +279,14 @@ enum { > RET_PF_SPURIOUS, > }; > > +static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, > + struct kvm_page_fault *fault) > +{ > + kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, > + PAGE_SIZE, fault->write, fault->exec, > + fault->is_private); > +} > + > static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > u32 err, bool prefetch, 
int *emulation_type) > { > @@ -320,6 +328,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > else > r = vcpu->arch.mmu->page_fault(vcpu, &fault); Beg for some comment to explain the paranoia. > + if (r == RET_PF_EMULATE && fault.is_private) { > + kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); > + return -EFAULT; > + } > + > if (fault.write_fault_to_shadow_pgtable && emulation_type) > *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; >
> Exit to userspace with -EFAULT / KVM_EXIT_MEMORY_FAULT if a private fault > triggers emulation of any kind, as KVM doesn't currently support emulating > access to guest private memory. Practically speaking, private faults and > emulation are already mutually exclusive, but there are edge cases upon > edge cases where KVM can return RET_PF_EMULATE, and adding one last check edge cases upon edge cases? Just curious about the details of the edge cases scenarios? > to harden against weird, unexpected combinations is inexpensive. > > Suggested-by: Yan Zhao <yan.y.zhao@intel.com> > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- > arch/x86/kvm/mmu/mmu.c | 8 -------- > arch/x86/kvm/mmu/mmu_internal.h | 13 +++++++++++++ > 2 files changed, 13 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index e4cc7f764980..e2fd74e06ff8 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -4309,14 +4309,6 @@ static inline u8 kvm_max_level_for_order(int order) > return PG_LEVEL_4K; > } > > -static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, > - struct kvm_page_fault *fault) > -{ > - kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, > - PAGE_SIZE, fault->write, fault->exec, > - fault->is_private); > -} > - > static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, > struct kvm_page_fault *fault) > { > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h > index 0669a8a668ca..0eea6c5a824d 100644 > --- a/arch/x86/kvm/mmu/mmu_internal.h > +++ b/arch/x86/kvm/mmu/mmu_internal.h > @@ -279,6 +279,14 @@ enum { > RET_PF_SPURIOUS, > }; > > +static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, > + struct kvm_page_fault *fault) > +{ > + kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, > + PAGE_SIZE, fault->write, fault->exec, > + fault->is_private); > +} > + > static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa, > u32 err, bool prefetch, int *emulation_type) > { > @@ -320,6 +328,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > else > r = vcpu->arch.mmu->page_fault(vcpu, &fault); > > + if (r == RET_PF_EMULATE && fault.is_private) { > + kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); > + return -EFAULT; > + } > + > if (fault.write_fault_to_shadow_pgtable && emulation_type) > *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; >
On Tue, Feb 27, 2024 at 06:41:32PM -0800, Sean Christopherson wrote: > Exit to userspace with -EFAULT / KVM_EXIT_MEMORY_FAULT if a private fault > triggers emulation of any kind, as KVM doesn't currently support emulating > access to guest private memory. Practically speaking, private faults and > emulation are already mutually exclusive, but there are edge cases upon > edge cases where KVM can return RET_PF_EMULATE, and adding one last check > to harden against weird, unexpected combinations is inexpensive. > > Suggested-by: Yan Zhao <yan.y.zhao@intel.com> > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- > arch/x86/kvm/mmu/mmu.c | 8 -------- > arch/x86/kvm/mmu/mmu_internal.h | 13 +++++++++++++ > 2 files changed, 13 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index e4cc7f764980..e2fd74e06ff8 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -4309,14 +4309,6 @@ static inline u8 kvm_max_level_for_order(int order) > return PG_LEVEL_4K; > } > > -static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, > - struct kvm_page_fault *fault) > -{ > - kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, > - PAGE_SIZE, fault->write, fault->exec, > - fault->is_private); > -} > - > static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, > struct kvm_page_fault *fault) > { > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h > index 0669a8a668ca..0eea6c5a824d 100644 > --- a/arch/x86/kvm/mmu/mmu_internal.h > +++ b/arch/x86/kvm/mmu/mmu_internal.h > @@ -279,6 +279,14 @@ enum { > RET_PF_SPURIOUS, > }; > > +static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, > + struct kvm_page_fault *fault) > +{ > + kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, > + PAGE_SIZE, fault->write, fault->exec, > + fault->is_private); > +} > + > static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > u32 
err, bool prefetch, int *emulation_type) > { > @@ -320,6 +328,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > else > r = vcpu->arch.mmu->page_fault(vcpu, &fault); > > + if (r == RET_PF_EMULATE && fault.is_private) { Should we just check VM type + RET_PF_EMULATE, and abort? If r is RET_PF_EMULATE, and fault is caused by accesing a shared address, the emulation code could still meet error if guest page table pages are in private memory, right? > + kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); > + return -EFAULT; > + } > + > if (fault.write_fault_to_shadow_pgtable && emulation_type) > *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; > > -- > 2.44.0.278.ge034bb2e1d-goog >
On 3/7/2024 8:52 PM, Gupta, Pankaj wrote: >> Exit to userspace with -EFAULT / KVM_EXIT_MEMORY_FAULT if a private >> fault >> triggers emulation of any kind, as KVM doesn't currently support >> emulating >> access to guest private memory. Practically speaking, private faults >> and >> emulation are already mutually exclusive, but there are edge cases upon >> edge cases where KVM can return RET_PF_EMULATE, and adding one last >> check > > edge cases upon edge cases? > > Just curious about the details of the edge cases scenarios? Same question out of curiosity. > >> to harden against weird, unexpected combinations is inexpensive. >> >> Suggested-by: Yan Zhao <yan.y.zhao@intel.com> >> Signed-off-by: Sean Christopherson <seanjc@google.com> >> --- >> arch/x86/kvm/mmu/mmu.c | 8 -------- >> arch/x86/kvm/mmu/mmu_internal.h | 13 +++++++++++++ >> 2 files changed, 13 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c >> index e4cc7f764980..e2fd74e06ff8 100644 >> --- a/arch/x86/kvm/mmu/mmu.c >> +++ b/arch/x86/kvm/mmu/mmu.c >> @@ -4309,14 +4309,6 @@ static inline u8 kvm_max_level_for_order(int >> order) >> return PG_LEVEL_4K; >> } >> -static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, >> - struct kvm_page_fault *fault) >> -{ >> - kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, >> - PAGE_SIZE, fault->write, fault->exec, >> - fault->is_private); >> -} >> - >> static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, >> struct kvm_page_fault *fault) >> { >> diff --git a/arch/x86/kvm/mmu/mmu_internal.h >> b/arch/x86/kvm/mmu/mmu_internal.h >> index 0669a8a668ca..0eea6c5a824d 100644 >> --- a/arch/x86/kvm/mmu/mmu_internal.h >> +++ b/arch/x86/kvm/mmu/mmu_internal.h >> @@ -279,6 +279,14 @@ enum { >> RET_PF_SPURIOUS, >> }; >> +static inline void kvm_mmu_prepare_memory_fault_exit(struct >> kvm_vcpu *vcpu, >> + struct kvm_page_fault *fault) >> +{ >> + kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, 
>> + PAGE_SIZE, fault->write, fault->exec, >> + fault->is_private); >> +} >> + >> static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, >> gpa_t cr2_or_gpa, >> u32 err, bool prefetch, int *emulation_type) >> { >> @@ -320,6 +328,11 @@ static inline int kvm_mmu_do_page_fault(struct >> kvm_vcpu *vcpu, gpa_t cr2_or_gpa, >> else >> r = vcpu->arch.mmu->page_fault(vcpu, &fault); >> + if (r == RET_PF_EMULATE && fault.is_private) { >> + kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); >> + return -EFAULT; >> + } >> + >> if (fault.write_fault_to_shadow_pgtable && emulation_type) >> *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; > >
On Tue, Mar 12, 2024, Binbin Wu wrote: > > On 3/7/2024 8:52 PM, Gupta, Pankaj wrote: > > > Exit to userspace with -EFAULT / KVM_EXIT_MEMORY_FAULT if a private fault > > > triggers emulation of any kind, as KVM doesn't currently support > > > emulating access to guest private memory. Practically speaking, private > > > faults and emulation are already mutually exclusive, but there are edge > > > cases upon edge cases where KVM can return RET_PF_EMULATE, and adding one > > > last check > > > > edge cases upon edge cases? > > > > Just curious about the details of the edge cases scenarios? > > Same question out of curiosity. Accesses that hit the APIC-access page and gfns that are write-tracked are the two most likely candidates. Even plain old emulated MMIO falls into this bucket, e.g. if KVM botched things and generated a RSVD fault on a private mapping. I'll reword that line to faults and emulation are already mutually exclusive, but there are many flows that can result in KVM returning RET_PF_EMULATE, and adding one last check to harden against weird, unexpected combinations and/or KVM bugs is inexpensive. to make it sound less dramatic/hand-wavy.
On Fri, Mar 08, 2024, Yan Zhao wrote: > On Tue, Feb 27, 2024 at 06:41:32PM -0800, Sean Christopherson wrote: > > @@ -320,6 +328,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > > else > > r = vcpu->arch.mmu->page_fault(vcpu, &fault); > > > > + if (r == RET_PF_EMULATE && fault.is_private) { > Should we just check VM type + RET_PF_EMULATE, and abort? No, the goal here is purely to ensure that emulation is never triggered for private memory. Guarding against attempting emulation for a VM type that doesn't support emulation at all is something different. And more concretely, as of this commit, all VM types that support private memory (i.e. SW_PROTECTED_VM) support emulation, just not for private memory. > If r is RET_PF_EMULATE, and fault is caused by accessing a shared address, > the emulation code could still meet error if guest page table pages are in > private memory, right? Yes, which is why I squeezed in a documentation update for v6.8 to make it super clear that SW_PROTECTED_VM is a development vehicle, i.e. that trying to use it to run a real VM is all but guaranteed to cause explosions.
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e4cc7f764980..e2fd74e06ff8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4309,14 +4309,6 @@ static inline u8 kvm_max_level_for_order(int order) return PG_LEVEL_4K; } -static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, - struct kvm_page_fault *fault) -{ - kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, - PAGE_SIZE, fault->write, fault->exec, - fault->is_private); -} - static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 0669a8a668ca..0eea6c5a824d 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -279,6 +279,14 @@ enum { RET_PF_SPURIOUS, }; +static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) +{ + kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, + PAGE_SIZE, fault->write, fault->exec, + fault->is_private); +} + static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefetch, int *emulation_type) { @@ -320,6 +328,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, else r = vcpu->arch.mmu->page_fault(vcpu, &fault); + if (r == RET_PF_EMULATE && fault.is_private) { + kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); + return -EFAULT; + } + if (fault.write_fault_to_shadow_pgtable && emulation_type) *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
Exit to userspace with -EFAULT / KVM_EXIT_MEMORY_FAULT if a private fault triggers emulation of any kind, as KVM doesn't currently support emulating access to guest private memory. Practically speaking, private faults and emulation are already mutually exclusive, but there are edge cases upon edge cases where KVM can return RET_PF_EMULATE, and adding one last check to harden against weird, unexpected combinations is inexpensive. Suggested-by: Yan Zhao <yan.y.zhao@intel.com> Signed-off-by: Sean Christopherson <seanjc@google.com> --- arch/x86/kvm/mmu/mmu.c | 8 -------- arch/x86/kvm/mmu/mmu_internal.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 8 deletions(-)