diff mbox series

[WIP,v2,09/14] KVM: Introduce KVM_CAP_MEMORY_FAULT_NOWAIT without implementation

Message ID 20230315021738.1151386-10-amoorthy@google.com (mailing list archive)
State New, archived
Headers show
Series Avoiding slow get-user-pages via memory fault exit | expand

Commit Message

Anish Moorthy March 15, 2023, 2:17 a.m. UTC
Add documentation, memslot flags, useful helper functions, and the
actual new capability itself.

Memory fault exits on absent mappings are particularly useful for
userfaultfd-based live migration postcopy. When many vCPUs fault upon a
single userfaultfd the faults can take a while to surface to userspace
due to having to contend for uffd wait queue locks. Bypassing the uffd
entirely by triggering a vCPU exit avoids this contention and can improve
the fault rate by as much as 10x.
---
 Documentation/virt/kvm/api.rst | 37 +++++++++++++++++++++++++++++++---
 include/linux/kvm_host.h       |  6 ++++++
 include/uapi/linux/kvm.h       |  3 +++
 tools/include/uapi/linux/kvm.h |  2 ++
 virt/kvm/kvm_main.c            |  7 ++++++-
 5 files changed, 51 insertions(+), 4 deletions(-)

Comments

Oliver Upton March 17, 2023, 6:59 p.m. UTC | #1
On Wed, Mar 15, 2023 at 02:17:33AM +0000, Anish Moorthy wrote:
> Add documentation, memslot flags, useful helper functions, and the
> actual new capability itself.
> 
> Memory fault exits on absent mappings are particularly useful for
> userfaultfd-based live migration postcopy. When many vCPUs fault upon a
> single userfaultfd the faults can take a while to surface to userspace
> due to having to contend for uffd wait queue locks. Bypassing the uffd
> entirely by triggering a vCPU exit avoids this contention and can improve
> the fault rate by as much as 10x.
> ---
>  Documentation/virt/kvm/api.rst | 37 +++++++++++++++++++++++++++++++---
>  include/linux/kvm_host.h       |  6 ++++++
>  include/uapi/linux/kvm.h       |  3 +++
>  tools/include/uapi/linux/kvm.h |  2 ++
>  virt/kvm/kvm_main.c            |  7 ++++++-
>  5 files changed, 51 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index f9ca18bbec879..4932c0f62eb3d 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -1312,6 +1312,7 @@ yet and must be cleared on entry.
>    /* for kvm_userspace_memory_region::flags */
>    #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
>    #define KVM_MEM_READONLY	(1UL << 1)
> +  #define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)

call it KVM_MEM_EXIT_ABSENT_MAPPING

>  
>  This ioctl allows the user to create, modify or delete a guest physical
>  memory slot.  Bits 0-15 of "slot" specify the slot id and this value
> @@ -1342,12 +1343,15 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
>  be identical.  This allows large pages in the guest to be backed by large
>  pages in the host.
>  
> -The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
> -KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
> +The flags field supports three flags
> +
> +1.  KVM_MEM_LOG_DIRTY_PAGES: can be set to instruct KVM to keep track of
>  writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
> -use it.  The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
> +use it.
> +2.  KVM_MEM_READONLY: can be set, if KVM_CAP_READONLY_MEM capability allows it,
>  to make a new slot read-only.  In this case, writes to this memory will be
>  posted to userspace as KVM_EXIT_MMIO exits.
> +3.  KVM_MEM_ABSENT_MAPPING_FAULT: see KVM_CAP_MEMORY_FAULT_NOWAIT for details.
>  
>  When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
>  the memory region are automatically reflected into the guest.  For example, an
> @@ -7702,10 +7706,37 @@ Through args[0], the capability can be set on a per-exit-reason basis.
>  Currently, the only exit reasons supported are
>  
>  1. KVM_MEMFAULT_REASON_UNKNOWN (1 << 0)
> +2. KVM_MEMFAULT_REASON_ABSENT_MAPPING (1 << 1)
>  
>  Memory fault exits with a reason of UNKNOWN should not be depended upon: they
>  may be added, removed, or reclassified under a stable reason.
>  
> +7.35 KVM_CAP_MEMORY_FAULT_NOWAIT
> +--------------------------------
> +
> +:Architectures: x86, arm64
> +:Returns: -EINVAL.
> +
> +The presence of this capability indicates that userspace may pass the
> +KVM_MEM_ABSENT_MAPPING_FAULT flag to KVM_SET_USER_MEMORY_REGION to cause KVM_RUN
> +to exit to populate 'kvm_run.memory_fault' and exit to userspace (*) in response
> +to page faults for which the userspace page tables do not contain present
> +mappings. Attempting to enable the capability directly will fail.
> +
> +The 'gpa' and 'len' fields of kvm_run.memory_fault will be set to the starting
> +address and length (in bytes) of the faulting page. 'flags' will be set to
> +KVM_MEMFAULT_REASON_ABSENT_MAPPING.
> +
> +Userspace should determine how best to make the mapping present, then take
> +appropriate action. For instance, in the case of absent mappings this might
> +involve establishing the mapping for the first time via UFFDIO_COPY/CONTINUE or
> +faulting the mapping in using MADV_POPULATE_READ/WRITE. After establishing the
> +mapping, userspace can return to KVM to retry the previous memory access.
> +
> +(*) NOTE: On x86, KVM_CAP_X86_MEMORY_FAULT_EXIT must be enabled for the
> +KVM_MEMFAULT_REASON_ABSENT_MAPPING_reason: otherwise userspace will only receive
> +a -EFAULT from KVM_RUN without any useful information.

I'm not a fan of this architecture-specific dependency. Userspace is already
explicitly opting in to this behavior by way of the memslot flag. These sort
of exits are entirely orthogonal to the -EFAULT conversion earlier in the
series.

>  8. Other capabilities.
>  ======================
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index d3ccfead73e42..c28330f25526f 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -593,6 +593,12 @@ static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *sl
>  	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
>  }
>  
> +static inline bool kvm_slot_fault_on_absent_mapping(
> +	const struct kvm_memory_slot *slot)

Style again...

I'd strongly recommend using 'exit' instead of 'fault' in the verbiage of the
KVM implementation. I understand we're giving userspace the illusion of a page
fault mechanism, but the term is then overloaded in KVM since we handle
literal faults from hardware.
Anish Moorthy March 17, 2023, 8:15 p.m. UTC | #2
On Fri, Mar 17, 2023 at 11:59 AM Oliver Upton <oliver.upton@linux.dev> wrote:

> > +  #define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)
>
> call it KVM_MEM_EXIT_ABSENT_MAPPING
> ...
> I'm not a fan of this architecture-specific dependency. Userspace is already
> explicitly opting in to this behavior by way of the memslot flag. These sort
> of exits are entirely orthogonal to the -EFAULT conversion earlier in the
> series.

I'm not a fan of the semantics varying between architectures either:
but the reason I have it like that (and that the EFAULT conversions
exist in this series in the first place) is (a) not having
KVM_CAP_MEMORY_FAULT_EXIT implemented for arm and (b) Sean's following
statement from https://lore.kernel.org/kvm/Y%2FfS0eab7GG0NVKS@google.com/

On Thu, Feb 23, 2023 at 12:55 PM Sean Christopherson <seanjc@google.com> wrote:
>
> The new memslot flag should depend on KVM_CAP_MEMORY_FAULT_EXIT, but
> KVM_CAP_MEMORY_FAULT_EXIT should be a standalone thing, i.e. should convert "all"
> guest-memory -EFAULTS to KVM_CAP_MEMORY_FAULT_EXIT.  All in quotes because I would
> likely be ok with a partial conversion for the initial implementation if there
> are paths that would require an absurd amount of work to convert.

The best way that I thought of how to do that was to have one cap
(KVM_CAP_MEMORY_FAULT_NOWAIT) to make KVM -EFAULT without calling slow
GUP, and KVM_CAP_MEMORY_FAULT_EXIT to transform efaults to useful vm
exits. But if you think the two are really orthogonal, then we need to
resolve the apparent disagreement.
Sean Christopherson March 17, 2023, 8:17 p.m. UTC | #3
On Fri, Mar 17, 2023, Oliver Upton wrote:
> On Wed, Mar 15, 2023 at 02:17:33AM +0000, Anish Moorthy wrote:
> > Add documentation, memslot flags, useful helper functions, and the
> > actual new capability itself.
> > 
> > Memory fault exits on absent mappings are particularly useful for
> > userfaultfd-based live migration postcopy. When many vCPUs fault upon a
> > single userfaultfd the faults can take a while to surface to userspace
> > due to having to contend for uffd wait queue locks. Bypassing the uffd
> > entirely by triggering a vCPU exit avoids this contention and can improve
> > the fault rate by as much as 10x.
> > ---
> >  Documentation/virt/kvm/api.rst | 37 +++++++++++++++++++++++++++++++---
> >  include/linux/kvm_host.h       |  6 ++++++
> >  include/uapi/linux/kvm.h       |  3 +++
> >  tools/include/uapi/linux/kvm.h |  2 ++
> >  virt/kvm/kvm_main.c            |  7 ++++++-
> >  5 files changed, 51 insertions(+), 4 deletions(-)
> > 
> > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > index f9ca18bbec879..4932c0f62eb3d 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -1312,6 +1312,7 @@ yet and must be cleared on entry.
> >    /* for kvm_userspace_memory_region::flags */
> >    #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
> >    #define KVM_MEM_READONLY	(1UL << 1)
> > +  #define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)
> 
> call it KVM_MEM_EXIT_ABSENT_MAPPING

Ooh, look, a bikeshed!  :-)

I don't think it should have "EXIT" in the name.  The exit to userspace is a side
effect, e.g. KVM already exits to userspace on unresolved userfaults.  The only
thing this knob _directly_ controls is whether or not KVM attempts the slow path.
If we give the flag a name like "exit on absent userspace mappings", then KVM will
appear to do the wrong thing when KVM exits on a truly absent userspace mapping.

And as I argued in the last version[*], I am _strongly_ opposed to KVM speculating
on why KVM is exiting to userspace.  I.e. KVM should not set a special flag if
the memslot has "fast only" behavior.  The only thing the flag should do is control
whether or not KVM tries slow paths, what KVM does in response to an unresolved
fault should be an orthogonal thing.

E.g. If KVM encounters an unmapped page while prefetching SPTEs, KVM will (correctly)
not exit to userspace and instead simply terminate the prefetch.  Obviously we
could solve that through documentation, but I don't see any benefit in making this
more complex than it needs to be.

[*] https://lkml.kernel.org/r/Y%2B0RYMfw6pHrSLX4%40google.com

> > +7.35 KVM_CAP_MEMORY_FAULT_NOWAIT
> > +--------------------------------
> > +
> > +:Architectures: x86, arm64
> > +:Returns: -EINVAL.
> > +
> > +The presence of this capability indicates that userspace may pass the
> > +KVM_MEM_ABSENT_MAPPING_FAULT flag to KVM_SET_USER_MEMORY_REGION to cause KVM_RUN
> > +to exit to populate 'kvm_run.memory_fault' and exit to userspace (*) in response
> > +to page faults for which the userspace page tables do not contain present
> > +mappings. Attempting to enable the capability directly will fail.
> > +
> > +The 'gpa' and 'len' fields of kvm_run.memory_fault will be set to the starting
> > +address and length (in bytes) of the faulting page. 'flags' will be set to
> > +KVM_MEMFAULT_REASON_ABSENT_MAPPING.
> > +
> > +Userspace should determine how best to make the mapping present, then take
> > +appropriate action. For instance, in the case of absent mappings this might
> > +involve establishing the mapping for the first time via UFFDIO_COPY/CONTINUE or
> > +faulting the mapping in using MADV_POPULATE_READ/WRITE. After establishing the
> > +mapping, userspace can return to KVM to retry the previous memory access.
> > +
> > +(*) NOTE: On x86, KVM_CAP_X86_MEMORY_FAULT_EXIT must be enabled for the
> > +KVM_MEMFAULT_REASON_ABSENT_MAPPING_reason: otherwise userspace will only receive
> > +a -EFAULT from KVM_RUN without any useful information.
> 
> I'm not a fan of this architecture-specific dependency. Userspace is already
> explicitly opting in to this behavior by way of the memslot flag. These sort
> of exits are entirely orthogonal to the -EFAULT conversion earlier in the
> series.

Ya, yet another reason not to speculate on why KVM wasn't able to resolve a fault.
Sean Christopherson March 17, 2023, 8:54 p.m. UTC | #4
On Fri, Mar 17, 2023, Anish Moorthy wrote:
> On Fri, Mar 17, 2023 at 11:59 AM Oliver Upton <oliver.upton@linux.dev> wrote:
> 
> > > +  #define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)
> >
> > call it KVM_MEM_EXIT_ABSENT_MAPPING
> > ...
> > I'm not a fan of this architecture-specific dependency. Userspace is already
> > explicitly opting in to this behavior by way of the memslot flag. These sort
> > of exits are entirely orthogonal to the -EFAULT conversion earlier in the
> > series.
> 
> I'm not a fan of the semantics varying between architectures either:
> but the reason I have it like that (and that the EFAULT conversions
> exist in this series in the first place) is (a) not having
> KVM_CAP_MEMORY_FAULT_EXIT implemented for arm and (b) Sean's following
> statement from https://lore.kernel.org/kvm/Y%2FfS0eab7GG0NVKS@google.com/

Strictly speaking, if y'all buy my argument that the flag shouldn't control the
gup behavior, there won't be semantic differences for the memslot flag.  KVM will
(obviously) behavior differently if KVM_CAP_MEMORY_FAULT_EXIT is not set, but that
will hold true for x86 as well.  The only difference is that x86 will also support
an orthogonal flag that makes the fast-only memslot flag useful in practice.

So yeah, there will be an arch dependency, but only because arch code needs to
actually handle perform the exit, and that's true no matter what.

That said, there's zero reason to put X86 in the name.  Just add the capability
as KVM_CAP_MEMORY_FAULT_EXIT or whatever and mark it as x86 in the documentation.
Anish Moorthy March 17, 2023, 11:42 p.m. UTC | #5
On Fri, Mar 17, 2023 at 1:17 PM Sean Christopherson <seanjc@google.com> wrote:
>
> And as I argued in the last version[*], I am _strongly_ opposed to KVM speculating
> on why KVM is exiting to userspace.  I.e. KVM should not set a special flag if
> the memslot has "fast only" behavior.  The only thing the flag should do is control
> whether or not KVM tries slow paths, what KVM does in response to an unresolved
> fault should be an orthogonal thing.

I'm guessing you would want changes to patch 10 of this series [*]
then, right? Setting a bit/exit reason in kvm_run::memory_fault.flags
depending on whether the failure originated from a "fast only" fault
is... exactly what I'm doing :/ I'm not totally clear on your usages
of the word "flag" above though, the "KVM should not set a special
flag... the only thing *the* flag should do" part is throwing me off a
bit. What I think you're saying is

"KVM should not set a special bit in kvm_run::memory_fault.flags if
the memslot has fast-only behavior. The only thing
KVM_MEM_ABSENT_MAPPING_FAULT should do is..."

[1] https://lore.kernel.org/all/20230315021738.1151386-11-amoorthy@google.com/

On Fri, Mar 17, 2023 at 1:54 PM Sean Christopherson <seanjc@google.com> wrote:
>
> Strictly speaking, if y'all buy my argument that the flag shouldn't control the
> gup behavior, there won't be semantic differences for the memslot flag.  KVM will
> (obviously) behavior differently if KVM_CAP_MEMORY_FAULT_EXIT is not set, but that
> will hold true for x86 as well.  The only difference is that x86 will also support
> an orthogonal flag that makes the fast-only memslot flag useful in practice.
>
> So yeah, there will be an arch dependency, but only because arch code needs to
> actually handle perform the exit, and that's true no matter what.
>
> That said, there's zero reason to put X86 in the name.  Just add the capability
> as KVM_CAP_MEMORY_FAULT_EXIT or whatever and mark it as x86 in the documentation.
>
> That said, there's zero reason to put X86 in the name.  Just add the capability
> as KVM_CAP_MEMORY_FAULT_EXIT or whatever and mark it as x86 in the documentation.

Again, a little confused on your first "flag" usage here. I figure you
can't mean the memslot flag because the whole point of that is to
control the GUP behavior, but I'm not sure what else you'd be
referring to.

Anyways the idea of having orthogonal features, one to -EFAULTing
early before a slow path and another to transform/augment -EFAULTs
into/with useful information does make sense to me. But I think the
issue here is that we want the fast-only memslot flag to be useful on
Arm as well, and with KVM_CAP_MEMORY_FAULT_NOWAIT written as it is now
there is a semantic differences between x86 and Arm.

I don't see a way to keep the two features here orthogonal on x86 and
linked on arm without keeping that semantic difference. Perhaps the
solution here is a bare-bones implementation of
KVM_CAP_MEMORY_FAULT_EXIT for Arm? All that actually *needs* to be
covered to resolve this difference is the one call site in
user_mem_abort. since KVM_CAP_MEMORY_FAULT_EXIT will be allowed to
have holes anyways.
Sean Christopherson March 20, 2023, 3:13 p.m. UTC | #6
On Fri, Mar 17, 2023, Anish Moorthy wrote:
> On Fri, Mar 17, 2023 at 1:17 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > And as I argued in the last version[*], I am _strongly_ opposed to KVM speculating
> > on why KVM is exiting to userspace.  I.e. KVM should not set a special flag if
> > the memslot has "fast only" behavior.  The only thing the flag should do is control
> > whether or not KVM tries slow paths, what KVM does in response to an unresolved
> > fault should be an orthogonal thing.
> 
> I'm guessing you would want changes to patch 10 of this series [*]
> then, right? Setting a bit/exit reason in kvm_run::memory_fault.flags
> depending on whether the failure originated from a "fast only" fault
> is... exactly what I'm doing :/ I'm not totally clear on your usages
> of the word "flag" above though, the "KVM should not set a special
> flag... the only thing *the* flag should do" part is throwing me off a
> bit. What I think you're saying is

Heh, the second "the flag" is referring to the memslot flag.  Rewriting the above:

  KVM should not set a special flag in kvm_run::memory_fault.flags ... the
  only thing KVM_MEM_FAST_FAULT_ONLY should do is ..."

> "KVM should not set a special bit in kvm_run::memory_fault.flags if
> the memslot has fast-only behavior. The only thing
> KVM_MEM_ABSENT_MAPPING_FAULT should do is..."
> 
> [1] https://lore.kernel.org/all/20230315021738.1151386-11-amoorthy@google.com/
> 
> On Fri, Mar 17, 2023 at 1:54 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > Strictly speaking, if y'all buy my argument that the flag shouldn't control the
> > gup behavior, there won't be semantic differences for the memslot flag.  KVM will
> > (obviously) behavior differently if KVM_CAP_MEMORY_FAULT_EXIT is not set, but that
> > will hold true for x86 as well.  The only difference is that x86 will also support
> > an orthogonal flag that makes the fast-only memslot flag useful in practice.
> >
> > So yeah, there will be an arch dependency, but only because arch code needs to
> > actually handle perform the exit, and that's true no matter what.
> >
> > That said, there's zero reason to put X86 in the name.  Just add the capability
> > as KVM_CAP_MEMORY_FAULT_EXIT or whatever and mark it as x86 in the documentation.
> >
> > That said, there's zero reason to put X86 in the name.  Just add the capability
> > as KVM_CAP_MEMORY_FAULT_EXIT or whatever and mark it as x86 in the documentation.
> 
> Again, a little confused on your first "flag" usage here. I figure you
> can't mean the memslot flag because the whole point of that is to
> control the GUP behavior, but I'm not sure what else you'd be
> referring to.
> 
> Anyways the idea of having orthogonal features, one to -EFAULTing
> early before a slow path and another to transform/augment -EFAULTs
> into/with useful information does make sense to me. But I think the
> issue here is that we want the fast-only memslot flag to be useful on
> Arm as well, and with KVM_CAP_MEMORY_FAULT_NOWAIT written as it is now
> there is a semantic differences between x86 and Arm.

If and only if userspace enables the capability that transforms -EFAULT.

> I don't see a way to keep the two features here orthogonal on x86 and
> linked on arm without keeping that semantic difference. Perhaps the
> solution here is a bare-bones implementation of
> KVM_CAP_MEMORY_FAULT_EXIT for Arm? All that actually *needs* to be
> covered to resolve this difference is the one call site in
> user_mem_abort. since KVM_CAP_MEMORY_FAULT_EXIT will be allowed to
> have holes anyways.

As above, so long as userspace must opt into transforming -EFAULT, and can do
so independent of KVM_MEM_FAST_FAULT_ONLY (or whatever we call it), the behavior
of KVM_MEM_FAST_FAULT_ONLY itself is semantically identical across all
architectures.

KVM_MEM_FAST_FAULT_ONLY is obviously not very useful without precise information
about the failing address, but IMO that's not reason enough to tie the two
together.
Anish Moorthy March 20, 2023, 7:53 p.m. UTC | #7
On Mon, Mar 20, 2023 at 8:13 AM Sean Christopherson <seanjc@google.com> wrote:
>
> On Fri, Mar 17, 2023, Anish Moorthy wrote:
> > On Fri, Mar 17, 2023 at 1:17 PM Sean Christopherson <seanjc@google.com> wrote:
> > >
> > > And as I argued in the last version[*], I am _strongly_ opposed to KVM speculating
> > > on why KVM is exiting to userspace.  I.e. KVM should not set a special flag if
> > > the memslot has "fast only" behavior.  The only thing the flag should do is control
> > > whether or not KVM tries slow paths, what KVM does in response to an unresolved
> > > fault should be an orthogonal thing.
> >
> > I'm guessing you would want changes to patch 10 of this series [*]
> > then, right? Setting a bit/exit reason in kvm_run::memory_fault.flags
> > depending on whether the failure originated from a "fast only" fault
> > is... exactly what I'm doing :/ I'm not totally clear on your usages
> > of the word "flag" above though, the "KVM should not set a special
> > flag... the only thing *the* flag should do" part is throwing me off a
> > bit. What I think you're saying is
>
> Heh, the second "the flag" is referring to the memslot flag.  Rewriting the above:
>
>   KVM should not set a special flag in kvm_run::memory_fault.flags ... the
>   only thing KVM_MEM_FAST_FAULT_ONLY should do is ..."
>
> > "KVM should not set a special bit in kvm_run::memory_fault.flags if
> > the memslot has fast-only behavior. The only thing
> > KVM_MEM_ABSENT_MAPPING_FAULT should do is..."
> >
> > [1] https://lore.kernel.org/all/20230315021738.1151386-11-amoorthy@google.com/

Ok so, just to be clear, you are not opposed to

(a) all -EFAULTs from kvm_faultin_pfn populating the
kvm_run.memory_fault and setting kvm_run.memory_fault.flags to, say,
FAULTIN_FAILURE if/when kvm_cap_memory_fault_exit is enabled

but *are* opposed to

(b) the combination of the memslot flag and kvm_cap_memory_fault_exit
providing any additional information on top of: for instance, a
kvm_run.memory_fault.flags of FAULTIN_FAILURE & FAST_FAULT_ONLY.

Is that right?


> > On Fri, Mar 17, 2023 at 1:54 PM Sean Christopherson <seanjc@google.com> wrote:
> > >
> > > Strictly speaking, if y'all buy my argument that the flag shouldn't control the
> > > gup behavior, there won't be semantic differences for the memslot flag.  KVM will
> > > (obviously) behavior differently if KVM_CAP_MEMORY_FAULT_EXIT is not set, but that
> > > will hold true for x86 as well.  The only difference is that x86 will also support
> > > an orthogonal flag that makes the fast-only memslot flag useful in practice.
> > >
> > > So yeah, there will be an arch dependency, but only because arch code needs to
> > > actually handle perform the exit, and that's true no matter what.
> > >
> > > That said, there's zero reason to put X86 in the name.  Just add the capability
> > > as KVM_CAP_MEMORY_FAULT_EXIT or whatever and mark it as x86 in the documentation.
> > >
> > > That said, there's zero reason to put X86 in the name.  Just add the capability
> > > as KVM_CAP_MEMORY_FAULT_EXIT or whatever and mark it as x86 in the documentation.
> >
> > Again, a little confused on your first "flag" usage here. I figure you
> > can't mean the memslot flag because the whole point of that is to
> > control the GUP behavior, but I'm not sure what else you'd be
> > referring to.
> >
> > Anyways the idea of having orthogonal features, one to -EFAULTing
> > early before a slow path and another to transform/augment -EFAULTs
> > into/with useful information does make sense to me. But I think the
> > issue here is that we want the fast-only memslot flag to be useful on
> > Arm as well, and with KVM_CAP_MEMORY_FAULT_NOWAIT written as it is now
> > there is a semantic differences between x86 and Arm.
>
> If and only if userspace enables the capability that transforms -EFAULT.
>
> > I don't see a way to keep the two features here orthogonal on x86 and
> > linked on arm without keeping that semantic difference. Perhaps the
> > solution here is a bare-bones implementation of
> > KVM_CAP_MEMORY_FAULT_EXIT for Arm? All that actually *needs* to be
> > covered to resolve this difference is the one call site in
> > user_mem_abort. since KVM_CAP_MEMORY_FAULT_EXIT will be allowed to
> > have holes anyways.
>
> As above, so long as userspace must opt into transforming -EFAULT, and can do
> so independent of KVM_MEM_FAST_FAULT_ONLY (or whatever we call it), the behavior
> of KVM_MEM_FAST_FAULT_ONLY itself is semantically identical across all
> architectures.
>
> KVM_MEM_FAST_FAULT_ONLY is obviously not very useful without precise information
> about the failing address, but IMO that's not reason enough to tie the two
> together.
Oliver Upton March 20, 2023, 10:22 p.m. UTC | #8
Sean,

On Fri, Mar 17, 2023 at 01:17:22PM -0700, Sean Christopherson wrote:
> On Fri, Mar 17, 2023, Oliver Upton wrote:
> > On Wed, Mar 15, 2023 at 02:17:33AM +0000, Anish Moorthy wrote:
> > > Add documentation, memslot flags, useful helper functions, and the
> > > actual new capability itself.
> > > 
> > > Memory fault exits on absent mappings are particularly useful for
> > > userfaultfd-based live migration postcopy. When many vCPUs fault upon a
> > > single userfaultfd the faults can take a while to surface to userspace
> > > due to having to contend for uffd wait queue locks. Bypassing the uffd
> > > entirely by triggering a vCPU exit avoids this contention and can improve
> > > the fault rate by as much as 10x.
> > > ---
> > >  Documentation/virt/kvm/api.rst | 37 +++++++++++++++++++++++++++++++---
> > >  include/linux/kvm_host.h       |  6 ++++++
> > >  include/uapi/linux/kvm.h       |  3 +++
> > >  tools/include/uapi/linux/kvm.h |  2 ++
> > >  virt/kvm/kvm_main.c            |  7 ++++++-
> > >  5 files changed, 51 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > > index f9ca18bbec879..4932c0f62eb3d 100644
> > > --- a/Documentation/virt/kvm/api.rst
> > > +++ b/Documentation/virt/kvm/api.rst
> > > @@ -1312,6 +1312,7 @@ yet and must be cleared on entry.
> > >    /* for kvm_userspace_memory_region::flags */
> > >    #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
> > >    #define KVM_MEM_READONLY	(1UL << 1)
> > > +  #define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)
> > 
> > call it KVM_MEM_EXIT_ABSENT_MAPPING
> 
> Ooh, look, a bikeshed!  :-)

Couldn't help myself :)

> I don't think it should have "EXIT" in the name.  The exit to userspace is a side
> effect, e.g. KVM already exits to userspace on unresolved userfaults.  The only
> thing this knob _directly_ controls is whether or not KVM attempts the slow path.
> If we give the flag a name like "exit on absent userspace mappings", then KVM will
> appear to do the wrong thing when KVM exits on a truly absent userspace mapping.
> 
> And as I argued in the last version[*], I am _strongly_ opposed to KVM speculating
> on why KVM is exiting to userspace.  I.e. KVM should not set a special flag if
> the memslot has "fast only" behavior.  The only thing the flag should do is control
> whether or not KVM tries slow paths, what KVM does in response to an unresolved
> fault should be an orthogonal thing.
> 
> E.g. If KVM encounters an unmapped page while prefetching SPTEs, KVM will (correctly)
> not exit to userspace and instead simply terminate the prefetch.  Obviously we
> could solve that through documentation, but I don't see any benefit in making this
> more complex than it needs to be.

I couldn't care less about what the user-facing portion of this thing is
called, TBH. We could just refer to it as KVM_MEM_BIT_2 /s

The only bit I wanted to avoid is having a collision in the kernel between
literal faults arising from hardware and exits to userspace that we are also
calling 'faults'.

> [*] https://lkml.kernel.org/r/Y%2B0RYMfw6pHrSLX4%40google.com
> 
> > > +7.35 KVM_CAP_MEMORY_FAULT_NOWAIT
> > > +--------------------------------
> > > +
> > > +:Architectures: x86, arm64
> > > +:Returns: -EINVAL.
> > > +
> > > +The presence of this capability indicates that userspace may pass the
> > > +KVM_MEM_ABSENT_MAPPING_FAULT flag to KVM_SET_USER_MEMORY_REGION to cause KVM_RUN
> > > +to exit to populate 'kvm_run.memory_fault' and exit to userspace (*) in response
> > > +to page faults for which the userspace page tables do not contain present
> > > +mappings. Attempting to enable the capability directly will fail.
> > > +
> > > +The 'gpa' and 'len' fields of kvm_run.memory_fault will be set to the starting
> > > +address and length (in bytes) of the faulting page. 'flags' will be set to
> > > +KVM_MEMFAULT_REASON_ABSENT_MAPPING.
> > > +
> > > +Userspace should determine how best to make the mapping present, then take
> > > +appropriate action. For instance, in the case of absent mappings this might
> > > +involve establishing the mapping for the first time via UFFDIO_COPY/CONTINUE or
> > > +faulting the mapping in using MADV_POPULATE_READ/WRITE. After establishing the
> > > +mapping, userspace can return to KVM to retry the previous memory access.
> > > +
> > > +(*) NOTE: On x86, KVM_CAP_X86_MEMORY_FAULT_EXIT must be enabled for the
> > > +KVM_MEMFAULT_REASON_ABSENT_MAPPING_reason: otherwise userspace will only receive
> > > +a -EFAULT from KVM_RUN without any useful information.
> > 
> > I'm not a fan of this architecture-specific dependency. Userspace is already
> > explicitly opting in to this behavior by way of the memslot flag. These sort
> > of exits are entirely orthogonal to the -EFAULT conversion earlier in the
> > series.
> 
> Ya, yet another reason not to speculate on why KVM wasn't able to resolve a fault.

Regardless of what we name this memslot flag, we're already getting explicit
opt-in from userspace for new behavior. There seems to be zero value in
supporting memslot_flag && !MEMORY_FAULT_EXIT (i.e. returning EFAULT),
so why even bother?

Requiring two levels of opt-in to have the intended outcome for a single
architecture seems nauseating from a userspace perspective.
Sean Christopherson March 21, 2023, 2:50 p.m. UTC | #9
On Mon, Mar 20, 2023, Oliver Upton wrote:
> On Fri, Mar 17, 2023 at 01:17:22PM -0700, Sean Christopherson wrote:
> > On Fri, Mar 17, 2023, Oliver Upton wrote:
> > > I'm not a fan of this architecture-specific dependency. Userspace is already
> > > explicitly opting in to this behavior by way of the memslot flag. These sort
> > > of exits are entirely orthogonal to the -EFAULT conversion earlier in the
> > > series.
> > 
> > Ya, yet another reason not to speculate on why KVM wasn't able to resolve a fault.
> 
> Regardless of what we name this memslot flag, we're already getting explicit
> opt-in from userspace for new behavior. There seems to be zero value in
> supporting memslot_flag && !MEMORY_FAULT_EXIT (i.e. returning EFAULT),
> so why even bother?

Because there are use cases for MEMORY_FAULT_EXIT beyond fast-only gup.  We could
have the memslot feature depend on the MEMORY_FAULT_EXIT capability, but I don't
see how that adds value for either KVM or userspace.

Filling MEMORY_FAULT_EXIT iff the memslot flag is set would also lead to a weird
ABI and/or funky KVM code.  E.g. if MEMORY_FAULT_EXIT is tied to the fast-only
memslot flag, what's the defined behavior if the gfn=>hva translation fails?  KVM
hasn't actually tried to gup() anything.  Obviously not the end of the world, but
I'd prefer not to avoid introduce more odditites into KVM, however minor.

> Requiring two levels of opt-in to have the intended outcome for a single
> architecture seems nauseating from a userspace perspective.

If we usurp -EFAULT, I don't think we'll actually need an opt-in for
MEMORY_FAULT_EXIT.  KVM will need to add a capability so that userspace can query
KVM support, but the actual filling of of kvm_run could be done unconditionally.

Even if we do end up making the behavior opt-in, I would expect them to be largely
orthogonal in userspace.  E.g. userspace would always enable MEMORY_FAULT_EXIT
during startup, and then toggle the memslot flag during postcopy.
Oliver Upton March 21, 2023, 8:23 p.m. UTC | #10
On Tue, Mar 21, 2023 at 07:50:35AM -0700, Sean Christopherson wrote:
> On Mon, Mar 20, 2023, Oliver Upton wrote:
> > On Fri, Mar 17, 2023 at 01:17:22PM -0700, Sean Christopherson wrote:
> > > On Fri, Mar 17, 2023, Oliver Upton wrote:
> > > > I'm not a fan of this architecture-specific dependency. Userspace is already
> > > > explicitly opting in to this behavior by way of the memslot flag. These sort
> > > > of exits are entirely orthogonal to the -EFAULT conversion earlier in the
> > > > series.
> > > 
> > > Ya, yet another reason not to speculate on why KVM wasn't able to resolve a fault.
> > 
> > Regardless of what we name this memslot flag, we're already getting explicit
> > opt-in from userspace for new behavior. There seems to be zero value in
> > supporting memslot_flag && !MEMORY_FAULT_EXIT (i.e. returning EFAULT),
> > so why even bother?
> 
> Because there are use cases for MEMORY_FAULT_EXIT beyond fast-only gup.

To be abundantly clear -- I have no issue with (nor care about) the other
MEMORY_FAULT_EXIT changes. If we go the route of explicit user opt-in then
that deserves its own distinct bit of UAPI. None of my objection pertains
to the conversion of existing -EFAULT exits.

> We could have the memslot feature depend on the MEMORY_FAULT_EXIT capability,
> but I don't see how that adds value for either KVM or userspace.

That is exactly what I want to avoid! My issue was the language here:

  +(*) NOTE: On x86, KVM_CAP_X86_MEMORY_FAULT_EXIT must be enabled for the
  +KVM_MEMFAULT_REASON_ABSENT_MAPPING_reason: otherwise userspace will only receive
  +a -EFAULT from KVM_RUN without any useful information.

Which sounds to me as though there are *two* UAPI bits for the whole fast-gup
failed interaction (flip a bit in the CAP and set a bit on the memslot, but
only for x86).

What I'm asking for is this:

 1) A capability advertising MEMORY_FAULT_EXIT to userspace. Either usurp
   EFAULT or require userspace to enable this capability to convert
   _existing_ EFAULT exits to the new way of the world.

 2) A capability and a single memslot flag to enable the fast-gup-only
   behavior (naming TBD). This does not depend on (1) in any way, i.e.
   only setting (2) should still result in MEMORY_FAULT_EXITs when fast
   gup fails. IOW, enabling (2) should always yield precise fault
   information to userspace.
Sean Christopherson March 21, 2023, 9:01 p.m. UTC | #11
On Tue, Mar 21, 2023, Oliver Upton wrote:
> On Tue, Mar 21, 2023 at 07:50:35AM -0700, Sean Christopherson wrote:
> > On Mon, Mar 20, 2023, Oliver Upton wrote:
> > > On Fri, Mar 17, 2023 at 01:17:22PM -0700, Sean Christopherson wrote:
> > > > On Fri, Mar 17, 2023, Oliver Upton wrote:
> > > > > I'm not a fan of this architecture-specific dependency. Userspace is already
> > > > > explicitly opting in to this behavior by way of the memslot flag. These sort
> > > > > of exits are entirely orthogonal to the -EFAULT conversion earlier in the
> > > > > series.
> > > > 
> > > > Ya, yet another reason not to speculate on why KVM wasn't able to resolve a fault.
> > > 
> > > Regardless of what we name this memslot flag, we're already getting explicit
> > > opt-in from userspace for new behavior. There seems to be zero value in
> > > supporting memslot_flag && !MEMORY_FAULT_EXIT (i.e. returning EFAULT),
> > > so why even bother?
> > 
> > Because there are use cases for MEMORY_FAULT_EXIT beyond fast-only gup.
> 
> To be abundantly clear -- I have no issue with (nor care about) the other
> MEMORY_FAULT_EXIT changes. If we go the route of explicit user opt-in then
> that deserves its own distinct bit of UAPI. None of my objection pertains
> to the conversion of existing -EFAULT exits.
> 
> > We could have the memslot feature depend on the MEMORY_FAULT_EXIT capability,
> > but I don't see how that adds value for either KVM or userspace.
> 
> That is exactly what I want to avoid! My issue was the language here:
> 
>   +(*) NOTE: On x86, KVM_CAP_X86_MEMORY_FAULT_EXIT must be enabled for the
>   +KVM_MEMFAULT_REASON_ABSENT_MAPPING_reason: otherwise userspace will only receive
>   +a -EFAULT from KVM_RUN without any useful information.
> 
> Which sounds to me as though there are *two* UAPI bits for the whole fast-gup
> failed interaction (flip a bit in the CAP and set a bit on the memslot, but
> only for x86).

It won't be x86 only.  Anish's proposed patch has it as x86 specific, but I think
we're all in agreement that that is undesirable.  There will inevitably be per-arch
enabling and enumeration, e.g. to actually fill information and kick out to
userspace, but I don't see a sane way to avoid that since the common paths don't
have the vCPU (largely by design).

> What I'm asking for is this:
> 
>  1) A capability advertising MEMORY_FAULT_EXIT to userspace. Either usurp
>    EFAULT or require userspace to enable this capability to convert
>    _existing_ EFAULT exits to the new way of the world.
> 
>  2) A capability and a single memslot flag to enable the fast-gup-only
>    behavior (naming TBD). This does not depend on (1) in any way, i.e.
>    only setting (2) should still result in MEMORY_FAULT_EXITs when fast
>    gup fails. IOW, enabling (2) should always yield precise fault
>    information to userspace.

Ah, so 2.2, providing precise fault information on fast-gup-only failures, is the
biggest (only?) point of contention.

My objection to that behavior is that it's either going to annoyingly difficult to
get right in KVM, and even more annoying to maintain, or we'll end up with "fuzzy"
behavior that userspace will inevitably come to rely on, and then we'll be in a real
pickle.  E.g. if KVM sets the information without checking if gup() itself actually
failed, then KVM _might_ fill the info, depending on when KVM detects a problem.

Conversly, if KVM's contract is that it provides precise information if and only
if gup() fails, then KVM needs to precisely propagate back up the stack that gup()
failed.

To avoid spending more time going in circles, I propose we try to usurp -EFAULT
and convert all userspace-exits-from-KVM_RUN -EFAULT paths on x86 (as a guinea pig)
without requiring userspace to opt-in.  If that approach pans out, then this point
of contention goes away because 2.2 Just Works.
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index f9ca18bbec879..4932c0f62eb3d 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1312,6 +1312,7 @@  yet and must be cleared on entry.
   /* for kvm_userspace_memory_region::flags */
   #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
   #define KVM_MEM_READONLY	(1UL << 1)
+  #define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)
 
 This ioctl allows the user to create, modify or delete a guest physical
 memory slot.  Bits 0-15 of "slot" specify the slot id and this value
@@ -1342,12 +1343,15 @@  It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
 be identical.  This allows large pages in the guest to be backed by large
 pages in the host.
 
-The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
-KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
+The flags field supports three flags
+
+1.  KVM_MEM_LOG_DIRTY_PAGES: can be set to instruct KVM to keep track of
 writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
-use it.  The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
+use it.
+2.  KVM_MEM_READONLY: can be set, if KVM_CAP_READONLY_MEM capability allows it,
 to make a new slot read-only.  In this case, writes to this memory will be
 posted to userspace as KVM_EXIT_MMIO exits.
+3.  KVM_MEM_ABSENT_MAPPING_FAULT: see KVM_CAP_MEMORY_FAULT_NOWAIT for details.
 
 When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
 the memory region are automatically reflected into the guest.  For example, an
@@ -7702,10 +7706,37 @@  Through args[0], the capability can be set on a per-exit-reason basis.
 Currently, the only exit reasons supported are
 
 1. KVM_MEMFAULT_REASON_UNKNOWN (1 << 0)
+2. KVM_MEMFAULT_REASON_ABSENT_MAPPING (1 << 1)
 
 Memory fault exits with a reason of UNKNOWN should not be depended upon: they
 may be added, removed, or reclassified under a stable reason.
 
+7.35 KVM_CAP_MEMORY_FAULT_NOWAIT
+--------------------------------
+
+:Architectures: x86, arm64
+:Returns: -EINVAL.
+
+The presence of this capability indicates that userspace may pass the
+KVM_MEM_ABSENT_MAPPING_FAULT flag to KVM_SET_USER_MEMORY_REGION to cause KVM_RUN
+to exit to populate 'kvm_run.memory_fault' and exit to userspace (*) in response
+to page faults for which the userspace page tables do not contain present
+mappings. Attempting to enable the capability directly will fail.
+
+The 'gpa' and 'len' fields of kvm_run.memory_fault will be set to the starting
+address and length (in bytes) of the faulting page. 'flags' will be set to
+KVM_MEMFAULT_REASON_ABSENT_MAPPING.
+
+Userspace should determine how best to make the mapping present, then take
+appropriate action. For instance, in the case of absent mappings this might
+involve establishing the mapping for the first time via UFFDIO_COPY/CONTINUE or
+faulting the mapping in using MADV_POPULATE_READ/WRITE. After establishing the
+mapping, userspace can return to KVM to retry the previous memory access.
+
+(*) NOTE: On x86, KVM_CAP_X86_MEMORY_FAULT_EXIT must be enabled for the
+KVM_MEMFAULT_REASON_ABSENT_MAPPING_reason: otherwise userspace will only receive
+a -EFAULT from KVM_RUN without any useful information.
+
 8. Other capabilities.
 ======================
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d3ccfead73e42..c28330f25526f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -593,6 +593,12 @@  static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *sl
 	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
 }
 
+static inline bool kvm_slot_fault_on_absent_mapping(
+	const struct kvm_memory_slot *slot)
+{
+	return slot->flags & KVM_MEM_ABSENT_MAPPING_FAULT;
+}
+
 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
 {
 	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0ba1d7f01346e..2146b27cdd61a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -102,6 +102,7 @@  struct kvm_userspace_memory_region {
  */
 #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
 #define KVM_MEM_READONLY	(1UL << 1)
+#define KVM_MEM_ABSENT_MAPPING_FAULT	(1UL << 2)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -1197,6 +1198,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
 #define KVM_CAP_X86_MEMORY_FAULT_EXIT 227
+#define KVM_CAP_MEMORY_FAULT_NOWAIT 228
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -2252,5 +2254,6 @@  struct kvm_s390_zpci_op {
 
 /* Exit reasons for KVM_EXIT_MEMORY_FAULT */
 #define KVM_MEMFAULT_REASON_UNKNOWN (1 << 0)
+#define KVM_MEMFAULT_REASON_ABSENT_MAPPING (1 << 1)
 
 #endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 2b468345f25c3..1a1707d9f442a 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -102,6 +102,7 @@  struct kvm_userspace_memory_region {
  */
 #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
 #define KVM_MEM_READONLY	(1UL << 1)
+#define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -2242,5 +2243,6 @@  struct kvm_s390_zpci_op {
 
 /* Exit reasons for KVM_EXIT_MEMORY_FAULT */
 #define KVM_MEMFAULT_REASON_UNKNOWN (1 << 0)
+#define KVM_MEMFAULT_REASON_ABSENT_MAPPING (1 << 1)
 
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 00aec43860ff1..aa3b59410a356 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1525,6 +1525,9 @@  static int check_memory_region_flags(const struct kvm_userspace_memory_region *m
 	valid_flags |= KVM_MEM_READONLY;
 #endif
 
+	if (kvm_vm_ioctl_check_extension(NULL, KVM_CAP_MEMORY_FAULT_NOWAIT))
+		valid_flags |= KVM_MEM_ABSENT_MAPPING_FAULT;
+
 	if (mem->flags & ~valid_flags)
 		return -EINVAL;
 
@@ -6196,7 +6199,9 @@  inline int kvm_memfault_exit_or_efault(
 
 bool kvm_memfault_exit_flags_valid(uint64_t reasons)
 {
-	uint64_t valid_flags = KVM_MEMFAULT_REASON_UNKNOWN;
+	uint64_t valid_flags
+		= KVM_MEMFAULT_REASON_UNKNOWN
+		| KVM_MEMFAULT_REASON_ABSENT_MAPPING;
 
 	return !(reasons & !valid_flags);
 }