diff mbox series

[v5,3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages

Message ID 20250303171013.3548775-4-tabba@google.com (mailing list archive)
State New
Headers show
Series KVM: Mapping guest_memfd backed memory at the host for software protected VMs | expand

Commit Message

Fuad Tabba March 3, 2025, 5:10 p.m. UTC
Add support for mmap() and fault() for guest_memfd backed memory
in the host for VMs that support in-place conversion between
shared and private. To that end, this patch adds the ability to
check whether the VM type supports in-place conversion, and only
allows mapping its memory if that's the case.

Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
indicates that the VM supports shared memory in guest_memfd, or
that the host can create VMs that support shared memory.
Supporting shared memory implies that memory can be mapped when
shared with the host.

This is controlled by the KVM_GMEM_SHARED_MEM configuration
option.

Signed-off-by: Fuad Tabba <tabba@google.com>
---
 include/linux/kvm_host.h |  11 ++++
 include/uapi/linux/kvm.h |   1 +
 virt/kvm/guest_memfd.c   | 105 +++++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c      |   4 ++
 4 files changed, 121 insertions(+)

Comments

Kirill A. Shutemov March 4, 2025, 8:58 a.m. UTC | #1
On Mon, Mar 03, 2025 at 05:10:07PM +0000, Fuad Tabba wrote:
> Add support for mmap() and fault() for guest_memfd backed memory
> in the host for VMs that support in-place conversion between
> shared and private. To that end, this patch adds the ability to
> check whether the VM type supports in-place conversion, and only
> allows mapping its memory if that's the case.
> 
> Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> indicates that the VM supports shared memory in guest_memfd, or
> that the host can create VMs that support shared memory.
> Supporting shared memory implies that memory can be mapped when
> shared with the host.
> 
> This is controlled by the KVM_GMEM_SHARED_MEM configuration
> option.
> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  include/linux/kvm_host.h |  11 ++++
>  include/uapi/linux/kvm.h |   1 +
>  virt/kvm/guest_memfd.c   | 105 +++++++++++++++++++++++++++++++++++++++
>  virt/kvm/kvm_main.c      |   4 ++
>  4 files changed, 121 insertions(+)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 7788e3625f6d..2d025b8ee20e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
>  }
>  #endif
>  
> +/*
> + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> + * private memory is enabled and it supports in-place shared/private conversion.
> + */
> +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)

Hm. Do we expect any caller for !CONFIG_KVM_PRIVATE_MEM?

> +static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
> +{
> +	return false;
> +}
> +#endif
> +
>  #ifndef kvm_arch_has_readonly_mem
>  static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
>  {

... 

> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index b2aa6bf24d3a..4291956b51ae 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -312,7 +312,112 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
>  	return gfn - slot->base_gfn + slot->gmem.pgoff;
>  }
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +static bool kvm_gmem_offset_is_shared(struct file *file, pgoff_t index)
> +{
> +	struct kvm_gmem *gmem = file->private_data;
> +
> +	/* For now, VMs that support shared memory share all their memory. */
> +	return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
> +}
> +
> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> +{
> +	struct inode *inode = file_inode(vmf->vma->vm_file);
> +	struct folio *folio;
> +	vm_fault_t ret = VM_FAULT_LOCKED;
> +
> +	filemap_invalidate_lock_shared(inode->i_mapping);
> +
> +	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> +	if (IS_ERR(folio)) {
> +		switch (PTR_ERR(folio)) {
> +		case -EAGAIN:
> +			ret = VM_FAULT_RETRY;
> +			break;
> +		case -ENOMEM:
> +			ret = VM_FAULT_OOM;
> +			break;
> +		default:
> +			ret = VM_FAULT_SIGBUS;
> +			break;
> +		}
> +		goto out_filemap;
> +	}
> +
> +	if (folio_test_hwpoison(folio)) {
> +		ret = VM_FAULT_HWPOISON;
> +		goto out_folio;
> +	}
> +
> +	/* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */

If this is a requirement, it would be cleaner to rename the function and
pass down the folio and check the lock state inside.
Fuad Tabba March 4, 2025, 9:27 a.m. UTC | #2
Hi Kirill,

On Tue, 4 Mar 2025 at 08:58, Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> On Mon, Mar 03, 2025 at 05:10:07PM +0000, Fuad Tabba wrote:
> > Add support for mmap() and fault() for guest_memfd backed memory
> > in the host for VMs that support in-place conversion between
> > shared and private. To that end, this patch adds the ability to
> > check whether the VM type supports in-place conversion, and only
> > allows mapping its memory if that's the case.
> >
> > Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> > indicates that the VM supports shared memory in guest_memfd, or
> > that the host can create VMs that support shared memory.
> > Supporting shared memory implies that memory can be mapped when
> > shared with the host.
> >
> > This is controlled by the KVM_GMEM_SHARED_MEM configuration
> > option.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  include/linux/kvm_host.h |  11 ++++
> >  include/uapi/linux/kvm.h |   1 +
> >  virt/kvm/guest_memfd.c   | 105 +++++++++++++++++++++++++++++++++++++++
> >  virt/kvm/kvm_main.c      |   4 ++
> >  4 files changed, 121 insertions(+)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 7788e3625f6d..2d025b8ee20e 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> >  }
> >  #endif
> >
> > +/*
> > + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> > + * private memory is enabled and it supports in-place shared/private conversion.
> > + */
> > +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
>
> Hm. Do we expect any caller for !CONFIG_KVM_PRIVATE_MEM?
>
> > +static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
> > +{
> > +     return false;
> > +}
> > +#endif
> > +
> >  #ifndef kvm_arch_has_readonly_mem
> >  static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
> >  {
>
> ...
>
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index b2aa6bf24d3a..4291956b51ae 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -312,7 +312,112 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >       return gfn - slot->base_gfn + slot->gmem.pgoff;
> >  }
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +static bool kvm_gmem_offset_is_shared(struct file *file, pgoff_t index)
> > +{
> > +     struct kvm_gmem *gmem = file->private_data;
> > +
> > +     /* For now, VMs that support shared memory share all their memory. */
> > +     return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
> > +}
> > +
> > +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> > +{
> > +     struct inode *inode = file_inode(vmf->vma->vm_file);
> > +     struct folio *folio;
> > +     vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > +     filemap_invalidate_lock_shared(inode->i_mapping);
> > +
> > +     folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > +     if (IS_ERR(folio)) {
> > +             switch (PTR_ERR(folio)) {
> > +             case -EAGAIN:
> > +                     ret = VM_FAULT_RETRY;
> > +                     break;
> > +             case -ENOMEM:
> > +                     ret = VM_FAULT_OOM;
> > +                     break;
> > +             default:
> > +                     ret = VM_FAULT_SIGBUS;
> > +                     break;
> > +             }
> > +             goto out_filemap;
> > +     }
> > +
> > +     if (folio_test_hwpoison(folio)) {
> > +             ret = VM_FAULT_HWPOISON;
> > +             goto out_folio;
> > +     }
> > +
> > +     /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
>
> If this is a requirement, it would be cleaner to rename the function and
> pass down the folio and check the lock state inside.

Will do.

Thanks,
/fuad

> --
>   Kiryl Shutsemau / Kirill A. Shutemov
Kirill A. Shutemov March 4, 2025, 9:45 a.m. UTC | #3
On Tue, Mar 04, 2025 at 09:27:06AM +0000, Fuad Tabba wrote:
> Hi Kirill,
> 
> On Tue, 4 Mar 2025 at 08:58, Kirill A. Shutemov
> <kirill.shutemov@linux.intel.com> wrote:
> >
> > On Mon, Mar 03, 2025 at 05:10:07PM +0000, Fuad Tabba wrote:
> > > Add support for mmap() and fault() for guest_memfd backed memory
> > > in the host for VMs that support in-place conversion between
> > > shared and private. To that end, this patch adds the ability to
> > > check whether the VM type supports in-place conversion, and only
> > > allows mapping its memory if that's the case.
> > >
> > > Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> > > indicates that the VM supports shared memory in guest_memfd, or
> > > that the host can create VMs that support shared memory.
> > > Supporting shared memory implies that memory can be mapped when
> > > shared with the host.
> > >
> > > This is controlled by the KVM_GMEM_SHARED_MEM configuration
> > > option.
> > >
> > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > > ---
> > >  include/linux/kvm_host.h |  11 ++++
> > >  include/uapi/linux/kvm.h |   1 +
> > >  virt/kvm/guest_memfd.c   | 105 +++++++++++++++++++++++++++++++++++++++
> > >  virt/kvm/kvm_main.c      |   4 ++
> > >  4 files changed, 121 insertions(+)
> > >
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index 7788e3625f6d..2d025b8ee20e 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> > >  }
> > >  #endif
> > >
> > > +/*
> > > + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> > > + * private memory is enabled and it supports in-place shared/private conversion.
> > > + */
> > > +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
> >
> > Hm. Do we expect any caller for !CONFIG_KVM_PRIVATE_MEM?

I think you missed this.
Fuad Tabba March 4, 2025, 9:52 a.m. UTC | #4
On Tue, 4 Mar 2025 at 09:46, Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> On Tue, Mar 04, 2025 at 09:27:06AM +0000, Fuad Tabba wrote:
> > Hi Kirill,
> >
> > On Tue, 4 Mar 2025 at 08:58, Kirill A. Shutemov
> > <kirill.shutemov@linux.intel.com> wrote:
> > >
> > > On Mon, Mar 03, 2025 at 05:10:07PM +0000, Fuad Tabba wrote:
> > > > Add support for mmap() and fault() for guest_memfd backed memory
> > > > in the host for VMs that support in-place conversion between
> > > > shared and private. To that end, this patch adds the ability to
> > > > check whether the VM type supports in-place conversion, and only
> > > > allows mapping its memory if that's the case.
> > > >
> > > > Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> > > > indicates that the VM supports shared memory in guest_memfd, or
> > > > that the host can create VMs that support shared memory.
> > > > Supporting shared memory implies that memory can be mapped when
> > > > shared with the host.
> > > >
> > > > This is controlled by the KVM_GMEM_SHARED_MEM configuration
> > > > option.
> > > >
> > > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > > > ---
> > > >  include/linux/kvm_host.h |  11 ++++
> > > >  include/uapi/linux/kvm.h |   1 +
> > > >  virt/kvm/guest_memfd.c   | 105 +++++++++++++++++++++++++++++++++++++++
> > > >  virt/kvm/kvm_main.c      |   4 ++
> > > >  4 files changed, 121 insertions(+)
> > > >
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index 7788e3625f6d..2d025b8ee20e 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> > > >  }
> > > >  #endif
> > > >
> > > > +/*
> > > > + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> > > > + * private memory is enabled and it supports in-place shared/private conversion.
> > > > + */
> > > > +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
> > >
> > > Hm. Do we expect any caller for !CONFIG_KVM_PRIVATE_MEM?
>
> I think you missed this.

Yes I did :)

You're right (reading between the lines of where you're getting at
that is). This should be

+#if !defined(kvm_arch_gmem_supports_shared_mem) &&
!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)

I'll fix (if you agree with my understanding of your comment).

Cheers,
/fuad



> --
>   Kiryl Shutsemau / Kirill A. Shutemov
Fuad Tabba March 6, 2025, 8:48 a.m. UTC | #5
Hi Ackerley,

On Thu, 6 Mar 2025 at 00:02, Ackerley Tng <ackerleytng@google.com> wrote:
>
> Fuad Tabba <tabba@google.com> writes:
>
> > <snip>
> >
> > +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> > +{
> > +     struct inode *inode = file_inode(vmf->vma->vm_file);
> > +     struct folio *folio;
> > +     vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > +     filemap_invalidate_lock_shared(inode->i_mapping);
> > +
> > +     folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > +     if (IS_ERR(folio)) {
> > +             switch (PTR_ERR(folio)) {
> > +             case -EAGAIN:
> > +                     ret = VM_FAULT_RETRY;
> > +                     break;
> > +             case -ENOMEM:
> > +                     ret = VM_FAULT_OOM;
> > +                     break;
> > +             default:
> > +                     ret = VM_FAULT_SIGBUS;
> > +                     break;
> > +             }
> > +             goto out_filemap;
> > +     }
> > +
> > +     if (folio_test_hwpoison(folio)) {
> > +             ret = VM_FAULT_HWPOISON;
> > +             goto out_folio;
> > +     }
> > +
> > +     /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
> > +     if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
> > +             ret = VM_FAULT_SIGBUS;
> > +             goto out_folio;
> > +     }
> > +
> > +     /*
> > +      * Only private folios are marked as "guestmem" so far, and we never
> > +      * expect private folios at this point.
> > +      */
>
> I think this is not quite accurate.
>
> Based on my understanding and kvm_gmem_handle_folio_put() in this other
> patch [1], only pages *in transition* from shared to private state are
> marked "guestmem", although it is true that no private folios or folios
> marked guestmem are expected here.

Technically, pages in transition are private as far as the host is
concerned. This doesn't say that _all_ private pages are marked as
guestmem. It says that only private pages are marked as guestmem. It
could be private and _not_ be marked as guestmem :)

I probably should rephrase something along the lines of, "no shared
folios would be marked as guestmem". How does that sound?

Thanks,
/fuad

> > +     if (WARN_ON_ONCE(folio_test_guestmem(folio)))  {
> > +             ret = VM_FAULT_SIGBUS;
> > +             goto out_folio;
> > +     }
> > +
> > +     /* No support for huge pages. */
> > +     if (WARN_ON_ONCE(folio_test_large(folio))) {
> > +             ret = VM_FAULT_SIGBUS;
> > +             goto out_folio;
> > +     }
> > +
> > +     if (!folio_test_uptodate(folio)) {
> > +             clear_highpage(folio_page(folio, 0));
> > +             kvm_gmem_mark_prepared(folio);
> > +     }
> > +
> > +     vmf->page = folio_file_page(folio, vmf->pgoff);
> > +
> > +out_folio:
> > +     if (ret != VM_FAULT_LOCKED) {
> > +             folio_unlock(folio);
> > +             folio_put(folio);
> > +     }
> > +
> > +out_filemap:
> > +     filemap_invalidate_unlock_shared(inode->i_mapping);
> > +
> > +     return ret;
> > +}
> >
> > <snip>
>
> [1] https://lore.kernel.org/all/20250117163001.2326672-7-tabba@google.com/
James Houghton March 7, 2025, 1:53 a.m. UTC | #6
On Mon, Mar 3, 2025 at 9:10 AM Fuad Tabba <tabba@google.com> wrote:
> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> +{
> +       struct inode *inode = file_inode(vmf->vma->vm_file);
> +       struct folio *folio;
> +       vm_fault_t ret = VM_FAULT_LOCKED;
> +
> +       filemap_invalidate_lock_shared(inode->i_mapping);
> +
> +       folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> +       if (IS_ERR(folio)) {
> +               switch (PTR_ERR(folio)) {
> +               case -EAGAIN:
> +                       ret = VM_FAULT_RETRY;
> +                       break;
> +               case -ENOMEM:
> +                       ret = VM_FAULT_OOM;
> +                       break;
> +               default:
> +                       ret = VM_FAULT_SIGBUS;
> +                       break;

Tiny nit-pick: This smells almost like an open-coded vmf_error(). For
the non-EAGAIN case, can we just call vmf_error()?

> +               }
> +               goto out_filemap;
> +       }
> +
> +       if (folio_test_hwpoison(folio)) {
> +               ret = VM_FAULT_HWPOISON;
> +               goto out_folio;
> +       }
> +
> +       /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
> +       if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
> +               ret = VM_FAULT_SIGBUS;
> +               goto out_folio;
> +       }
Fuad Tabba March 7, 2025, 8:55 a.m. UTC | #7
Hi James,

On Fri, 7 Mar 2025 at 01:53, James Houghton <jthoughton@google.com> wrote:
>
> On Mon, Mar 3, 2025 at 9:10 AM Fuad Tabba <tabba@google.com> wrote:
> > +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> > +{
> > +       struct inode *inode = file_inode(vmf->vma->vm_file);
> > +       struct folio *folio;
> > +       vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > +       filemap_invalidate_lock_shared(inode->i_mapping);
> > +
> > +       folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > +       if (IS_ERR(folio)) {
> > +               switch (PTR_ERR(folio)) {
> > +               case -EAGAIN:
> > +                       ret = VM_FAULT_RETRY;
> > +                       break;
> > +               case -ENOMEM:
> > +                       ret = VM_FAULT_OOM;
> > +                       break;
> > +               default:
> > +                       ret = VM_FAULT_SIGBUS;
> > +                       break;
>
> Tiny nit-pick: This smells almost like an open-coded vmf_error(). For
> the non-EAGAIN case, can we just call vmf_error()?

I wasn't aware of that. I will fix this on the respin, thanks for
pointing it out.

That said, any idea why vmf_error() doesn't convert -EAGAIN to VM_FAULT_RETRY?

Cheers,
/fuad

> > +               }
> > +               goto out_filemap;
> > +       }
> > +
> > +       if (folio_test_hwpoison(folio)) {
> > +               ret = VM_FAULT_HWPOISON;
> > +               goto out_folio;
> > +       }
> > +
> > +       /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
> > +       if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
> > +               ret = VM_FAULT_SIGBUS;
> > +               goto out_folio;
> > +       }
Fuad Tabba March 7, 2025, 8:57 a.m. UTC | #8
Hi Ackerley,

On Thu, 6 Mar 2025 at 22:46, Ackerley Tng <ackerleytng@google.com> wrote:
>
> Fuad Tabba <tabba@google.com> writes:
>
> > Add support for mmap() and fault() for guest_memfd backed memory
> > in the host for VMs that support in-place conversion between
> > shared and private. To that end, this patch adds the ability to
> > check whether the VM type supports in-place conversion, and only
> > allows mapping its memory if that's the case.
> >
> > Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> > indicates that the VM supports shared memory in guest_memfd, or
> > that the host can create VMs that support shared memory.
> > Supporting shared memory implies that memory can be mapped when
> > shared with the host.
> >
> > This is controlled by the KVM_GMEM_SHARED_MEM configuration
> > option.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  include/linux/kvm_host.h |  11 ++++
> >  include/uapi/linux/kvm.h |   1 +
> >  virt/kvm/guest_memfd.c   | 105 +++++++++++++++++++++++++++++++++++++++
> >  virt/kvm/kvm_main.c      |   4 ++
> >  4 files changed, 121 insertions(+)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 7788e3625f6d..2d025b8ee20e 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> >  }
> >  #endif
> >
> > +/*
> > + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> > + * private memory is enabled and it supports in-place shared/private conversion.
> > + */
> > +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
>
> Is this a copypasta error? I'm wondering if this should be
> !IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM).

Yes. Kirill had pointed that out as well. I will fix it.

> Also, would you consider defining a __weak function to be overridden by
> different architectures, or would weak symbols not be inline-able?

I have no strong opinion, but I think that it should follow the same
pattern as kvm_arch_has_private_mem().

Cheers,
/fuad

> > +static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
> > +{
> > +     return false;
> > +}
> > +#endif
> > +
> >
> > <snip>
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7788e3625f6d..2d025b8ee20e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -728,6 +728,17 @@  static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
 }
 #endif
 
+/*
+ * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
+ * private memory is enabled and it supports in-place shared/private conversion.
+ */
+#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
+static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
+{
+	return false;
+}
+#endif
+
 #ifndef kvm_arch_has_readonly_mem
 static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
 {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 45e6d8fca9b9..117937a895da 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -929,6 +929,7 @@  struct kvm_enable_cap {
 #define KVM_CAP_PRE_FAULT_MEMORY 236
 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
 #define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_GMEM_SHARED_MEM 239
 
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index b2aa6bf24d3a..4291956b51ae 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -312,7 +312,112 @@  static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
 	return gfn - slot->base_gfn + slot->gmem.pgoff;
 }
 
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+static bool kvm_gmem_offset_is_shared(struct file *file, pgoff_t index)
+{
+	struct kvm_gmem *gmem = file->private_data;
+
+	/* For now, VMs that support shared memory share all their memory. */
+	return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
+}
+
+static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vmf->vma->vm_file);
+	struct folio *folio;
+	vm_fault_t ret = VM_FAULT_LOCKED;
+
+	filemap_invalidate_lock_shared(inode->i_mapping);
+
+	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
+	if (IS_ERR(folio)) {
+		switch (PTR_ERR(folio)) {
+		case -EAGAIN:
+			ret = VM_FAULT_RETRY;
+			break;
+		case -ENOMEM:
+			ret = VM_FAULT_OOM;
+			break;
+		default:
+			ret = VM_FAULT_SIGBUS;
+			break;
+		}
+		goto out_filemap;
+	}
+
+	if (folio_test_hwpoison(folio)) {
+		ret = VM_FAULT_HWPOISON;
+		goto out_folio;
+	}
+
+	/* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
+	if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	/*
+	 * Only private folios are marked as "guestmem" so far, and we never
+	 * expect private folios at this point.
+	 */
+	if (WARN_ON_ONCE(folio_test_guestmem(folio)))  {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	/* No support for huge pages. */
+	if (WARN_ON_ONCE(folio_test_large(folio))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	if (!folio_test_uptodate(folio)) {
+		clear_highpage(folio_page(folio, 0));
+		kvm_gmem_mark_prepared(folio);
+	}
+
+	vmf->page = folio_file_page(folio, vmf->pgoff);
+
+out_folio:
+	if (ret != VM_FAULT_LOCKED) {
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+
+out_filemap:
+	filemap_invalidate_unlock_shared(inode->i_mapping);
+
+	return ret;
+}
+
+static const struct vm_operations_struct kvm_gmem_vm_ops = {
+	.fault = kvm_gmem_fault,
+};
+
+static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kvm_gmem *gmem = file->private_data;
+
+	if (!kvm_arch_gmem_supports_shared_mem(gmem->kvm))
+		return -ENODEV;
+
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+	    (VM_SHARED | VM_MAYSHARE)) {
+		return -EINVAL;
+	}
+
+	file_accessed(file);
+	vm_flags_set(vma, VM_DONTDUMP);
+	vma->vm_ops = &kvm_gmem_vm_ops;
+
+	return 0;
+}
+#else
+#define kvm_gmem_mmap NULL
+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
+
 static struct file_operations kvm_gmem_fops = {
+	.mmap		= kvm_gmem_mmap,
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,
 	.fallocate	= kvm_gmem_fallocate,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba0327e2d0d3..38f0f402ea46 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4830,6 +4830,10 @@  static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #ifdef CONFIG_KVM_PRIVATE_MEM
 	case KVM_CAP_GUEST_MEMFD:
 		return !kvm || kvm_arch_has_private_mem(kvm);
+#endif
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+	case KVM_CAP_GMEM_SHARED_MEM:
+		return !kvm || kvm_arch_gmem_supports_shared_mem(kvm);
 #endif
 	default:
 		break;