diff mbox series

[RFC,v1,1/9] KVM: guest_memfd: Allow host to mmap guest_memfd() pages

Message ID 20250122152738.1173160-2-tabba@google.com (mailing list archive)
State Not Applicable
Headers show
Series KVM: Mapping of guest_memfd at the host and a software protected VM type | expand

Commit Message

Fuad Tabba Jan. 22, 2025, 3:27 p.m. UTC
Add support for mmap() and fault() for guest_memfd in the host
for VMs that support in place conversion between shared and
private. To that end, this patch adds the ability to check
whether the architecture has that support, and only allows mmap()
if that's the case.

Additionally, this is gated with a new configuration option,
CONFIG_KVM_GMEM_MAPPABLE.

Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +
 include/linux/kvm_host.h        | 11 +++++
 virt/kvm/Kconfig                |  4 ++
 virt/kvm/guest_memfd.c          | 71 +++++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+)

Comments

David Hildenbrand Jan. 22, 2025, 10:06 p.m. UTC | #1
On 22.01.25 16:27, Fuad Tabba wrote:
> Add support for mmap() and fault() for guest_memfd in the host
> for VMs that support in place conversion between shared and
> private.
 > To that end, this patch adds the ability to check> whether the 
architecture has that support, and only allows mmap()
> if that's the case.
 > > Additionally, this is gated with a new configuration option,
> CONFIG_KVM_GMEM_MAPPABLE.
 > > Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>   arch/x86/include/asm/kvm_host.h |  2 +
>   include/linux/kvm_host.h        | 11 +++++
>   virt/kvm/Kconfig                |  4 ++
>   virt/kvm/guest_memfd.c          | 71 +++++++++++++++++++++++++++++++++
>   4 files changed, 88 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e159e44a6a1b..c0e149bc1d79 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2206,6 +2206,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
>   #define kvm_arch_has_private_mem(kvm) false
>   #endif
>   
> +#define kvm_arch_private_mem_inplace(kvm) false
> +
>   #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
>   
>   static inline u16 kvm_read_ldt(void)
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 401439bb21e3..ebca0ab4c5e2 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -717,6 +717,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
>   }
>   #endif
>   
> +/*
> + * Arch code must define kvm_arch_private_mem_inplace if support for private
> + * memory is enabled it supports in-place conversion between shared and private.
> + */
> +#if !defined(kvm_arch_private_mem_inplace) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
> +static inline bool kvm_arch_private_mem_inplace(struct kvm *kvm)
I assume right now this would rather indicate "support for shared 
(mappable) memory in guest_memfd".

Maybe there is a better way to express that :)

kvm_arch_gmem_supports_shared_mem ?

The in-place conversion is (at least to me) implied with support for 
shared memory.

> +
>   #ifndef kvm_arch_has_readonly_mem
>   static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
>   {
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 54e959e7d68f..59400fd8f539 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -124,3 +124,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
>   config HAVE_KVM_ARCH_GMEM_INVALIDATE
>          bool
>          depends on KVM_PRIVATE_MEM
> +
> +config KVM_GMEM_MAPPABLE
> +       select KVM_PRIVATE_MEM

Easier to grasp might be:

KVM_GMEM_MAPPABLE -> KVM_GMEM_SHARED_MEM

Support for "shared" memory kind of imply mmap support (otherwise, how 
to make use of it :) ),


(KVM_PRIVATE_MEM -> KVM_GMEM might also make sense, but it's a different 
discussion)

> +       bool
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 47a9f68f7b24..9ee162bf6bde 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -307,7 +307,78 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
>   	return gfn - slot->base_gfn + slot->gmem.pgoff;
>   }
>   
> +#ifdef CONFIG_KVM_GMEM_MAPPABLE
> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> +{
> +	struct inode *inode = file_inode(vmf->vma->vm_file);
> +	struct folio *folio;
> +	vm_fault_t ret = VM_FAULT_LOCKED;
> +
> +	filemap_invalidate_lock_shared(inode->i_mapping);
> +
> +	folio = kvm_gmem_get_folio(inode, vmf->pgoff);


Would the idea be later that kvm_gmem_get_folio() would fail on private 
memory, or do you envision other checks in this code here in the future?
Fuad Tabba Jan. 23, 2025, 9:44 a.m. UTC | #2
Hi David,

On Wed, 22 Jan 2025 at 22:07, David Hildenbrand <david@redhat.com> wrote:
>
> On 22.01.25 16:27, Fuad Tabba wrote:
> > Add support for mmap() and fault() for guest_memfd in the host
> > for VMs that support in place conversion between shared and
> > private.
>  > To that end, this patch adds the ability to check> whether the
> architecture has that support, and only allows mmap()
> > if that's the case.
>  > > Additionally, this is gated with a new configuration option,
> > CONFIG_KVM_GMEM_MAPPABLE.
>  > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >   arch/x86/include/asm/kvm_host.h |  2 +
> >   include/linux/kvm_host.h        | 11 +++++
> >   virt/kvm/Kconfig                |  4 ++
> >   virt/kvm/guest_memfd.c          | 71 +++++++++++++++++++++++++++++++++
> >   4 files changed, 88 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index e159e44a6a1b..c0e149bc1d79 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -2206,6 +2206,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
> >   #define kvm_arch_has_private_mem(kvm) false
> >   #endif
> >
> > +#define kvm_arch_private_mem_inplace(kvm) false
> > +
> >   #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
> >
> >   static inline u16 kvm_read_ldt(void)
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 401439bb21e3..ebca0ab4c5e2 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -717,6 +717,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> >   }
> >   #endif
> >
> > +/*
> > + * Arch code must define kvm_arch_private_mem_inplace if support for private
> > + * memory is enabled it supports in-place conversion between shared and private.
> > + */
> > +#if !defined(kvm_arch_private_mem_inplace) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
> > +static inline bool kvm_arch_private_mem_inplace(struct kvm *kvm)
> I assume right now this would rather indicate "support for shared
> (mappable) memory in guest_memfd".
>
> Maybe there is a better way to express that :)
>
> kvm_arch_gmem_supports_shared_mem ?
>
> The in-place conversion is (at least to me) implied with support for
> shared memory.

Sounds better than what I had.

> > +
> >   #ifndef kvm_arch_has_readonly_mem
> >   static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
> >   {
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 54e959e7d68f..59400fd8f539 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -124,3 +124,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
> >   config HAVE_KVM_ARCH_GMEM_INVALIDATE
> >          bool
> >          depends on KVM_PRIVATE_MEM
> > +
> > +config KVM_GMEM_MAPPABLE
> > +       select KVM_PRIVATE_MEM
>
> Easier to grasp might be:
>
> KVM_GMEM_MAPPABLE -> KVM_GMEM_SHARED_MEM
>
> Support for "shared" memory kind of imply mmap support (otherwise, how
> to make use of it :) ),

Like the one above, this makes more sense.


>
> (KVM_PRIVATE_MEM -> KVM_GMEM might also make sense, but it's a different
> discussion)
>
> > +       bool
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index 47a9f68f7b24..9ee162bf6bde 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -307,7 +307,78 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >       return gfn - slot->base_gfn + slot->gmem.pgoff;
> >   }
> >
> > +#ifdef CONFIG_KVM_GMEM_MAPPABLE
> > +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> > +{
> > +     struct inode *inode = file_inode(vmf->vma->vm_file);
> > +     struct folio *folio;
> > +     vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > +     filemap_invalidate_lock_shared(inode->i_mapping);
> > +
> > +     folio = kvm_gmem_get_folio(inode, vmf->pgoff);
>
>
> Would the idea be later that kvm_gmem_get_folio() would fail on private
> memory, or do you envision other checks in this code here in the future?

There would be other checks in the future, the idea is that they would
be the ones in:
https://lore.kernel.org/all/20250117163001.2326672-8-tabba@google.com/

What this patch series has, but the other one doesn't, is checks on
mmap to ensure whether the VM type supports that operation in
principle or not.

Thanks,
/fuad

> --
> Cheers,
>
> David / dhildenb
>
David Hildenbrand Jan. 23, 2025, 10:27 a.m. UTC | #3
>>> +       bool
>>> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
>>> index 47a9f68f7b24..9ee162bf6bde 100644
>>> --- a/virt/kvm/guest_memfd.c
>>> +++ b/virt/kvm/guest_memfd.c
>>> @@ -307,7 +307,78 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
>>>        return gfn - slot->base_gfn + slot->gmem.pgoff;
>>>    }
>>>
>>> +#ifdef CONFIG_KVM_GMEM_MAPPABLE
>>> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
>>> +{
>>> +     struct inode *inode = file_inode(vmf->vma->vm_file);
>>> +     struct folio *folio;
>>> +     vm_fault_t ret = VM_FAULT_LOCKED;
>>> +
>>> +     filemap_invalidate_lock_shared(inode->i_mapping);
>>> +
>>> +     folio = kvm_gmem_get_folio(inode, vmf->pgoff);
>>
>>
>> Would the idea be later that kvm_gmem_get_folio() would fail on private
>> memory, or do you envision other checks in this code here in the future?
> 
> There would be other checks in the future, the idea is that they would
> be the ones in:
> https://lore.kernel.org/all/20250117163001.2326672-8-tabba@google.com/
> 

Thanks, so I wonder if this patch should just add necessary callback(s) 
as well, to make this patch look like it adds most of the infrastructure 
on the mmap level.

kvm_gmem_is_shared() or sth like that, documenting that it must be 
called after kvm_gmem_get_folio() -- with a raised folio reference / 
folio lock.

Alternatively, provide a

	kvm_gmem_get_shared_folio()

that abstracts that operation.

We could also for now ensure that we really only get small folios back, 
and even get rid of the clearing loop.


The "WARN_ON_ONCE(folio_test_guestmem(folio)" would be added separately.

In the context of this series, the callback would be a nop and always 
say "yes".
Fuad Tabba Jan. 23, 2025, 11:02 a.m. UTC | #4
On Thu, 23 Jan 2025 at 10:28, David Hildenbrand <david@redhat.com> wrote:
>
> >>> +       bool
> >>> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> >>> index 47a9f68f7b24..9ee162bf6bde 100644
> >>> --- a/virt/kvm/guest_memfd.c
> >>> +++ b/virt/kvm/guest_memfd.c
> >>> @@ -307,7 +307,78 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >>>        return gfn - slot->base_gfn + slot->gmem.pgoff;
> >>>    }
> >>>
> >>> +#ifdef CONFIG_KVM_GMEM_MAPPABLE
> >>> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> >>> +{
> >>> +     struct inode *inode = file_inode(vmf->vma->vm_file);
> >>> +     struct folio *folio;
> >>> +     vm_fault_t ret = VM_FAULT_LOCKED;
> >>> +
> >>> +     filemap_invalidate_lock_shared(inode->i_mapping);
> >>> +
> >>> +     folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> >>
> >>
> >> Would the idea be later that kvm_gmem_get_folio() would fail on private
> >> memory, or do you envision other checks in this code here in the future?
> >
> > There would be other checks in the future, the idea is that they would
> > be the ones in:
> > https://lore.kernel.org/all/20250117163001.2326672-8-tabba@google.com/
> >
>
> Thanks, so I wonder if this patch should just add necessary callback(s)
> as well, to make this patch look like it adds most of the infrastructure
> on the mmap level.
>
> kvm_gmem_is_shared() or sth like that, documenting that it must be
> called after kvm_gmem_get_folio() -- with a raised folio reference /
> folio lock.
>
> Alternatively, provide a
>
>         kvm_gmem_get_shared_folio()
>
> that abstracts that operation.
>
> We could also for now ensure that we really only get small folios back,
> and even get rid of the clearing loop.
>
>
> The "WARN_ON_ONCE(folio_test_guestmem(folio)" would be added separately.
>
> In the context of this series, the callback would be a nop and always
> say "yes".

I agree, especially if this patch series were to serve as a prelude to
the other one that adds restricted mmap() support.

Cheers,
/fuad

> --
> Cheers,
>
> David / dhildenb
>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e159e44a6a1b..c0e149bc1d79 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2206,6 +2206,8 @@  void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
 #define kvm_arch_has_private_mem(kvm) false
 #endif
 
+#define kvm_arch_private_mem_inplace(kvm) false
+
 #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
 
 static inline u16 kvm_read_ldt(void)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 401439bb21e3..ebca0ab4c5e2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -717,6 +717,17 @@  static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
 }
 #endif
 
+/*
+ * Arch code must define kvm_arch_private_mem_inplace if support for private
+ * memory is enabled it supports in-place conversion between shared and private.
+ */
+#if !defined(kvm_arch_private_mem_inplace) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
+static inline bool kvm_arch_private_mem_inplace(struct kvm *kvm)
+{
+	return false;
+}
+#endif
+
 #ifndef kvm_arch_has_readonly_mem
 static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
 {
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 54e959e7d68f..59400fd8f539 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -124,3 +124,7 @@  config HAVE_KVM_ARCH_GMEM_PREPARE
 config HAVE_KVM_ARCH_GMEM_INVALIDATE
        bool
        depends on KVM_PRIVATE_MEM
+
+config KVM_GMEM_MAPPABLE
+       select KVM_PRIVATE_MEM
+       bool
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 47a9f68f7b24..9ee162bf6bde 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -307,7 +307,78 @@  static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
 	return gfn - slot->base_gfn + slot->gmem.pgoff;
 }
 
+#ifdef CONFIG_KVM_GMEM_MAPPABLE
+static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vmf->vma->vm_file);
+	struct folio *folio;
+	vm_fault_t ret = VM_FAULT_LOCKED;
+
+	filemap_invalidate_lock_shared(inode->i_mapping);
+
+	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
+	if (IS_ERR(folio)) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_filemap;
+	}
+
+	if (folio_test_hwpoison(folio)) {
+		ret = VM_FAULT_HWPOISON;
+		goto out_folio;
+	}
+
+	if (!folio_test_uptodate(folio)) {
+		unsigned long nr_pages = folio_nr_pages(folio);
+		unsigned long i;
+
+		for (i = 0; i < nr_pages; i++)
+			clear_highpage(folio_page(folio, i));
+
+		folio_mark_uptodate(folio);
+	}
+
+	vmf->page = folio_file_page(folio, vmf->pgoff);
+
+out_folio:
+	if (ret != VM_FAULT_LOCKED) {
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+
+out_filemap:
+	filemap_invalidate_unlock_shared(inode->i_mapping);
+
+	return ret;
+}
+
+static const struct vm_operations_struct kvm_gmem_vm_ops = {
+	.fault = kvm_gmem_fault,
+};
+
+static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kvm_gmem *gmem = file->private_data;
+
+	if (!kvm_arch_private_mem_inplace(gmem->kvm))
+		return -ENODEV;
+
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+	    (VM_SHARED | VM_MAYSHARE)) {
+		return -EINVAL;
+	}
+
+	file_accessed(file);
+	vm_flags_set(vma, VM_DONTDUMP);
+	vma->vm_ops = &kvm_gmem_vm_ops;
+
+	return 0;
+}
+#else
+#define kvm_gmem_mmap NULL
+#endif /* CONFIG_KVM_GMEM_MAPPABLE */
+
 static struct file_operations kvm_gmem_fops = {
+	.mmap		= kvm_gmem_mmap,
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,
 	.fallocate	= kvm_gmem_fallocate,