diff mbox series

[v3,kvm/queue,04/16] KVM: Extend the memslot to support fd-based private memory

Message ID 20211223123011.41044-5-chao.p.peng@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: mm: fd-based approach for supporting KVM guest private memory | expand

Commit Message

Chao Peng Dec. 23, 2021, 12:29 p.m. UTC
Extend the memslot definition to provide fd-based private memory support
by adding two new fields(fd/ofs). The memslot then can maintain memory
for both shared and private pages in a single memslot. Shared pages are
provided in the existing way by using userspace_addr(hva) field and
get_user_pages() while private pages are provided through the new
fields(fd/ofs). Since there is no 'hva' concept anymore for private
memory we cannot call get_user_pages() to get a pfn, instead we rely on
the newly introduced MEMFD_OPS callbacks to do the same job.

This new extension is indicated by a new flag KVM_MEM_PRIVATE.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
 include/linux/kvm_host.h | 10 ++++++++++
 include/uapi/linux/kvm.h | 12 ++++++++++++
 2 files changed, 22 insertions(+)

Comments

Sean Christopherson Dec. 23, 2021, 5:35 p.m. UTC | #1
On Thu, Dec 23, 2021, Chao Peng wrote:
> Extend the memslot definition to provide fd-based private memory support
> by adding two new fields(fd/ofs). The memslot then can maintain memory
> for both shared and private pages in a single memslot. Shared pages are
> provided in the existing way by using userspace_addr(hva) field and
> get_user_pages() while private pages are provided through the new
> fields(fd/ofs). Since there is no 'hva' concept anymore for private
> memory we cannot call get_user_pages() to get a pfn, instead we rely on
> the newly introduced MEMFD_OPS callbacks to do the same job.
> 
> This new extension is indicated by a new flag KVM_MEM_PRIVATE.
> 
> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
>  include/linux/kvm_host.h | 10 ++++++++++
>  include/uapi/linux/kvm.h | 12 ++++++++++++
>  2 files changed, 22 insertions(+)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f8ed799e8674..2cd35560c44b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -460,8 +460,18 @@ struct kvm_memory_slot {
>  	u32 flags;
>  	short id;
>  	u16 as_id;
> +	u32 fd;

There should be no need to store the fd in the memslot, the fd should be unneeded
outside of __kvm_set_memory_region(), e.g.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1caebded52c4..4e43262887a3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2029,10 +2029,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
        new->npages = npages;
        new->flags = mem->flags;
        new->userspace_addr = mem->userspace_addr;
-       new->fd = mem->fd;
-       new->file = NULL;
-       new->ofs = mem->ofs;
-
+       if (mem->flags & KVM_MEM_PRIVATE) {
+               new->private_file = fget(mem->private_fd);
+               new->private_offset = mem->private_offset;
+       }
        r = kvm_set_memslot(kvm, old, new, change);
        if (r)
                kfree(new);

> +	struct file *file;

Please use more descriptive names, shaving characters is not at all priority.

> +	u64 ofs;

I believe this should be loff_t.

	struct file *private_file;
	struct loff_t private_offset;

>  };
>  
> +static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot)
> +{
> +	if (slot && (slot->flags & KVM_MEM_PRIVATE))
> +		return true;
> +	return false;

	return slot && (slot->flags & KVM_MEM_PRIVATE);

> +}
> +
>  static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
>  {
>  	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 1daa45268de2..41434322fa23 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -103,6 +103,17 @@ struct kvm_userspace_memory_region {
>  	__u64 userspace_addr; /* start of the userspace allocated memory */
>  };
>  
> +struct kvm_userspace_memory_region_ext {
> +	__u32 slot;
> +	__u32 flags;
> +	__u64 guest_phys_addr;
> +	__u64 memory_size; /* bytes */
> +	__u64 userspace_addr; /* hva */

Would it make sense to embed "struct kvm_userspace_memory_region"?

> +	__u64 ofs; /* offset into fd */
> +	__u32 fd;

Again, use descriptive names, then comments like "offset into fd" are unnecessary.

	__u64 private_offset;
	__u32 private_fd;

> +	__u32 padding[5];
> +};
> +
>  /*
>   * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
>   * other bits are reserved for kvm internal use which are defined in
> @@ -110,6 +121,7 @@ struct kvm_userspace_memory_region {
>   */
>  #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
>  #define KVM_MEM_READONLY	(1UL << 1)
> +#define KVM_MEM_PRIVATE		(1UL << 2)
>  
>  /* for KVM_IRQ_LINE */
>  struct kvm_irq_level {
> -- 
> 2.17.1
>
Chao Peng Dec. 31, 2021, 2:53 a.m. UTC | #2
On Thu, Dec 23, 2021 at 05:35:37PM +0000, Sean Christopherson wrote:
> On Thu, Dec 23, 2021, Chao Peng wrote:
> 
> > +	struct file *file;
> 
> Please use more descriptive names, shaving characters is not at all priority.
> 
> > +	u64 ofs;
> 
> I believe this should be loff_t.
> 
> 	struct file *private_file;
> 	struct loff_t private_offset;
> 
> >  };
> >  
> > +static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot)
> > +{
> > +	if (slot && (slot->flags & KVM_MEM_PRIVATE))
> > +		return true;
> > +	return false;
> 
> 	return slot && (slot->flags & KVM_MEM_PRIVATE);
> 
> > +}
> > +
> >  static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
> >  {
> >  	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 1daa45268de2..41434322fa23 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -103,6 +103,17 @@ struct kvm_userspace_memory_region {
> >  	__u64 userspace_addr; /* start of the userspace allocated memory */
> >  };
> >  
> > +struct kvm_userspace_memory_region_ext {
> > +	__u32 slot;
> > +	__u32 flags;
> > +	__u64 guest_phys_addr;
> > +	__u64 memory_size; /* bytes */
> > +	__u64 userspace_addr; /* hva */
> 
> Would it make sense to embed "struct kvm_userspace_memory_region"?
> 
> > +	__u64 ofs; /* offset into fd */
> > +	__u32 fd;
> 
> Again, use descriptive names, then comments like "offset into fd" are unnecessary.
> 
> 	__u64 private_offset;
> 	__u32 private_fd;

My original thought is the same fields might be used for shared memslot
as well in future (e.g. there may be another KVM_MEM_* bit can reuse the
same fields for shared slot) so non private-specific name may sound
better. But definitely I have no objection and can use private_* names
for next version unless there is other objection.

Thanks,
Chao
> 
> > +	__u32 padding[5];
> > +};
> > +
> >  /*
> >   * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
> >   * other bits are reserved for kvm internal use which are defined in
> > @@ -110,6 +121,7 @@ struct kvm_userspace_memory_region {
> >   */
> >  #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
> >  #define KVM_MEM_READONLY	(1UL << 1)
> > +#define KVM_MEM_PRIVATE		(1UL << 2)
> >  
> >  /* for KVM_IRQ_LINE */
> >  struct kvm_irq_level {
> > -- 
> > 2.17.1
> >
Sean Christopherson Jan. 4, 2022, 5:34 p.m. UTC | #3
On Fri, Dec 31, 2021, Chao Peng wrote:
> On Thu, Dec 23, 2021 at 05:35:37PM +0000, Sean Christopherson wrote:
> > On Thu, Dec 23, 2021, Chao Peng wrote:
> > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > > index 1daa45268de2..41434322fa23 100644
> > > --- a/include/uapi/linux/kvm.h
> > > +++ b/include/uapi/linux/kvm.h
> > > @@ -103,6 +103,17 @@ struct kvm_userspace_memory_region {
> > >  	__u64 userspace_addr; /* start of the userspace allocated memory */
> > >  };
> > >  
> > > +struct kvm_userspace_memory_region_ext {
> > > +	__u32 slot;
> > > +	__u32 flags;
> > > +	__u64 guest_phys_addr;
> > > +	__u64 memory_size; /* bytes */
> > > +	__u64 userspace_addr; /* hva */
> > 
> > Would it make sense to embed "struct kvm_userspace_memory_region"?
> > 
> > > +	__u64 ofs; /* offset into fd */
> > > +	__u32 fd;
> > 
> > Again, use descriptive names, then comments like "offset into fd" are unnecessary.
> > 
> > 	__u64 private_offset;
> > 	__u32 private_fd;
> 
> My original thought is the same fields might be used for shared memslot
> as well in future (e.g. there may be another KVM_MEM_* bit can reuse the
> same fields for shared slot) so non private-specific name may sound
> better. But definitely I have no objection and can use private_* names
> for next version unless there is other objection.

If that does happen, it's easy enough to wrap them in a union.
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f8ed799e8674..2cd35560c44b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -460,8 +460,18 @@  struct kvm_memory_slot {
 	u32 flags;
 	short id;
 	u16 as_id;
+	u32 fd;
+	struct file *file;
+	u64 ofs;
 };
 
+static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot)
+{
+	if (slot && (slot->flags & KVM_MEM_PRIVATE))
+		return true;
+	return false;
+}
+
 static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
 {
 	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1daa45268de2..41434322fa23 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -103,6 +103,17 @@  struct kvm_userspace_memory_region {
 	__u64 userspace_addr; /* start of the userspace allocated memory */
 };
 
+struct kvm_userspace_memory_region_ext {
+	__u32 slot;
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size; /* bytes */
+	__u64 userspace_addr; /* hva */
+	__u64 ofs; /* offset into fd */
+	__u32 fd;
+	__u32 padding[5];
+};
+
 /*
  * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
  * other bits are reserved for kvm internal use which are defined in
@@ -110,6 +121,7 @@  struct kvm_userspace_memory_region {
  */
 #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
 #define KVM_MEM_READONLY	(1UL << 1)
+#define KVM_MEM_PRIVATE		(1UL << 2)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {