@@ -6346,7 +6346,10 @@ and cannot be resized (guest_memfd files do however support PUNCH_HOLE).
struct kvm_create_guest_memfd {
__u64 size;
__u64 flags;
- __u64 reserved[6];
+ __u64 host_nodes_addr;
+ __u16 maxnode;
+ __u8 mpol_mode;
+ __u8 reserved[37];
};
Conceptually, the inode backing a guest_memfd file represents physical memory,
@@ -6367,6 +6370,14 @@ a single guest_memfd file, but the bound ranges must not overlap).
See KVM_SET_USER_MEMORY_REGION2 for additional details.
+NUMA memory policy support for KVM guest_memfd allows the host to specify
+memory allocation behavior for guest NUMA nodes, similar to mbind(). If the
+GUEST_MEMFD_NUMA_ENABLE flag (bit 0 of flags) is set, memory allocated for the
+guest will use the specified policy and host nodes for physical memory.
+
+- mpol_mode is the policy mode: default, preferred, bind, or interleave.
+- host_nodes_addr points to a bitmask of host nodes holding up to maxnode bits.
+
4.143 KVM_PRE_FAULT_MEMORY
---------------------------
@@ -299,4 +299,8 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol)
}
#endif /* CONFIG_NUMA */
+/* Defined under CONFIG_KVM_PRIVATE_MEM only. NOTE(review): confirm !CONFIG_NUMA builds link. */
+struct mempolicy *create_mpol_from_args(unsigned char mode,
+ const unsigned long __user *nmask,
+ unsigned short maxnode);
#endif
@@ -1561,7 +1561,10 @@ struct kvm_memory_attributes {
struct kvm_create_guest_memfd {
__u64 size;
__u64 flags;
- __u64 reserved[6];
+ __u64 host_nodes_addr; /* user-space address of the host node bitmask */
+ __u16 maxnode; /* number of bits in the bitmask at host_nodes_addr */
+ __u8 mpol_mode; /* NUMA memory policy mode */
+ __u8 reserved[37];
};
#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
@@ -3557,3 +3557,67 @@ static int __init mempolicy_sysfs_init(void)
late_initcall(mempolicy_sysfs_init);
#endif /* CONFIG_SYSFS */
+
+#ifdef CONFIG_KVM_PRIVATE_MEM
+/**
+ * create_mpol_from_args - create a mempolicy structure from args
+ * @mode: NUMA memory policy mode
+ * @nmask: user-space pointer to the bitmask of NUMA nodes
+ * @maxnode: number of bits in the nodes bitmask
+ *
+ * Create a mempolicy from given nodemask and memory policy such as
+ * default, preferred, interleave or bind.
+ *
+ * The returned policy is newly created by mpol_new(); the caller is
+ * responsible for it. A NULL result from mpol_new() is passed through
+ * unchanged, so callers must test with IS_ERR(), not IS_ERR_OR_NULL().
+ *
+ * Return: error encoded in a pointer, NULL, or memory policy on success.
+ */
+struct mempolicy *create_mpol_from_args(unsigned char mode,
+					const unsigned long __user *nmask,
+					unsigned short maxnode)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned short mode_flags;
+	struct mempolicy *mpol;
+	nodemask_t nodes;
+	int lmode = mode;
+	int err;
+
+	err = sanitize_mpol_flags(&lmode, &mode_flags);
+	if (err)
+		return ERR_PTR(err);
+
+	err = get_nodes(&nodes, nmask, maxnode);
+	if (err)
+		return ERR_PTR(err);
+
+	/* Use the mode as sanitized above, not the raw argument. */
+	mpol = mpol_new(lmode, mode_flags, &nodes);
+	if (IS_ERR_OR_NULL(mpol))
+		return mpol;
+
+	NODEMASK_SCRATCH(scratch);
+	if (!scratch) {
+		err = -ENOMEM;
+		goto err_put;
+	}
+
+	mmap_write_lock(mm);
+	err = mpol_set_nodemask(mpol, &nodes, scratch);
+	mmap_write_unlock(mm);
+	NODEMASK_SCRATCH_FREE(scratch);
+
+	if (err)
+		goto err_put;
+
+	return mpol;
+
+err_put:
+	/* The policy was never handed out; drop it so it is not leaked. */
+	mpol_put(mpol);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(create_mpol_from_args);
+#endif
@@ -1546,7 +1546,10 @@ struct kvm_memory_attributes {
struct kvm_create_guest_memfd {
__u64 size;
__u64 flags;
- __u64 reserved[6];
+ __u64 host_nodes_addr; /* user-space address of the host node bitmask */
+ __u16 maxnode; /* number of bits in the bitmask at host_nodes_addr */
+ __u8 mpol_mode; /* NUMA memory policy mode */
+ __u8 reserved[37];
};
#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
@@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include <linux/pagemap.h>
#include <linux/anon_inodes.h>
+#include <linux/mempolicy.h>
#include "kvm_mm.h"
@@ -445,7 +446,8 @@ static const struct inode_operations kvm_gmem_iops = {
.setattr = kvm_gmem_setattr,
};
-static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
+static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags,
+ struct mempolicy *pol)
{
const char *anon_name = "[kvm-gmem]";
struct kvm_gmem *gmem;
@@ -478,6 +480,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
inode->i_private = (void *)(unsigned long)flags;
inode->i_op = &kvm_gmem_iops;
inode->i_mapping->a_ops = &kvm_gmem_aops;
+ inode->i_mapping->i_private_data = (void *)pol;
inode->i_mode |= S_IFREG;
inode->i_size = size;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
@@ -505,7 +508,8 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
{
loff_t size = args->size;
u64 flags = args->flags;
- u64 valid_flags = 0;
+ u64 valid_flags = GUEST_MEMFD_NUMA_ENABLE;
+ struct mempolicy *mpol = NULL;
if (flags & ~valid_flags)
return -EINVAL;
@@ -513,7 +517,25 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
if (size <= 0 || !PAGE_ALIGNED(size))
return -EINVAL;
- return __kvm_gmem_create(kvm, size, flags);
+	if (flags & GUEST_MEMFD_NUMA_ENABLE) {
+		const unsigned long __user *user_nmask =
+			u64_to_user_ptr(args->host_nodes_addr);
+
+		mpol = create_mpol_from_args(args->mpol_mode, user_nmask,
+					     args->maxnode);
+		/*
+		 * Only an ERR_PTR is a failure. create_mpol_from_args() can
+		 * return NULL, and PTR_ERR(NULL) is 0: bailing out on NULL
+		 * would report success without creating the file.
+		 */
+		if (IS_ERR(mpol))
+			return PTR_ERR(mpol);
+	}
+
+	/*
+	 * NOTE(review): nothing visible here drops the mempolicy reference if
+	 * __kvm_gmem_create() fails or when the inode is torn down - confirm.
+	 */
+	return __kvm_gmem_create(kvm, size, flags, mpol);
}
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
@@ -36,6 +36,9 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
#endif /* HAVE_KVM_PFNCACHE */
#ifdef CONFIG_KVM_PRIVATE_MEM
+/* kvm_create_guest_memfd.flags bit requesting a NUMA policy. NOTE(review): confirm uapi exposure. */
+#define GUEST_MEMFD_NUMA_ENABLE BIT_ULL(0)
+
void kvm_gmem_init(struct module *module);
int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,