@@ -431,6 +431,26 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
void vm_create_irqchip(struct kvm_vm *vm);
+static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ struct kvm_create_guest_memfd guest_memfd = {
+ .size = size,
+ .flags = flags,
+ };
+
+ return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+}
+
+static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ int fd = __vm_create_guest_memfd(vm, size, flags);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
+ return fd;
+}
+
void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
uint64_t gpa, uint64_t size, void *hva);
int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
@@ -439,6 +459,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
uint32_t flags);
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
@@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
}
+static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
+{
+ return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM;
+}
+
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
static inline uint64_t align_up(uint64_t x, uint64_t size)
{
@@ -669,6 +669,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
close(region->fd);
}
+ if (region->region.guest_memfd >= 0)
+ close(region->region.guest_memfd);
free(region);
}
@@ -870,36 +872,15 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
errno, strerror(errno));
}
-/*
- * VM Userspace Memory Region Add
- *
- * Input Args:
- * vm - Virtual Machine
- * src_type - Storage source for this region.
- * NULL to use anonymous memory.
- * guest_paddr - Starting guest physical address
- * slot - KVM region slot
- * npages - Number of physical pages
- * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
- *
- * Output Args: None
- *
- * Return: None
- *
- * Allocates a memory area of the number of pages specified by npages
- * and maps it to the VM specified by vm, at a starting physical address
- * given by guest_paddr. The region is created with a KVM region slot
- * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
- * region is created with the flags given by flags.
- */
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags)
+/* FIXME: This thing needs to be ripped apart and rewritten. */
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
+ size_t mem_size = npages * vm->page_size;
size_t alignment;
TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
@@ -952,7 +933,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
/* Allocate and initialize new mem region structure. */
region = calloc(1, sizeof(*region));
TEST_ASSERT(region != NULL, "Insufficient Memory");
- region->mmap_size = npages * vm->page_size;
+ region->mmap_size = mem_size;
#ifdef __s390x__
/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
@@ -999,14 +980,47 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
/* As needed perform madvise */
if ((src_type == VM_MEM_SRC_ANONYMOUS ||
src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
- ret = madvise(region->host_mem, npages * vm->page_size,
+ ret = madvise(region->host_mem, mem_size,
src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
- region->host_mem, npages * vm->page_size,
+ region->host_mem, mem_size,
vm_mem_backing_src_alias(src_type)->name);
}
region->backing_src_type = src_type;
+
+ if (flags & KVM_MEM_PRIVATE) {
+ if (guest_memfd < 0) {
+ uint32_t guest_memfd_flags = 0;
+
+ /*
+ * Allow hugepages for the guest memfd backing if the
+ * "normal" backing is allowed/required to be huge.
+ */
+ if (src_type != VM_MEM_SRC_ANONYMOUS &&
+ src_type != VM_MEM_SRC_SHMEM)
+ guest_memfd_flags |= KVM_GUEST_MEMFD_ALLOW_HUGEPAGE;
+
+ TEST_ASSERT(!guest_memfd_offset,
+ "Offset must be zero when creating new guest_memfd");
+ guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+ } else {
+ /*
+ * Install a unique fd for each memslot so that the fd
+ * can be closed when the region is deleted without
+ * needing to track if the fd is owned by the framework
+ * or by the caller.
+ */
+ guest_memfd = dup(guest_memfd);
+ TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ }
+
+ region->region.guest_memfd = guest_memfd;
+ region->region.guest_memfd_offset = guest_memfd_offset;
+ } else {
+ region->region.guest_memfd = -1;
+ }
+
region->unused_phy_pages = sparsebit_alloc();
sparsebit_set_num(region->unused_phy_pages,
guest_paddr >> vm->page_shift, npages);
@@ -1019,9 +1033,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx",
+ " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n",
ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size);
+ guest_paddr, (uint64_t) region->region.memory_size,
+ region->region.guest_memfd);
/* Add to quick lookup data structures */
vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
@@ -1042,6 +1057,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
}
}
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot,
+ uint64_t npages, uint32_t flags)
+{
+ vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
+}
+
/*
* Memslot to region
*