@@ -544,6 +544,16 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
void vm_create_irqchip(struct kvm_vm *vm);
+static inline uint64_t backing_src_guest_memfd_flags(enum vm_mem_backing_src_type t)
+{
+ switch (t) {
+ case VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP:
+ return KVM_GMEM_NO_DIRECT_MAP;
+ default:
+ return 0;
+ }
+}
+
static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
uint64_t flags)
{
@@ -133,6 +133,8 @@ enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
VM_MEM_SRC_SHMEM,
VM_MEM_SRC_SHARED_HUGETLB,
+ VM_MEM_SRC_GUEST_MEMFD,
+ VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP,
NUM_SRC_TYPES,
};
@@ -164,6 +166,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
}
+static inline bool backing_src_is_guest_memfd(enum vm_mem_backing_src_type t)
+{
+ return t == VM_MEM_SRC_GUEST_MEMFD || t == VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP;
+}
+
static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
{
return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM;
@@ -970,6 +970,34 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
alignment = 1;
#endif
+ if (guest_memfd < 0) {
+ if ((flags & KVM_MEM_GUEST_MEMFD) || backing_src_is_guest_memfd(src_type)) {
+ uint64_t guest_memfd_flags = backing_src_guest_memfd_flags(src_type);
+
+ TEST_ASSERT(!guest_memfd_offset,
+ "Offset must be zero when creating new guest_memfd");
+ guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+ }
+ } else {
+ /*
+ * Install a unique fd for each memslot so that the fd
+ * can be closed when the region is deleted without
+ * needing to track if the fd is owned by the framework
+ * or by the caller.
+ */
+ guest_memfd = dup(guest_memfd);
+ TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ }
+
+ if (guest_memfd > 0) {
+ flags |= KVM_MEM_GUEST_MEMFD;
+
+ region->region.guest_memfd = guest_memfd;
+ region->region.guest_memfd_offset = guest_memfd_offset;
+ } else {
+ region->region.guest_memfd = -1;
+ }
+
/*
* When using THP mmap is not guaranteed to returned a hugepage aligned
* address so we have to pad the mmap. Padding is not needed for HugeTLB
@@ -985,10 +1013,13 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (alignment > 1)
region->mmap_size += alignment;
- region->fd = -1;
- if (backing_src_is_shared(src_type))
+ if (backing_src_is_guest_memfd(src_type))
+ region->fd = guest_memfd;
+ else if (backing_src_is_shared(src_type))
region->fd = kvm_memfd_alloc(region->mmap_size,
src_type == VM_MEM_SRC_SHARED_HUGETLB);
+ else
+ region->fd = -1;
region->mmap_start = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
@@ -1016,34 +1047,6 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
}
region->backing_src_type = src_type;
-
- if (guest_memfd < 0) {
- if (flags & KVM_MEM_GUEST_MEMFD) {
- uint32_t guest_memfd_flags = 0;
- TEST_ASSERT(!guest_memfd_offset,
- "Offset must be zero when creating new guest_memfd");
- guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
- }
- } else {
- /*
- * Install a unique fd for each memslot so that the fd
- * can be closed when the region is deleted without
- * needing to track if the fd is owned by the framework
- * or by the caller.
- */
- guest_memfd = dup(guest_memfd);
- TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
- }
-
- if (guest_memfd > 0) {
- flags |= KVM_MEM_GUEST_MEMFD;
-
- region->region.guest_memfd = guest_memfd;
- region->region.guest_memfd_offset = guest_memfd_offset;
- } else {
- region->region.guest_memfd = -1;
- }
-
region->unused_phy_pages = sparsebit_alloc();
if (vm_arch_has_protected_memory(vm))
region->protected_phy_pages = sparsebit_alloc();
@@ -1063,6 +1066,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
guest_paddr, (uint64_t) region->region.memory_size,
region->region.guest_memfd);
+ if (region->region.guest_memfd != -1 && kvm_has_cap(KVM_CAP_MEMORY_ATTRIBUTES))
+ vm_set_memory_attributes(vm, region->region.guest_phys_addr,
+ region->region.memory_size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+
/* Add to quick lookup data structures */
vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
@@ -279,6 +279,14 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
*/
.flag = MAP_SHARED,
},
+ [VM_MEM_SRC_GUEST_MEMFD] = {
+ .name = "guest_memfd",
+ .flag = MAP_SHARED,
+ },
+ [VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP] = {
+ .name = "guest_memfd_no_direct_map",
+ .flag = MAP_SHARED,
+ }
};
_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
"Missing new backing src types?");
Allow selftests to configure their memslots such that userspace_addr is
set to a MAP_SHARED mapping of the guest_memfd that's associated with
the memslot. This is the expected configuration for non-CoCo VMs, where
all guest memory is backed by a guest_memfd whose folios are all marked
shared, but KVM is still able to access guest memory to provide
functionality such as MMIO emulation on x86.

Add backing types for normal guest_memfd, as well as for guest_memfd
whose direct map entries have been removed.

If KVM_CAP_MEMORY_ATTRIBUTES is available, explicitly set gmem-enabled
memslots to private. Otherwise guest page faults would be resolved by
GUP-ing the guest_memfd VMA (instead of going through the VMA-less
guest_memfd fault path in the KVM MMU), which is not always possible
(e.g. if direct map entries are not available).

Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
---
 .../testing/selftests/kvm/include/kvm_util.h  | 10 +++
 .../testing/selftests/kvm/include/test_util.h |  7 ++
 tools/testing/selftests/kvm/lib/kvm_util.c    | 67 ++++++++++---------
 tools/testing/selftests/kvm/lib/test_util.c   |  8 +++
 4 files changed, 62 insertions(+), 30 deletions(-)
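
For context, a caller could exercise the new backing types roughly as
below. This is an illustrative sketch only, not part of the patch; the
GPA, slot and page count are made-up values, and vm is assumed to have
been created with the existing selftest helpers:

	/*
	 * Sketch: back a 512-page memslot at GPA 0x10000000 (slot 1) with
	 * a freshly created guest_memfd whose direct map entries are
	 * removed. Passing -1 as the guest_memfd argument lets
	 * vm_mem_add() create the fd itself, in which case the
	 * guest_memfd offset must be 0.
	 */
	vm_mem_add(vm, VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP,
		   0x10000000, 1, 512, 0, -1, 0);

Tests that select their backing source by name (e.g. via
parse_backing_src_type()) pick the new types up as "guest_memfd" and
"guest_memfd_no_direct_map".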