Message ID | 20231105163040.14904-25-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: guest_memfd() and per-page attributes | expand |
Hi,

Regarding the subject (and the commit message), should we still be calling them "private" slots, or guestmem_slots?

On Sun, Nov 5, 2023 at 4:34 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Add support for creating "private" memslots via KVM_CREATE_GUEST_MEMFD and
> KVM_SET_USER_MEMORY_REGION2. Make vm_userspace_mem_region_add() a wrapper
> to its effective replacement, vm_mem_add(), so that private memslots are
> fully opt-in, i.e. don't require update all tests that add memory regions.

nit: update->updating

>
> Pivot on the KVM_MEM_PRIVATE flag instead of the validity of the "gmem"

KVM_MEM_PRIVATE -> KVM_MEM_GUEST_MEMFD

> file descriptor so that simple tests can let vm_mem_add() do the heavy
> lifting of creating the guest memfd, but also allow the caller to pass in
> an explicit fd+offset so that fancier tests can do things like back
> multiple memslots with a single file. If the caller passes in a fd, dup()
> the fd so that (a) __vm_mem_region_delete() can close the fd associated
> with the memory region without needing yet another flag, and (b) so that
> the caller can safely close its copy of the fd without having to first
> destroy memslots.
> > Co-developed-by: Ackerley Tng <ackerleytng@google.com> > Signed-off-by: Ackerley Tng <ackerleytng@google.com> > Signed-off-by: Sean Christopherson <seanjc@google.com> > Message-Id: <20231027182217.3615211-27-seanjc@google.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > .../selftests/kvm/include/kvm_util_base.h | 23 ++++++ > .../testing/selftests/kvm/include/test_util.h | 5 ++ > tools/testing/selftests/kvm/lib/kvm_util.c | 76 +++++++++++-------- > 3 files changed, 73 insertions(+), 31 deletions(-) > > diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h > index 9f144841c2ee..9f861182c02a 100644 > --- a/tools/testing/selftests/kvm/include/kvm_util_base.h > +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h > @@ -431,6 +431,26 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) > > void vm_create_irqchip(struct kvm_vm *vm); > > +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, > + uint64_t flags) > +{ > + struct kvm_create_guest_memfd guest_memfd = { > + .size = size, > + .flags = flags, > + }; > + > + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); > +} > + > +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, > + uint64_t flags) > +{ > + int fd = __vm_create_guest_memfd(vm, size, flags); > + > + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); > + return fd; > +} > + > void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, > uint64_t gpa, uint64_t size, void *hva); > int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, > @@ -439,6 +459,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, > enum vm_mem_backing_src_type src_type, > uint64_t guest_paddr, uint32_t slot, uint64_t npages, > uint32_t flags); > +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, > + uint64_t guest_paddr, 
uint32_t slot, uint64_t npages, > + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); > > void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); > void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); > diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h > index 7e614adc6cf4..7257f2243ab9 100644 > --- a/tools/testing/selftests/kvm/include/test_util.h > +++ b/tools/testing/selftests/kvm/include/test_util.h > @@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) > return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; > } > > +static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t) > +{ > + return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM; > +} > + > /* Aligns x up to the next multiple of size. Size must be a power of 2. */ > static inline uint64_t align_up(uint64_t x, uint64_t size) > { > diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c > index 3676b37bea38..b63500fca627 100644 > --- a/tools/testing/selftests/kvm/lib/kvm_util.c > +++ b/tools/testing/selftests/kvm/lib/kvm_util.c > @@ -669,6 +669,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, > TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); > close(region->fd); > } > + if (region->region.guest_memfd >= 0) > + close(region->region.guest_memfd); > > free(region); > } > @@ -870,36 +872,15 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, > errno, strerror(errno)); > } > > -/* > - * VM Userspace Memory Region Add > - * > - * Input Args: > - * vm - Virtual Machine > - * src_type - Storage source for this region. > - * NULL to use anonymous memory. 
"VM_MEM_SRC_ANONYMOUS to use anonymous memory" > - * guest_paddr - Starting guest physical address > - * slot - KVM region slot > - * npages - Number of physical pages > - * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) > - * > - * Output Args: None > - * > - * Return: None > - * > - * Allocates a memory area of the number of pages specified by npages > - * and maps it to the VM specified by vm, at a starting physical address > - * given by guest_paddr. The region is created with a KVM region slot > - * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The > - * region is created with the flags given by flags. > - */ > -void vm_userspace_mem_region_add(struct kvm_vm *vm, > - enum vm_mem_backing_src_type src_type, > - uint64_t guest_paddr, uint32_t slot, uint64_t npages, > - uint32_t flags) > +/* FIXME: This thing needs to be ripped apart and rewritten. */ It sure does :) With these nits: Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Cheers, /fuad > +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, > + uint64_t guest_paddr, uint32_t slot, uint64_t npages, > + uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) > { > int ret; > struct userspace_mem_region *region; > size_t backing_src_pagesz = get_backing_src_pagesz(src_type); > + size_t mem_size = npages * vm->page_size; > size_t alignment; > > TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, > @@ -952,7 +933,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, > /* Allocate and initialize new mem region structure. 
*/ > region = calloc(1, sizeof(*region)); > TEST_ASSERT(region != NULL, "Insufficient Memory"); > - region->mmap_size = npages * vm->page_size; > + region->mmap_size = mem_size; > > #ifdef __s390x__ > /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ > @@ -999,14 +980,38 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, > /* As needed perform madvise */ > if ((src_type == VM_MEM_SRC_ANONYMOUS || > src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { > - ret = madvise(region->host_mem, npages * vm->page_size, > + ret = madvise(region->host_mem, mem_size, > src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); > TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", > - region->host_mem, npages * vm->page_size, > + region->host_mem, mem_size, > vm_mem_backing_src_alias(src_type)->name); > } > > region->backing_src_type = src_type; > + > + if (flags & KVM_MEM_GUEST_MEMFD) { > + if (guest_memfd < 0) { > + uint32_t guest_memfd_flags = 0; > + TEST_ASSERT(!guest_memfd_offset, > + "Offset must be zero when creating new guest_memfd"); > + guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); > + } else { > + /* > + * Install a unique fd for each memslot so that the fd > + * can be closed when the region is deleted without > + * needing to track if the fd is owned by the framework > + * or by the caller. 
> + */ > + guest_memfd = dup(guest_memfd); > + TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); > + } > + > + region->region.guest_memfd = guest_memfd; > + region->region.guest_memfd_offset = guest_memfd_offset; > + } else { > + region->region.guest_memfd = -1; > + } > + > region->unused_phy_pages = sparsebit_alloc(); > sparsebit_set_num(region->unused_phy_pages, > guest_paddr >> vm->page_shift, npages); > @@ -1019,9 +1024,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, > TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" > " rc: %i errno: %i\n" > " slot: %u flags: 0x%x\n" > - " guest_phys_addr: 0x%lx size: 0x%lx", > + " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n", > ret, errno, slot, flags, > - guest_paddr, (uint64_t) region->region.memory_size); > + guest_paddr, (uint64_t) region->region.memory_size, > + region->region.guest_memfd); > > /* Add to quick lookup data structures */ > vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); > @@ -1042,6 +1048,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, > } > } > > +void vm_userspace_mem_region_add(struct kvm_vm *vm, > + enum vm_mem_backing_src_type src_type, > + uint64_t guest_paddr, uint32_t slot, > + uint64_t npages, uint32_t flags) > +{ > + vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); > +} > + > /* > * Memslot to region > * > -- > 2.39.1 > >
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 9f144841c2ee..9f861182c02a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -431,6 +431,26 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) void vm_create_irqchip(struct kvm_vm *vm); +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); +} + +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + int fd = __vm_create_guest_memfd(vm, size, flags); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); + return fd; +} + void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva); int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, @@ -439,6 +459,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, uint64_t guest_paddr, uint32_t slot, uint64_t npages, uint32_t flags); +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 7e614adc6cf4..7257f2243ab9 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum 
vm_mem_backing_src_type t) return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; } +static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t) +{ + return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM; +} + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static inline uint64_t align_up(uint64_t x, uint64_t size) { diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 3676b37bea38..b63500fca627 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -669,6 +669,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); close(region->fd); } + if (region->region.guest_memfd >= 0) + close(region->region.guest_memfd); free(region); } @@ -870,36 +872,15 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, errno, strerror(errno)); } -/* - * VM Userspace Memory Region Add - * - * Input Args: - * vm - Virtual Machine - * src_type - Storage source for this region. - * NULL to use anonymous memory. - * guest_paddr - Starting guest physical address - * slot - KVM region slot - * npages - Number of physical pages - * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) - * - * Output Args: None - * - * Return: None - * - * Allocates a memory area of the number of pages specified by npages - * and maps it to the VM specified by vm, at a starting physical address - * given by guest_paddr. The region is created with a KVM region slot - * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The - * region is created with the flags given by flags. - */ -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags) +/* FIXME: This thing needs to be ripped apart and rewritten. 
*/ +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) { int ret; struct userspace_mem_region *region; size_t backing_src_pagesz = get_backing_src_pagesz(src_type); + size_t mem_size = npages * vm->page_size; size_t alignment; TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, @@ -952,7 +933,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* Allocate and initialize new mem region structure. */ region = calloc(1, sizeof(*region)); TEST_ASSERT(region != NULL, "Insufficient Memory"); - region->mmap_size = npages * vm->page_size; + region->mmap_size = mem_size; #ifdef __s390x__ /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ @@ -999,14 +980,38 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { - ret = madvise(region->host_mem, npages * vm->page_size, + ret = madvise(region->host_mem, mem_size, src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", - region->host_mem, npages * vm->page_size, + region->host_mem, mem_size, vm_mem_backing_src_alias(src_type)->name); } region->backing_src_type = src_type; + + if (flags & KVM_MEM_GUEST_MEMFD) { + if (guest_memfd < 0) { + uint32_t guest_memfd_flags = 0; + TEST_ASSERT(!guest_memfd_offset, + "Offset must be zero when creating new guest_memfd"); + guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); + } else { + /* + * Install a unique fd for each memslot so that the fd + * can be closed when the region is deleted without + * needing to track if the fd is owned by the framework + * or by the caller. 
+ */ + guest_memfd = dup(guest_memfd); + TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); + } + + region->region.guest_memfd = guest_memfd; + region->region.guest_memfd_offset = guest_memfd_offset; + } else { + region->region.guest_memfd = -1; + } + region->unused_phy_pages = sparsebit_alloc(); sparsebit_set_num(region->unused_phy_pages, guest_paddr >> vm->page_shift, npages); @@ -1019,9 +1024,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx", + " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n", ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size); + guest_paddr, (uint64_t) region->region.memory_size, + region->region.guest_memfd); /* Add to quick lookup data structures */ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); @@ -1042,6 +1048,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, } } +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, + uint64_t npages, uint32_t flags) +{ + vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); +} + /* * Memslot to region *