Message ID | 20220624213257.1504783-10-ricarkol@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: selftests: Add aarch64/page_fault_test | expand |
Hi Ricardo, On Fri, Jun 24, 2022 at 02:32:53PM -0700, Ricardo Koller wrote: > Add a new test for stage 2 faults when using different combinations of > guest accesses (e.g., write, S1PTW), backing source type (e.g., anon) > and types of faults (e.g., read on hugetlbfs with a hole). The next > commits will add different handling methods and more faults (e.g., uffd > and dirty logging). This first commit starts by adding two sanity checks > for all types of accesses: AF setting by the hw, and accessing memslots > with holes. > > Signed-off-by: Ricardo Koller <ricarkol@google.com> > --- > tools/testing/selftests/kvm/Makefile | 1 + > .../selftests/kvm/aarch64/page_fault_test.c | 695 ++++++++++++++++++ > .../selftests/kvm/include/aarch64/processor.h | 6 + > 3 files changed, 702 insertions(+) > create mode 100644 tools/testing/selftests/kvm/aarch64/page_fault_test.c > > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile > index e4497a3a27d4..13b913225ae7 100644 > --- a/tools/testing/selftests/kvm/Makefile > +++ b/tools/testing/selftests/kvm/Makefile > @@ -139,6 +139,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer > TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions > TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list > TEST_GEN_PROGS_aarch64 += aarch64/hypercalls > +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test > TEST_GEN_PROGS_aarch64 += aarch64/psci_test > TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config > TEST_GEN_PROGS_aarch64 += aarch64/vgic_init > diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c > new file mode 100644 > index 000000000000..bdda4e3fcdaa > --- /dev/null > +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c [...] > +/* Compare and swap instruction. */ > +static void guest_cas(void) > +{ > + uint64_t val; > + > + GUEST_ASSERT_EQ(guest_check_lse(), 1); Why not just GUEST_ASSERT(guest_check_lse()) ? 
> + asm volatile(".arch_extension lse\n" > + "casal %0, %1, [%2]\n" > + :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); > + val = READ_ONCE(*guest_test_memory); > + GUEST_ASSERT_EQ(val, TEST_DATA); > +} > + > +static void guest_read64(void) > +{ > + uint64_t val; > + > + val = READ_ONCE(*guest_test_memory); > + GUEST_ASSERT_EQ(val, 0); > +} > + > +/* Address translation instruction */ > +static void guest_at(void) > +{ > + uint64_t par; > + uint64_t paddr; > + > + asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); > + par = read_sysreg(par_el1); I believe you need explicit synchronization (an isb) before the fault information is guaranteed visible in PAR_EL1. > + /* Bit 1 indicates whether the AT was successful */ > + GUEST_ASSERT_EQ(par & 1, 0); > + /* The PA in bits [51:12] */ > + paddr = par & (((1ULL << 40) - 1) << 12); > + GUEST_ASSERT_EQ(paddr, memslot[TEST].gpa); > +} > + > +/* > + * The size of the block written by "dc zva" is guaranteed to be between (2 << > + * 0) and (2 << 9), which is safe in our case as we need the write to happen > + * for at least a word, and not more than a page. > + */ > +static void guest_dc_zva(void) > +{ > + uint16_t val; > + > + asm volatile("dc zva, %0\n" > + "dsb ish\n" nit: use the dsb() macro instead. Extremely minor, but makes it a bit more obvious to the reader. Or maybe I need to get my eyes checked ;-) > + :: "r" (guest_test_memory)); > + val = READ_ONCE(*guest_test_memory); > + GUEST_ASSERT_EQ(val, 0); > +} > + > +/* > + * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). > + * And that's special because KVM must take special care with those: they > + * should still count as accesses for dirty logging or user-faulting, but > + * should be handled differently on mmio. 
> + */ > +static void guest_ld_preidx(void) > +{ > + uint64_t val; > + uint64_t addr = TEST_GVA - 8; > + > + /* > + * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is > + * in a gap between memslots not backing by anything. > + */ > + asm volatile("ldr %0, [%1, #8]!" > + : "=r" (val), "+r" (addr)); > + GUEST_ASSERT_EQ(val, 0); > + GUEST_ASSERT_EQ(addr, TEST_GVA); > +} > + > +static void guest_st_preidx(void) > +{ > + uint64_t val = TEST_DATA; > + uint64_t addr = TEST_GVA - 8; > + > + asm volatile("str %0, [%1, #8]!" > + : "+r" (val), "+r" (addr)); > + > + GUEST_ASSERT_EQ(addr, TEST_GVA); > + val = READ_ONCE(*guest_test_memory); > +} > + > +static bool guest_set_ha(void) > +{ > + uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); > + uint64_t hadbs, tcr; > + > + /* Skip if HA is not supported. */ > + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); > + if (hadbs == 0) > + return false; > + > + tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; > + write_sysreg(tcr, tcr_el1); > + isb(); > + > + return true; > +} > + > +static bool guest_clear_pte_af(void) > +{ > + *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; > + flush_tlb_page(TEST_PTE_GVA); Don't you want to actually flush TEST_GVA to force the TLB fill when you poke the address again? This looks like you're flushing the VA of the *PTE* not the test address. > + return true; > +} > + > +static void guest_check_pte_af(void) nit: call this guest_test_pte_af(). You use the guest_check_* pattern for test preconditions (like guest_check_lse()). > +{ > + flush_tlb_page(TEST_PTE_GVA); What is the purpose of this flush? I believe you are actually depending on a dsb(ish) between the hardware PTE update and the load below. Or, that's at least what I gleaned from the jargon of DDI0487H.a D5.4.13 'Ordering of hardware updates to the translation tables'. > + GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); > +} [...] 
> +static void sync_stats_from_guest(struct kvm_vm *vm) > +{ > + struct event_cnt *ec = addr_gva2hva(vm, (uint64_t)&events); > + > + events.aborts += ec->aborts; > +} I believe you can use sync_global_from_guest() instead of this. > +void fail_vcpu_run_no_handler(int ret) > +{ > + TEST_FAIL("Unexpected vcpu run failure\n"); > +} > + > +extern unsigned char __exec_test; > + > +void noinline __return_0x77(void) > +{ > + asm volatile("__exec_test: mov x0, #0x77\n" > + "ret\n"); > +} > + > +static void load_exec_code_for_test(void) > +{ > + uint64_t *code, *c; > + > + assert(TEST_EXEC_GVA - TEST_GVA); > + code = memslot[TEST].hva + 8; > + > + /* > + * We need the cast to be separate in order for the compiler to not > + * complain with: "‘memcpy’ forming offset [1, 7] is out of the bounds > + * [0, 1] of object ‘__exec_test’ with type ‘unsigned char’" > + */ > + c = (uint64_t *)&__exec_test; > + memcpy(code, c, 8); Don't you need to sync D$ and I$? -- Thanks, Oliver
Hey Oliver, On Tue, Jun 28, 2022 at 04:43:29PM -0700, Oliver Upton wrote: > Hi Ricardo, > > On Fri, Jun 24, 2022 at 02:32:53PM -0700, Ricardo Koller wrote: > > Add a new test for stage 2 faults when using different combinations of > > guest accesses (e.g., write, S1PTW), backing source type (e.g., anon) > > and types of faults (e.g., read on hugetlbfs with a hole). The next > > commits will add different handling methods and more faults (e.g., uffd > > and dirty logging). This first commit starts by adding two sanity checks > > for all types of accesses: AF setting by the hw, and accessing memslots > > with holes. > > > > Signed-off-by: Ricardo Koller <ricarkol@google.com> > > --- > > tools/testing/selftests/kvm/Makefile | 1 + > > .../selftests/kvm/aarch64/page_fault_test.c | 695 ++++++++++++++++++ > > .../selftests/kvm/include/aarch64/processor.h | 6 + > > 3 files changed, 702 insertions(+) > > create mode 100644 tools/testing/selftests/kvm/aarch64/page_fault_test.c > > > > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile > > index e4497a3a27d4..13b913225ae7 100644 > > --- a/tools/testing/selftests/kvm/Makefile > > +++ b/tools/testing/selftests/kvm/Makefile > > @@ -139,6 +139,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer > > TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions > > TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list > > TEST_GEN_PROGS_aarch64 += aarch64/hypercalls > > +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test > > TEST_GEN_PROGS_aarch64 += aarch64/psci_test > > TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config > > TEST_GEN_PROGS_aarch64 += aarch64/vgic_init > > diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c > > new file mode 100644 > > index 000000000000..bdda4e3fcdaa > > --- /dev/null > > +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c > > [...] > > > +/* Compare and swap instruction. 
*/ > > +static void guest_cas(void) > > +{ > > + uint64_t val; > > + > > + GUEST_ASSERT_EQ(guest_check_lse(), 1); > > Why not just GUEST_ASSERT(guest_check_lse()) ? > > > + asm volatile(".arch_extension lse\n" > > + "casal %0, %1, [%2]\n" > > + :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); > > + val = READ_ONCE(*guest_test_memory); > > + GUEST_ASSERT_EQ(val, TEST_DATA); > > +} > > + > > +static void guest_read64(void) > > +{ > > + uint64_t val; > > + > > + val = READ_ONCE(*guest_test_memory); > > + GUEST_ASSERT_EQ(val, 0); > > +} > > + > > +/* Address translation instruction */ > > +static void guest_at(void) > > +{ > > + uint64_t par; > > + uint64_t paddr; > > + > > + asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); > > + par = read_sysreg(par_el1); > > I believe you need explicit synchronization (an isb) before the fault > information is guaranteed visibile in PAR_EL1. > > > + /* Bit 1 indicates whether the AT was successful */ > > + GUEST_ASSERT_EQ(par & 1, 0); > > + /* The PA in bits [51:12] */ > > + paddr = par & (((1ULL << 40) - 1) << 12); > > + GUEST_ASSERT_EQ(paddr, memslot[TEST].gpa); > > +} > > + > > +/* > > + * The size of the block written by "dc zva" is guaranteed to be between (2 << > > + * 0) and (2 << 9), which is safe in our case as we need the write to happen > > + * for at least a word, and not more than a page. > > + */ > > +static void guest_dc_zva(void) > > +{ > > + uint16_t val; > > + > > + asm volatile("dc zva, %0\n" > > + "dsb ish\n" > > nit: use the dsb() macro instead. Extremely minor, but makes it a bit > more obvious to the reader. Or maybe I need to get my eyes checked ;-) > > > + :: "r" (guest_test_memory)); > > + val = READ_ONCE(*guest_test_memory); > > + GUEST_ASSERT_EQ(val, 0); > > +} > > + > > +/* > > + * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). 
> > + * And that's special because KVM must take special care with those: they > > + * should still count as accesses for dirty logging or user-faulting, but > > + * should be handled differently on mmio. > > + */ > > +static void guest_ld_preidx(void) > > +{ > > + uint64_t val; > > + uint64_t addr = TEST_GVA - 8; > > + > > + /* > > + * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is > > + * in a gap between memslots not backing by anything. > > + */ > > + asm volatile("ldr %0, [%1, #8]!" > > + : "=r" (val), "+r" (addr)); > > + GUEST_ASSERT_EQ(val, 0); > > + GUEST_ASSERT_EQ(addr, TEST_GVA); > > +} > > + > > +static void guest_st_preidx(void) > > +{ > > + uint64_t val = TEST_DATA; > > + uint64_t addr = TEST_GVA - 8; > > + > > + asm volatile("str %0, [%1, #8]!" > > + : "+r" (val), "+r" (addr)); > > + > > + GUEST_ASSERT_EQ(addr, TEST_GVA); > > + val = READ_ONCE(*guest_test_memory); > > +} > > + > > +static bool guest_set_ha(void) > > +{ > > + uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); > > + uint64_t hadbs, tcr; > > + > > + /* Skip if HA is not supported. */ > > + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); > > + if (hadbs == 0) > > + return false; > > + > > + tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; > > + write_sysreg(tcr, tcr_el1); > > + isb(); > > + > > + return true; > > +} > > + > > +static bool guest_clear_pte_af(void) > > +{ > > + *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; > > + flush_tlb_page(TEST_PTE_GVA); > > Don't you want to actually flush TEST_GVA to force the TLB fill when you > poke the address again? This looks like you're flushing the VA of the > *PTE* not the test address. Yes, you are right, this was supposed to be: flush_tlb_page(TEST_GVA); (I could swear this was TEST_GVA at one time) > > > + return true; > > +} > > + > > +static void guest_check_pte_af(void) > > nit: call this guest_test_pte_af(). You use the guest_check_* pattern > for test preconditions (like guest_check_lse()). 
> > > +{ > > + flush_tlb_page(TEST_PTE_GVA); > > What is the purpose of this flush? I believe you are actually depending > on a dsb(ish) between the hardware PTE update and the load below. Or, > that's at least what I gleaned from the jargon of DDI0487H.a D5.4.13 > 'Ordering of hardware updates to the translation tables'. This was also supposed to be: flush_tlb_page(TEST_GVA) But will removed based on D5.4.13, as it's indeed saying that the DSB should be enough. > > > + GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); > > +} > > [...] > > > +static void sync_stats_from_guest(struct kvm_vm *vm) > > +{ > > + struct event_cnt *ec = addr_gva2hva(vm, (uint64_t)&events); > > + > > + events.aborts += ec->aborts; > > +} > > I believe you can use sync_global_from_guest() instead of this. > > > +void fail_vcpu_run_no_handler(int ret) > > +{ > > + TEST_FAIL("Unexpected vcpu run failure\n"); > > +} > > + > > +extern unsigned char __exec_test; > > + > > +void noinline __return_0x77(void) > > +{ > > + asm volatile("__exec_test: mov x0, #0x77\n" > > + "ret\n"); > > +} > > + > > +static void load_exec_code_for_test(void) > > +{ > > + uint64_t *code, *c; > > + > > + assert(TEST_EXEC_GVA - TEST_GVA); > > + code = memslot[TEST].hva + 8; > > + > > + /* > > + * We need the cast to be separate in order for the compiler to not > > + * complain with: "‘memcpy’ forming offset [1, 7] is out of the bounds > > + * [0, 1] of object ‘__exec_test’ with type ‘unsigned char’" > > + */ > > + c = (uint64_t *)&__exec_test; > > + memcpy(code, c, 8); > > Don't you need to sync D$ and I$? This is done before running the VM for the first time, and it's only ever written this one time. I think KVM itself is doing the sync when mapping new pages for the first time, which would be this case. > > -- > Thanks, > Oliver ACK on all the other points, will fix accordingly. Thanks for the review!
On Fri, Jun 24, 2022, Ricardo Koller wrote: > Add a new test for stage 2 faults when using different combinations of > guest accesses (e.g., write, S1PTW), backing source type (e.g., anon) > and types of faults (e.g., read on hugetlbfs with a hole). The next > commits will add different handling methods and more faults (e.g., uffd > and dirty logging). This first commit starts by adding two sanity checks > for all types of accesses: AF setting by the hw, and accessing memslots > with holes. > > Signed-off-by: Ricardo Koller <ricarkol@google.com> > --- ... > +/* > + * Create a memslot for test data (memslot[TEST]) and another one for PT > + * tables (memslot[PT]). This diagram show the resulting guest virtual and > + * physical address space when using 4K backing pages for the memslots, and > + * 4K guest pages. > + * > + * Guest physical Guest virtual > + * > + * | | | | > + * | | | | > + * +--------------+ +-------------+ > + * max_gfn - 0x1000 | TEST memslot |<---------+ test data | 0xc0000000 > + * +--------------+ +-------------+ > + * max_gfn - 0x2000 | gap |<---------+ gap | 0xbffff000 > + * +--------------+ +-------------+ > + * | | | | > + * | | | | > + * | PT memslot | | | > + * | | +-------------+ > + * max_gfn - 0x6000 | |<----+ | | > + * +--------------+ | | | > + * | | | | PTE for the | > + * | | | | test data | > + * | | +----+ page | 0xb0000000 > + * | | +-------------+ > + * | | | | > + * | | | | > + * > + * Using different guest page sizes or backing pages will result in the > + * same layout but at different addresses. In particular, the memslot > + * sizes need to be multiple of the backing page sizes (e.g., 2MB). 
> + */ > +static void setup_memslots(struct kvm_vm *vm, enum vm_guest_mode mode, > + struct test_params *p) > +{ > + uint64_t backing_page_size = get_backing_src_pagesz(p->src_type); > + uint64_t guest_page_size = vm_guest_mode_params[mode].page_size; > + struct test_desc *test = p->test_desc; > + uint64_t gap_gpa; > + uint64_t alignment; > + int i; > + > + memslot[TEST].size = align_up(guest_page_size, backing_page_size); > + /* > + * We need one guest page for the PT table containing the PTE (for > + * TEST_GVA), but might need more in case the higher level PT tables > + * were not allocated yet. > + */ > + memslot[PT].size = align_up(4 * guest_page_size, backing_page_size); > + > + for (i = 0; i < NR_MEMSLOTS; i++) { > + memslot[i].guest_pages = memslot[i].size / guest_page_size; > + memslot[i].src_type = p->src_type; > + } > + > + /* Place the memslots GPAs at the end of physical memory */ > + alignment = max(backing_page_size, guest_page_size); > + memslot[TEST].gpa = (vm->max_gfn - memslot[TEST].guest_pages) * > + guest_page_size; > + memslot[TEST].gpa = align_down(memslot[TEST].gpa, alignment); > + > + /* Add a 1-guest_page gap between the two memslots */ > + gap_gpa = memslot[TEST].gpa - guest_page_size; > + /* Map the gap so it's still adressable from the guest. */ > + virt_pg_map(vm, TEST_GVA - guest_page_size, gap_gpa); > + > + memslot[PT].gpa = gap_gpa - memslot[PT].size; > + memslot[PT].gpa = align_down(memslot[PT].gpa, alignment); > + > + vm_userspace_mem_region_add(vm, p->src_type, memslot[PT].gpa, > + memslot[PT].idx, memslot[PT].guest_pages, > + test->pt_memslot_flags); > + vm_userspace_mem_region_add(vm, p->src_type, memslot[TEST].gpa, > + memslot[TEST].idx, memslot[TEST].guest_pages, > + test->test_memslot_flags); > + > + for (i = 0; i < NR_MEMSLOTS; i++) > + memslot[i].hva = addr_gpa2hva(vm, memslot[i].gpa); > + > + /* Map the test TEST_GVA using the PT memslot. 
*/ > + _virt_pg_map(vm, TEST_GVA, memslot[TEST].gpa, MT_NORMAL, > + TEST_PT_SLOT_INDEX); Use memslot[TEST].idx instead of TEST_PT_SLOT_INDEX to be consistent, though my preference would be to avoid this API. IIUC, the goal is to test different backing stores for the memory the guest uses for its page tables. But do we care about testing that a single guest's page tables are backed with different types concurrently? If we don't care, and maybe even if we do, then my preference would be to enhance the __vm_create family of helpers to allow for specifying what backing type should be used for page tables, i.e. associate the info with the VM instead of passing it around the stack. One idea would be to do something like David Matlack suggested a while back and replace extra_mem_pages with a struct, e.g. struct kvm_vm_mem_params. That struct can then provide the necessary knobs to control how memory is allocated. And then the lib can provide a global struct kvm_vm_mem_params kvm_default_vm_mem_params; or whatever (probably a shorter name) for the tests that don't care. And then down the road, if someone wants to control the backing type for code vs. data, we could add those flags to kvm_vm_mem_params and add vm_vaddr_alloc() wrappers to alloc code vs. data (with a default to data?). I don't like the param approach because it bleeds implementation details that really shouldn't matter, e.g. which memslot is the default, into tests. And it's not very easy to use, e.g. if a different test wants to do something similar, it would have to create its own memslot, populate the tables, etc...
On Thu, Jul 21, 2022 at 01:29:34AM +0000, Sean Christopherson wrote: > On Fri, Jun 24, 2022, Ricardo Koller wrote: > > Add a new test for stage 2 faults when using different combinations of > > guest accesses (e.g., write, S1PTW), backing source type (e.g., anon) > > and types of faults (e.g., read on hugetlbfs with a hole). The next > > commits will add different handling methods and more faults (e.g., uffd > > and dirty logging). This first commit starts by adding two sanity checks > > for all types of accesses: AF setting by the hw, and accessing memslots > > with holes. > > > > Signed-off-by: Ricardo Koller <ricarkol@google.com> > > --- > > ... > > > +/* > > + * Create a memslot for test data (memslot[TEST]) and another one for PT > > + * tables (memslot[PT]). This diagram show the resulting guest virtual and > > + * physical address space when using 4K backing pages for the memslots, and > > + * 4K guest pages. > > + * > > + * Guest physical Guest virtual > > + * > > + * | | | | > > + * | | | | > > + * +--------------+ +-------------+ > > + * max_gfn - 0x1000 | TEST memslot |<---------+ test data | 0xc0000000 > > + * +--------------+ +-------------+ > > + * max_gfn - 0x2000 | gap |<---------+ gap | 0xbffff000 > > + * +--------------+ +-------------+ > > + * | | | | > > + * | | | | > > + * | PT memslot | | | > > + * | | +-------------+ > > + * max_gfn - 0x6000 | |<----+ | | > > + * +--------------+ | | | > > + * | | | | PTE for the | > > + * | | | | test data | > > + * | | +----+ page | 0xb0000000 > > + * | | +-------------+ > > + * | | | | > > + * | | | | > > + * > > + * Using different guest page sizes or backing pages will result in the > > + * same layout but at different addresses. In particular, the memslot > > + * sizes need to be multiple of the backing page sizes (e.g., 2MB). 
> > + */ > > +static void setup_memslots(struct kvm_vm *vm, enum vm_guest_mode mode, > > + struct test_params *p) > > +{ > > + uint64_t backing_page_size = get_backing_src_pagesz(p->src_type); > > + uint64_t guest_page_size = vm_guest_mode_params[mode].page_size; > > + struct test_desc *test = p->test_desc; > > + uint64_t gap_gpa; > > + uint64_t alignment; > > + int i; > > + > > + memslot[TEST].size = align_up(guest_page_size, backing_page_size); > > + /* > > + * We need one guest page for the PT table containing the PTE (for > > + * TEST_GVA), but might need more in case the higher level PT tables > > + * were not allocated yet. > > + */ > > + memslot[PT].size = align_up(4 * guest_page_size, backing_page_size); > > + > > + for (i = 0; i < NR_MEMSLOTS; i++) { > > + memslot[i].guest_pages = memslot[i].size / guest_page_size; > > + memslot[i].src_type = p->src_type; > > + } > > + > > + /* Place the memslots GPAs at the end of physical memory */ > > + alignment = max(backing_page_size, guest_page_size); > > + memslot[TEST].gpa = (vm->max_gfn - memslot[TEST].guest_pages) * > > + guest_page_size; > > + memslot[TEST].gpa = align_down(memslot[TEST].gpa, alignment); > > + > > + /* Add a 1-guest_page gap between the two memslots */ > > + gap_gpa = memslot[TEST].gpa - guest_page_size; > > + /* Map the gap so it's still adressable from the guest. 
*/ > > + virt_pg_map(vm, TEST_GVA - guest_page_size, gap_gpa); > > + > > + memslot[PT].gpa = gap_gpa - memslot[PT].size; > > + memslot[PT].gpa = align_down(memslot[PT].gpa, alignment); > > + > > + vm_userspace_mem_region_add(vm, p->src_type, memslot[PT].gpa, > > + memslot[PT].idx, memslot[PT].guest_pages, > > + test->pt_memslot_flags); > > + vm_userspace_mem_region_add(vm, p->src_type, memslot[TEST].gpa, > > + memslot[TEST].idx, memslot[TEST].guest_pages, > > + test->test_memslot_flags); > > + > > + for (i = 0; i < NR_MEMSLOTS; i++) > > + memslot[i].hva = addr_gpa2hva(vm, memslot[i].gpa); > > + > > + /* Map the test TEST_GVA using the PT memslot. */ > > + _virt_pg_map(vm, TEST_GVA, memslot[TEST].gpa, MT_NORMAL, > > + TEST_PT_SLOT_INDEX); > > Use memslot[TEST].idx instead of TEST_PT_SLOT_INDEX to be consistent, though my > preference would be to avoid this API. > > IIUC, the goal is to test different backing stores for the memory the guest uses > for it's page tables. But do we care about testing that a single guest's page > tables are backed with different types concurrently? This test creates a new VM for each subtest, so an API like that would definitely make this code simpler/nicer. > If we don't care, and maybe > even if we do, then my preference would be to enhance the __vm_create family of > helpers to allow for specifying what backing type should be used for page tables, > i.e. associate the info the VM instead of passing it around the stack. > > One idea would be to do something like David Matlack suggested a while back and > replace extra_mem_pages with a struct, e.g. struct kvm_vm_mem_params That struct > can then provide the necessary knobs to control how memory is allocated. And then > the lib can provide a global > > struct kvm_vm_mem_params kvm_default_vm_mem_params; > I like this idea, passing the info at vm creation. What about dividing the changes in two. 1. 
Will add the struct to "__vm_create()" as part of this series, and then use it in this commit. There's only one user dirty_log_test.c: vm = __vm_create(mode, 1, extra_mem_pages); so that would avoid having to touch every test as part of this patchset. 2. I can then send another series to add support for all the other vm_create() functions. Alternatively, I can send a new series that does 1 and 2 afterwards. WDYT? > or whatever (probably a shorter name) for the tests that don't care. And then > down the road, if someone wants to control the backing type for code vs. data, > we could and those flags to kvm_vm_mem_params and add vm_vaddr_alloc() wrappers > to alloc code vs. data (with a default to data?). > > I don't like the param approach because it bleeds implementation details that > really shouldn't matter, e.g. which memslot is the default, into tests. And it's > not very easy to use, e.g. if a different test wants to do something similar, > it would have to create its own memslot, populate the tables, etc...
On Fri, Jul 22, 2022, Ricardo Koller wrote: > On Thu, Jul 21, 2022 at 01:29:34AM +0000, Sean Christopherson wrote: > > If we don't care, and maybe even if we do, then my preference would be to > > enhance the __vm_create family of helpers to allow for specifying what > > backing type should be used for page tables, i.e. associate the info the VM > > instead of passing it around the stack. > > > > One idea would be to do something like David Matlack suggested a while back > > and replace extra_mem_pages with a struct, e.g. struct kvm_vm_mem_params > > That struct can then provide the necessary knobs to control how memory is > > allocated. And then the lib can provide a global > > > > struct kvm_vm_mem_params kvm_default_vm_mem_params; > > > > I like this idea, passing the info at vm creation. > > What about dividing the changes in two. > > 1. Will add the struct to "__vm_create()" as part of this > series, and then use it in this commit. There's only one user > > dirty_log_test.c: vm = __vm_create(mode, 1, extra_mem_pages); > > so that would avoid having to touch every test as part of this patchset. > > 2. I can then send another series to add support for all the other > vm_create() functions. > > Alternatively, I can send a new series that does 1 and 2 afterwards. > WDYT? Don't do #2, ever. :-) The intent of having vm_create() versus __vm_create() is so that tests that don't care about things like backing pages don't have to pass in extra params. I very much want to keep that behavior, i.e. I don't want to extend vm_create() at all. IMO, adding _anything_ is a slippery slope, e.g. why are the backing types special enough to get a param, but thing XYZ isn't? Thinking more, the struct idea probably isn't going to work all that well. It again puts the selftests into a state where it becomes difficult to control one setting and ignore the rest, e.g. 
the dirty_log_test and anything else with extra pages suddenly has to care about the backing type for page tables and code. Rather than adding a struct, what about extending the @mode param? We already have vm_mem_backing_src_type, we just need a way to splice things together. There are a total of four things we can control: primary mode, and then code, data, and page tables backing types. So, turn @mode into a uint32_t and carve out 8 bits for each of those four "modes". The defaults Just Work because VM_MEM_SRC_ANONYMOUS==0. Lightly tested, but the below should provide the necessary base infrastructure, then you just need to have ____vm_create() consume the secondary "modes". --- From: Sean Christopherson <seanjc@google.com> Date: Fri, 22 Jul 2022 10:56:08 -0700 Subject: [PATCH] KVM: selftests: Extend VM creation's @mode to allow control of backing types Carve out space in the @mode passed to the various VM creation helpers to allow using the mode to control the backing type for code, data, and page table allocations made by the selftests framework. E.g. to allow tests to force guest page tables to be backed with huge pages. No functional change intended. 
Signed-off-by: Sean Christopherson <seanjc@google.com> --- .../selftests/kvm/include/kvm_util_base.h | 78 ++++++++++++------- tools/testing/selftests/kvm/lib/guest_modes.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 35 +++++---- 3 files changed, 69 insertions(+), 46 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 24fde97f6121..992dcc7b39e7 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -29,6 +29,45 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ + +enum vm_guest_mode { + VM_MODE_P52V48_4K, + VM_MODE_P52V48_64K, + VM_MODE_P48V48_4K, + VM_MODE_P48V48_16K, + VM_MODE_P48V48_64K, + VM_MODE_P40V48_4K, + VM_MODE_P40V48_16K, + VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, + VM_MODE_P36V48_4K, + VM_MODE_P36V48_16K, + VM_MODE_P36V48_64K, + VM_MODE_P36V47_16K, + NUM_VM_MODES, +}; + +/* + * There are four flavors of "modes" that tests can control. The primary mode + * defines the physical and virtual address widths, and page sizes configured + * in hardware. The code/data/page_table modifiers control the backing types + * for code, data, and page tables that are allocated by the infrastructure, + * e.g. to allow tests to force page tables to be back by huge pages. + * + * Valid values for the primary mask are "enum vm_guest_mode", and valid values + * for code, data, and page tables are "enum vm_mem_backing_src_type". + */ +#define VM_MODE_PRIMARY_MASK GENMASK(7, 0) +#define VM_MODE_CODE_MASK GENMASK(15, 8) +#define VM_MODE_DATA_MASK GENMASK(23, 16) +#define VM_MODE_PAGE_TABLE_MASK GENMASK(31, 24) + +/* 8 bits in each mask above, i.e. 
255 possible values */ +_Static_assert(NUM_VM_MODES < 256); +_Static_assert(NUM_SRC_TYPES < 256); + struct userspace_mem_region { struct kvm_userspace_memory_region region; struct sparsebit *unused_phy_pages; @@ -65,7 +104,7 @@ struct userspace_mem_regions { }; struct kvm_vm { - int mode; + enum vm_guest_mode mode; unsigned long type; int kvm_fd; int fd; @@ -111,28 +150,9 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot); #define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 #define DEFAULT_STACK_PGS 5 -enum vm_guest_mode { - VM_MODE_P52V48_4K, - VM_MODE_P52V48_64K, - VM_MODE_P48V48_4K, - VM_MODE_P48V48_16K, - VM_MODE_P48V48_64K, - VM_MODE_P40V48_4K, - VM_MODE_P40V48_16K, - VM_MODE_P40V48_64K, - VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ - VM_MODE_P47V64_4K, - VM_MODE_P44V64_4K, - VM_MODE_P36V48_4K, - VM_MODE_P36V48_16K, - VM_MODE_P36V48_64K, - VM_MODE_P36V47_16K, - NUM_VM_MODES, -}; - #if defined(__aarch64__) -extern enum vm_guest_mode vm_mode_default; +extern uint32_t vm_mode_default; #define VM_MODE_DEFAULT vm_mode_default #define MIN_PAGE_SHIFT 12U @@ -642,8 +662,8 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to * calculate the amount of memory needed for per-vCPU data, e.g. stacks. 
*/ -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages); -struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, +struct kvm_vm *____vm_create(uint32_t mode, uint64_t nr_pages); +struct kvm_vm *__vm_create(uint32_t mode, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) @@ -656,7 +676,7 @@ static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) return __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0); } -struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(uint32_t mode, uint32_t nr_vcpus, uint64_t extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]); @@ -685,11 +705,11 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); unsigned long vm_compute_max_gfn(struct kvm_vm *vm); -unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); -unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); -unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); -static inline unsigned int -vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +unsigned int vm_calc_num_guest_pages(uint32_t mode, size_t size); +unsigned int vm_num_host_pages(uint32_t mode, unsigned int num_guest_pages); +unsigned int vm_num_guest_pages(uint32_t mode, unsigned int num_host_pages); +static inline unsigned int vm_adjust_num_guest_pages(uint32_t mode, + unsigned int num_guest_pages) { unsigned int n; n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 99a575bbbc52..93c6ca9ebb49 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ 
-6,7 +6,7 @@ #ifdef __aarch64__ #include "processor.h" -enum vm_guest_mode vm_mode_default; +uint32_t vm_mode_default; #endif struct guest_mode guest_modes[NUM_VM_MODES]; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9889fe0d8919..c2f3c49643b1 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -143,13 +143,10 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) +struct kvm_vm *____vm_create(uint32_t mode, uint64_t nr_pages) { struct kvm_vm *vm; - pr_debug("%s: mode='%s' pages='%ld'\n", __func__, - vm_guest_mode_string(mode), nr_pages); - vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -158,13 +155,19 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) vm->regions.hva_tree = RB_ROOT; hash_init(vm->regions.slot_hash); - vm->mode = mode; vm->type = 0; - vm->pa_bits = vm_guest_mode_params[mode].pa_bits; - vm->va_bits = vm_guest_mode_params[mode].va_bits; - vm->page_size = vm_guest_mode_params[mode].page_size; - vm->page_shift = vm_guest_mode_params[mode].page_shift; + vm->mode = mode & VM_MODE_PRIMARY_MASK; + pr_debug("%s: mode='%s' pages='%ld'\n", __func__, + vm_guest_mode_string(mode), nr_pages); + + TEST_ASSERT(vm->mode == mode, + "Code, data, and page tables \"modes\" not yet implemented"); + + vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits; + vm->va_bits = vm_guest_mode_params[vm->mode].va_bits; + vm->page_size = vm_guest_mode_params[vm->mode].page_size; + vm->page_shift = vm_guest_mode_params[vm->mode].page_shift; /* Setup mode specific traits. 
*/ switch (vm->mode) { @@ -222,7 +225,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) vm->pgtable_levels = 5; break; default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } #ifdef __aarch64__ @@ -252,7 +255,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) return vm; } -static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, +static uint64_t vm_nr_pages_required(uint32_t mode, uint32_t nr_runnable_vcpus, uint64_t extra_mem_pages) { @@ -287,7 +290,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } -struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, +struct kvm_vm *__vm_create(uint32_t mode, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, @@ -323,7 +326,7 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, * extra_mem_pages is only used to calculate the maximum page table size, * no real memory allocation for non-slot0 memory in this function. 
*/ -struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(uint32_t mode, uint32_t nr_vcpus, uint64_t extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]) { @@ -1849,7 +1852,7 @@ static inline int getpageshift(void) } unsigned int -vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +vm_num_host_pages(uint32_t mode, unsigned int num_guest_pages) { return vm_calc_num_pages(num_guest_pages, vm_guest_mode_params[mode].page_shift, @@ -1857,13 +1860,13 @@ vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) } unsigned int -vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages) +vm_num_guest_pages(uint32_t mode, unsigned int num_host_pages) { return vm_calc_num_pages(num_host_pages, getpageshift(), vm_guest_mode_params[mode].page_shift, false); } -unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) +unsigned int vm_calc_num_guest_pages(uint32_t mode, size_t size) { unsigned int n; n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size); base-commit: 1a4d88a361af4f2e91861d632c6a1fe87a9665c2 --
On Fri, Jul 22, 2022 at 06:20:07PM +0000, Sean Christopherson wrote: > On Fri, Jul 22, 2022, Ricardo Koller wrote: > > On Thu, Jul 21, 2022 at 01:29:34AM +0000, Sean Christopherson wrote: > > > If we don't care, and maybe even if we do, then my preference would be to > > > enhance the __vm_create family of helpers to allow for specifying what > > > backing type should be used for page tables, i.e. associate the info the VM > > > instead of passing it around the stack. > > > > > > One idea would be to do something like David Matlack suggested a while back > > > and replace extra_mem_pages with a struct, e.g. struct kvm_vm_mem_params > > > That struct can then provide the necessary knobs to control how memory is > > > allocated. And then the lib can provide a global > > > > > > struct kvm_vm_mem_params kvm_default_vm_mem_params; > > > > > > > I like this idea, passing the info at vm creation. > > > > What about dividing the changes in two. > > > > 1. Will add the struct to "__vm_create()" as part of this > > series, and then use it in this commit. There's only one user > > > > dirty_log_test.c: vm = __vm_create(mode, 1, extra_mem_pages); > > > > so that would avoid having to touch every test as part of this patchset. > > > > 2. I can then send another series to add support for all the other > > vm_create() functions. > > > > Alternatively, I can send a new series that does 1 and 2 afterwards. > > WDYT? > > Don't do #2, ever. :-) The intent of having vm_create() versus is __vm_create() > is so that tests that don't care about things like backing pages don't have to > pass in extra params. I very much want to keep that behavior, i.e. I don't want > to extend vm_create() at all. IMO, adding _anything_ is a slippery slope, e.g. > why are the backing types special enough to get a param, but thing XYZ isn't? > > Thinking more, the struct idea probably isn't going to work all that well. 
It > again puts the selftests into a state where it becomes difficult to control one > setting and ignore the rest, e.g. the dirty_log_test and anything else with extra > pages suddenly has to care about the backing type for page tables and code. > > Rather than adding a struct, what about extending the @mode param? We already > have vm_mem_backing_src_type, we just need a way to splice things together. There > are a total of four things we can control: primary mode, and then code, data, and > page tables backing types. > > So, turn @mode into a uint32_t and carve out 8 bits for each of those four "modes". > The defaults Just Work because VM_MEM_SRC_ANONYMOUS==0. Hi Sean, How about merging both proposals and turn @mode into a struct and pass around a pointer to it? Then, when calling something that requires @mode, if @mode is NULL, the called function would use vm_arch_default_mode() to get a pointer to the arch-specific default mode struct. If a test needs to modify the parameters then it can construct a mode struct from scratch or start with a copy of the default. As long as all members of the struct representing parameters, such as backing type, have defaults mapped to zero for the struct members, then we shouldn't be adding any burden to users that don't care about other parameters (other than ensuring their @mode struct was zero initialized). Thanks, drew
On Sat, Jul 23, 2022, Andrew Jones wrote: > On Fri, Jul 22, 2022 at 06:20:07PM +0000, Sean Christopherson wrote: > > On Fri, Jul 22, 2022, Ricardo Koller wrote: > > > What about dividing the changes in two. > > > > > > 1. Will add the struct to "__vm_create()" as part of this > > > series, and then use it in this commit. There's only one user > > > > > > dirty_log_test.c: vm = __vm_create(mode, 1, extra_mem_pages); > > > > > > so that would avoid having to touch every test as part of this patchset. > > > > > > 2. I can then send another series to add support for all the other > > > vm_create() functions. > > > > > > Alternatively, I can send a new series that does 1 and 2 afterwards. > > > WDYT? > > > > Don't do #2, ever. :-) The intent of having vm_create() versus is __vm_create() > > is so that tests that don't care about things like backing pages don't have to > > pass in extra params. I very much want to keep that behavior, i.e. I don't want > > to extend vm_create() at all. IMO, adding _anything_ is a slippery slope, e.g. > > why are the backing types special enough to get a param, but thing XYZ isn't? > > > > Thinking more, the struct idea probably isn't going to work all that well. It > > again puts the selftests into a state where it becomes difficult to control one > > setting and ignore the rest, e.g. the dirty_log_test and anything else with extra > > pages suddenly has to care about the backing type for page tables and code. > > > > Rather than adding a struct, what about extending the @mode param? We already > > have vm_mem_backing_src_type, we just need a way to splice things together. There > > are a total of four things we can control: primary mode, and then code, data, and > > page tables backing types. > > > > So, turn @mode into a uint32_t and carve out 8 bits for each of those four "modes". > > The defaults Just Work because VM_MEM_SRC_ANONYMOUS==0. 
> > Hi Sean, > > How about merging both proposals and turn @mode into a struct and pass > around a pointer to it? Then, when calling something that requires @mode, > if @mode is NULL, the called function would use vm_arch_default_mode() > to get a pointer to the arch-specific default mode struct. One tweak: rather than use @NULL as a magic param, #define VM_MODE_DEFAULT to point at a global struct, similar to what is already done for __aarch64__. E.g. __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0); does a much better job of self-documenting its behavior than this: __vm_create(NULL, nr_runnable_vcpus, 0); > If a test needs to modify the parameters then it can construct a mode struct > from scratch or start with a copy of the default. As long as all members of > the struct representing parameters, such as backing type, have defaults > mapped to zero for the struct members, then we shouldn't be adding any burden > to users that don't care about other parameters (other than ensuring their > @mode struct was zero initialized). I was hoping to avoid forcing tests to build a struct, but looking at all the existing users, they either use for_each_guest_mode() or just pass VM_MODE_DEFAULT, so in practice it's a complete non-issue. The page fault usage will likely be similar, e.g. programmatically generate the set of combinations to test. So yeah, let's try the struct approach.
On Tue, Jul 26, 2022 at 06:15:31PM +0000, Sean Christopherson wrote: > On Sat, Jul 23, 2022, Andrew Jones wrote: > > On Fri, Jul 22, 2022 at 06:20:07PM +0000, Sean Christopherson wrote: > > > On Fri, Jul 22, 2022, Ricardo Koller wrote: > > > > What about dividing the changes in two. > > > > > > > > 1. Will add the struct to "__vm_create()" as part of this > > > > series, and then use it in this commit. There's only one user > > > > > > > > dirty_log_test.c: vm = __vm_create(mode, 1, extra_mem_pages); > > > > > > > > so that would avoid having to touch every test as part of this patchset. > > > > > > > > 2. I can then send another series to add support for all the other > > > > vm_create() functions. > > > > > > > > Alternatively, I can send a new series that does 1 and 2 afterwards. > > > > WDYT? > > > > > > Don't do #2, ever. :-) The intent of having vm_create() versus is __vm_create() > > > is so that tests that don't care about things like backing pages don't have to > > > pass in extra params. I very much want to keep that behavior, i.e. I don't want > > > to extend vm_create() at all. IMO, adding _anything_ is a slippery slope, e.g. > > > why are the backing types special enough to get a param, but thing XYZ isn't? > > > > > > Thinking more, the struct idea probably isn't going to work all that well. It > > > again puts the selftests into a state where it becomes difficult to control one > > > setting and ignore the rest, e.g. the dirty_log_test and anything else with extra > > > pages suddenly has to care about the backing type for page tables and code. > > > > > > Rather than adding a struct, what about extending the @mode param? We already > > > have vm_mem_backing_src_type, we just need a way to splice things together. There > > > are a total of four things we can control: primary mode, and then code, data, and > > > page tables backing types. > > > > > > So, turn @mode into a uint32_t and carve out 8 bits for each of those four "modes". 
> > > The defaults Just Work because VM_MEM_SRC_ANONYMOUS==0. > > > > Hi Sean, > > > > How about merging both proposals and turn @mode into a struct and pass > > around a pointer to it? Then, when calling something that requires @mode, > > if @mode is NULL, the called function would use vm_arch_default_mode() > > to get a pointer to the arch-specific default mode struct. > > One tweak: rather that use @NULL as a magic param, #define VM_MODE_DEFAULT to > point at a global struct, similar to what is already done for __aarch64__. > > E.g. > > __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0); > > does a much better job of self-documenting its behavior than this: > > __vm_create(NULL, nr_runnable_vcpus, 0); > > > If a test needs to modify the parameters then it can construct a mode struct > > from scratch or start with a copy of the default. As long as all members of > > the struct representing parameters, such as backing type, have defaults > > mapped to zero for the struct members, then we shouldn't be adding any burden > > to users that don't care about other parameters (other than ensuring their > > @mode struct was zero initialized). > > I was hoping to avoid forcing tests to build a struct, but looking at all the > existing users, they either use for_each_guest_mode() or just pass VM_MODE_DEFAULT, > so in practice it's a complete non-issue. > > The page fault usage will likely be similar, e.g. programatically generate the set > of combinations to test. > > So yeah, let's try the struct approach. Thank you both for the suggestions. About to send v5 with the suggested changes, with a slight modification. V5 adds "struct kvm_vm_mem_params" which includes a "guest mode" field. The suggestion was to overload "guest mode". What I have doesn't change the meaning of "guest mode", and just keeps everything dealing with "guest mode" the same (like guest_mode.c). The main changes are: 1. 
adding a struct kvm_vm_mem_params that defines the memory layout: -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages); +struct kvm_vm *____vm_create(struct kvm_vm_mem_params *mem_params); 2. adding memslot vm->memslot.[code|pt|data] and change all allocators to use the right memslot, e.g.,: lib/elf should use the code memslot. 3. change the new page_fault_test.c setup_memslot() accordingly (much nicer). Let me know what you think. Thanks! Ricardo
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index e4497a3a27d4..13b913225ae7 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -139,6 +139,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c new file mode 100644 index 000000000000..bdda4e3fcdaa --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * page_fault_test.c - Test stage 2 faults. + * + * This test tries different combinations of guest accesses (e.g., write, + * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on + * hugetlbfs with a hole). It checks that the expected handling method is + * called (e.g., uffd faults with the right address and write/read flag). + */ + +#define _GNU_SOURCE +#include <linux/bitmap.h> +#include <fcntl.h> +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> +#include <asm/sysreg.h> +#include <linux/bitfield.h> +#include "guest_modes.h" +#include "userfaultfd_util.h" + +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_PT_SLOT_INDEX 2 + +/* Guest virtual addresses that point to the test page and its PTE. 
*/ +#define TEST_GVA 0xc0000000 +#define TEST_EXEC_GVA 0xc0000008 +#define TEST_PTE_GVA 0xb0000000 +#define TEST_DATA 0x0123456789ABCDEF + +static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; + +#define CMD_NONE (0) +#define CMD_SKIP_TEST (1ULL << 1) +#define CMD_HOLE_PT (1ULL << 2) +#define CMD_HOLE_TEST (1ULL << 3) + +#define PREPARE_FN_NR 10 +#define CHECK_FN_NR 10 + +uint64_t pte_gpa; + +enum { + PT, + TEST, + NR_MEMSLOTS +}; + +struct memslot_desc { + void *hva; + uint64_t gpa; + uint64_t size; + uint64_t guest_pages; + enum vm_mem_backing_src_type src_type; + uint32_t idx; +} memslot[NR_MEMSLOTS] = { + { + .idx = TEST_PT_SLOT_INDEX, + }, + { + .idx = TEST_MEM_SLOT_INDEX, + }, +}; + +static struct event_cnt { + int aborts; + int fail_vcpu_runs; +} events; + +struct test_desc { + const char *name; + uint64_t mem_mark_cmd; + /* Skip the test if any prepare function returns false */ + bool (*guest_prepare[PREPARE_FN_NR])(void); + void (*guest_test)(void); + void (*guest_test_check[CHECK_FN_NR])(void); + void (*dabt_handler)(struct ex_regs *regs); + void (*iabt_handler)(struct ex_regs *regs); + uint32_t pt_memslot_flags; + uint32_t test_memslot_flags; + bool skip; + struct event_cnt expected_events; +}; + +struct test_params { + enum vm_mem_backing_src_type src_type; + struct test_desc *test_desc; +}; + +static inline void flush_tlb_page(uint64_t vaddr) +{ + uint64_t page = vaddr >> 12; + + dsb(ishst); + asm volatile("tlbi vaae1is, %0" :: "r" (page)); + dsb(ish); + isb(); +} + +static void guest_write64(void) +{ + uint64_t val; + + WRITE_ONCE(*guest_test_memory, TEST_DATA); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +/* Check the system for atomic instructions. 
*/ +static bool guest_check_lse(void) +{ + uint64_t isar0 = read_sysreg(id_aa64isar0_el1); + uint64_t atomic; + + atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0); + return atomic >= 2; +} + +static bool guest_check_dc_zva(void) +{ + uint64_t dczid = read_sysreg(dczid_el0); + uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid); + + return dzp == 0; +} + +/* Compare and swap instruction. */ +static void guest_cas(void) +{ + uint64_t val; + + GUEST_ASSERT_EQ(guest_check_lse(), 1); + asm volatile(".arch_extension lse\n" + "casal %0, %1, [%2]\n" + :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +static void guest_read64(void) +{ + uint64_t val; + + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* Address translation instruction */ +static void guest_at(void) +{ + uint64_t par; + uint64_t paddr; + + asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); + par = read_sysreg(par_el1); + + /* Bit 1 indicates whether the AT was successful */ + GUEST_ASSERT_EQ(par & 1, 0); + /* The PA in bits [51:12] */ + paddr = par & (((1ULL << 40) - 1) << 12); + GUEST_ASSERT_EQ(paddr, memslot[TEST].gpa); +} + +/* + * The size of the block written by "dc zva" is guaranteed to be between (2 << + * 0) and (2 << 9), which is safe in our case as we need the write to happen + * for at least a word, and not more than a page. + */ +static void guest_dc_zva(void) +{ + uint16_t val; + + asm volatile("dc zva, %0\n" + "dsb ish\n" + :: "r" (guest_test_memory)); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* + * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). + * And that's special because KVM must take special care with those: they + * should still count as accesses for dirty logging or user-faulting, but + * should be handled differently on mmio. 
+ */ +static void guest_ld_preidx(void) +{ + uint64_t val; + uint64_t addr = TEST_GVA - 8; + + /* + * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is + * in a gap between memslots not backed by anything. + */ + asm volatile("ldr %0, [%1, #8]!" + : "=r" (val), "+r" (addr)); + GUEST_ASSERT_EQ(val, 0); + GUEST_ASSERT_EQ(addr, TEST_GVA); +} + +static void guest_st_preidx(void) +{ + uint64_t val = TEST_DATA; + uint64_t addr = TEST_GVA - 8; + + asm volatile("str %0, [%1, #8]!" + : "+r" (val), "+r" (addr)); + + GUEST_ASSERT_EQ(addr, TEST_GVA); + val = READ_ONCE(*guest_test_memory); +} + +static bool guest_set_ha(void) +{ + uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); + uint64_t hadbs, tcr; + + /* Skip if HA is not supported. */ + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); + if (hadbs == 0) + return false; + + tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + write_sysreg(tcr, tcr_el1); + isb(); + + return true; +} + +static bool guest_clear_pte_af(void) +{ + *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; + flush_tlb_page(TEST_PTE_GVA); + + return true; +} + +static void guest_check_pte_af(void) +{ + flush_tlb_page(TEST_PTE_GVA); + GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); +} + +static void guest_exec(void) +{ + int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; + int ret; + + ret = code(); + GUEST_ASSERT_EQ(ret, 0x77); +} + +static bool guest_prepare(struct test_desc *test) +{ + bool (*prepare_fn)(void); + int i; + + for (i = 0; i < PREPARE_FN_NR; i++) { + prepare_fn = test->guest_prepare[i]; + if (prepare_fn && !prepare_fn()) + return false; + } + + return true; +} + +static void guest_test_check(struct test_desc *test) +{ + void (*check_fn)(void); + int i; + + for (i = 0; i < CHECK_FN_NR; i++) { + check_fn = test->guest_test_check[i]; + if (check_fn) + check_fn(); + } +} + +static void guest_code(struct test_desc *test) +{ + if (!guest_prepare(test)) + GUEST_SYNC(CMD_SKIP_TEST); + + GUEST_SYNC(test->mem_mark_cmd); + + 
if (test->guest_test) + test->guest_test(); + + guest_test_check(test); + GUEST_DONE(); +} + +static void no_dabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, read_sysreg(far_el1)); +} + +static void no_iabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, regs->pc); +} + +/* Returns true to continue the test, and false if it should be skipped. */ +static bool punch_hole_in_memslot(struct kvm_vm *vm, + struct memslot_desc *memslot) +{ + int ret, fd; + void *hva; + + fd = vm_mem_region_get_src_fd(vm, memslot->idx); + if (fd != -1) { + ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + 0, memslot->size); + TEST_ASSERT(ret == 0, "fallocate failed, errno: %d\n", errno); + } else { + if (is_backing_src_hugetlb(memslot->src_type)) + return false; + + hva = addr_gpa2hva(vm, memslot->gpa); + ret = madvise(hva, memslot->size, MADV_DONTNEED); + TEST_ASSERT(ret == 0, "madvise failed, errno: %d\n", errno); + } + + return true; +} + +/* Returns true to continue the test, and false if it should be skipped. 
*/ +static bool handle_cmd(struct kvm_vm *vm, int cmd) +{ + bool continue_test = true; + + if (cmd == CMD_SKIP_TEST) + continue_test = false; + + if (cmd & CMD_HOLE_PT) + continue_test = punch_hole_in_memslot(vm, &memslot[PT]); + if (cmd & CMD_HOLE_TEST) + continue_test = punch_hole_in_memslot(vm, &memslot[TEST]); + + return continue_test; +} + +static void sync_stats_from_guest(struct kvm_vm *vm) +{ + struct event_cnt *ec = addr_gva2hva(vm, (uint64_t)&events); + + events.aborts += ec->aborts; +} + +void fail_vcpu_run_no_handler(int ret) +{ + TEST_FAIL("Unexpected vcpu run failure\n"); +} + +extern unsigned char __exec_test; + +void noinline __return_0x77(void) +{ + asm volatile("__exec_test: mov x0, #0x77\n" + "ret\n"); +} + +static void load_exec_code_for_test(void) +{ + uint64_t *code, *c; + + assert(TEST_EXEC_GVA - TEST_GVA); + code = memslot[TEST].hva + 8; + + /* + * We need the cast to be separate in order for the compiler to not + * complain with: "‘memcpy’ forming offset [1, 7] is out of the bounds + * [0, 1] of object ‘__exec_test’ with type ‘unsigned char’" + */ + c = (uint64_t *)&__exec_test; + memcpy(code, c, 8); +} + +static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + if (!test->dabt_handler) + test->dabt_handler = no_dabt_handler; + if (!test->iabt_handler) + test->iabt_handler = no_iabt_handler; + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + 0x25, test->dabt_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + 0x21, test->iabt_handler); +} + +/* + * Create a memslot for test data (memslot[TEST]) and another one for PT + * tables (memslot[PT]). This diagram show the resulting guest virtual and + * physical address space when using 4K backing pages for the memslots, and + * 4K guest pages. 
+ * + * Guest physical Guest virtual + * + * | | | | + * | | | | + * +--------------+ +-------------+ + * max_gfn - 0x1000 | TEST memslot |<---------+ test data | 0xc0000000 + * +--------------+ +-------------+ + * max_gfn - 0x2000 | gap |<---------+ gap | 0xbffff000 + * +--------------+ +-------------+ + * | | | | + * | | | | + * | PT memslot | | | + * | | +-------------+ + * max_gfn - 0x6000 | |<----+ | | + * +--------------+ | | | + * | | | | PTE for the | + * | | | | test data | + * | | +----+ page | 0xb0000000 + * | | +-------------+ + * | | | | + * | | | | + * + * Using different guest page sizes or backing pages will result in the + * same layout but at different addresses. In particular, the memslot + * sizes need to be multiple of the backing page sizes (e.g., 2MB). + */ +static void setup_memslots(struct kvm_vm *vm, enum vm_guest_mode mode, + struct test_params *p) +{ + uint64_t backing_page_size = get_backing_src_pagesz(p->src_type); + uint64_t guest_page_size = vm_guest_mode_params[mode].page_size; + struct test_desc *test = p->test_desc; + uint64_t gap_gpa; + uint64_t alignment; + int i; + + memslot[TEST].size = align_up(guest_page_size, backing_page_size); + /* + * We need one guest page for the PT table containing the PTE (for + * TEST_GVA), but might need more in case the higher level PT tables + * were not allocated yet. 
+ */ + memslot[PT].size = align_up(4 * guest_page_size, backing_page_size); + + for (i = 0; i < NR_MEMSLOTS; i++) { + memslot[i].guest_pages = memslot[i].size / guest_page_size; + memslot[i].src_type = p->src_type; + } + + /* Place the memslots GPAs at the end of physical memory */ + alignment = max(backing_page_size, guest_page_size); + memslot[TEST].gpa = (vm->max_gfn - memslot[TEST].guest_pages) * + guest_page_size; + memslot[TEST].gpa = align_down(memslot[TEST].gpa, alignment); + + /* Add a 1-guest_page gap between the two memslots */ + gap_gpa = memslot[TEST].gpa - guest_page_size; + /* Map the gap so it's still addressable from the guest. */ + virt_pg_map(vm, TEST_GVA - guest_page_size, gap_gpa); + + memslot[PT].gpa = gap_gpa - memslot[PT].size; + memslot[PT].gpa = align_down(memslot[PT].gpa, alignment); + + vm_userspace_mem_region_add(vm, p->src_type, memslot[PT].gpa, + memslot[PT].idx, memslot[PT].guest_pages, + test->pt_memslot_flags); + vm_userspace_mem_region_add(vm, p->src_type, memslot[TEST].gpa, + memslot[TEST].idx, memslot[TEST].guest_pages, + test->test_memslot_flags); + + for (i = 0; i < NR_MEMSLOTS; i++) + memslot[i].hva = addr_gpa2hva(vm, memslot[i].gpa); + + /* Map the test TEST_GVA using the PT memslot. */ + _virt_pg_map(vm, TEST_GVA, memslot[TEST].gpa, MT_NORMAL, + TEST_PT_SLOT_INDEX); + + /* + * Find the PTE of the test page and map it in the guest so it can + * clear the AF. + */ + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + TEST_ASSERT(memslot[PT].gpa <= pte_gpa && + pte_gpa < (memslot[PT].gpa + memslot[PT].size), + "The EPT should be in the PT memslot."); + /* This is an arbitrary requirement just to make things simpler. 
*/ + TEST_ASSERT(pte_gpa % guest_page_size == 0, + "The pte_gpa (%p) should be aligned to the guest page (%lx).", + (void *)pte_gpa, guest_page_size); + virt_pg_map(vm, TEST_PTE_GVA, pte_gpa); +} + +static void check_event_counts(struct test_desc *test) +{ + ASSERT_EQ(test->expected_events.aborts, events.aborts); +} + +static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) +{ + struct test_desc *test = p->test_desc; + + pr_debug("Test: %s\n", test->name); + pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_debug("Testing memory backing src type: %s\n", + vm_mem_backing_src_alias(p->src_type)->name); +} + +static void reset_event_counts(void) +{ + memset(&events, 0, sizeof(events)); +} + +static bool vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + bool skip_test = false; + struct ucall uc; + int stage; + + for (stage = 0; ; stage++) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + if (!handle_cmd(vm, uc.args[1])) { + pr_debug("Skipped.\n"); + skip_test = true; + goto done; + } + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx", + (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + break; + case UCALL_DONE: + pr_debug("Done.\n"); + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + return skip_test; +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *p = (struct test_params *)arg; + struct test_desc *test = p->test_desc; + struct kvm_vm *vm; + struct kvm_vcpu *vcpus[1], *vcpu; + bool skip_test = false; + + print_test_banner(mode, p); + + vm = __vm_create_with_vcpus(mode, 1, 6, guest_code, vcpus); + vcpu = vcpus[0]; + ucall_init(vm, NULL); + + reset_event_counts(); + setup_memslots(vm, mode, p); + + load_exec_code_for_test(); + setup_abort_handlers(vm, vcpu, test); + vcpu_args_set(vcpu, 1, test); + + sync_global_to_guest(vm, memslot); + + 
skip_test = vcpu_run_loop(vm, vcpu, test); + + sync_stats_from_guest(vm); + ucall_uninit(vm); + kvm_vm_free(vm); + + if (!skip_test) + check_event_counts(test); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-s mem-type]\n", name); + puts(""); + guest_modes_help(); + backing_src_help("-s"); + puts(""); +} + +#define SNAME(s) #s +#define SCAT2(a, b) SNAME(a ## _ ## b) +#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) + +#define _CHECK(_test) _CHECK_##_test +#define _PREPARE(_test) _PREPARE_##_test +#define _PREPARE_guest_read64 NULL +#define _PREPARE_guest_ld_preidx NULL +#define _PREPARE_guest_write64 NULL +#define _PREPARE_guest_st_preidx NULL +#define _PREPARE_guest_exec NULL +#define _PREPARE_guest_at NULL +#define _PREPARE_guest_dc_zva guest_check_dc_zva +#define _PREPARE_guest_cas guest_check_lse + +/* With or without access flag checks */ +#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af +#define _PREPARE_no_af NULL +#define _CHECK_with_af guest_check_pte_af +#define _CHECK_no_af NULL + +/* Performs an access and checks that no faults (no events) were triggered. */ +#define TEST_ACCESS(_access, _with_af, _mark_cmd) \ +{ \ + .name = SCAT3(_access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .expected_events = { 0 }, \ +} + +static struct test_desc tests[] = { + /* Check that HW is setting the Access Flag (AF) (sanity checks). 
*/ + TEST_ACCESS(guest_read64, with_af, CMD_NONE), + TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_cas, with_af, CMD_NONE), + TEST_ACCESS(guest_write64, with_af, CMD_NONE), + TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE), + TEST_ACCESS(guest_exec, with_af, CMD_NONE), + + /* + * Accessing a hole in the test memslot (punched with fallocate or + * madvise) shouldn't fault (more sanity checks). + */ + TEST_ACCESS(guest_read64, no_af, CMD_HOLE_TEST), + TEST_ACCESS(guest_cas, no_af, CMD_HOLE_TEST), + TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_TEST), + TEST_ACCESS(guest_write64, no_af, CMD_HOLE_TEST), + TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_TEST), + TEST_ACCESS(guest_at, no_af, CMD_HOLE_TEST), + TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_TEST), + + { 0 } +}; + +static void for_each_test_and_guest_mode( + void (*func)(enum vm_guest_mode m, void *a), + enum vm_mem_backing_src_type src_type) +{ + struct test_desc *t; + + for (t = &tests[0]; t->name; t++) { + if (t->skip) + continue; + + struct test_params p = { + .src_type = src_type, + .test_desc = t, + }; + + for_each_guest_mode(run_test, &p); + } +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type; + int opt; + + setbuf(stdout, NULL); + + src_type = DEFAULT_VM_MEM_SRC; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hm:s:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + exit(0); + } + } + + for_each_test_and_guest_mode(run_test, src_type); + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 74f10d006e15..818665e86f32 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -110,6 
+110,12 @@ enum { #define ESR_EC_WP_CURRENT 0x35 #define ESR_EC_BRK_INS 0x3c +/* Access flag */ +#define PTE_AF (1ULL << 10) + +/* Access flag update enable/disable */ +#define TCR_EL1_HA (1ULL << 39) + void aarch64_get_supported_page_sizes(uint32_t ipa, bool *ps4k, bool *ps16k, bool *ps64k);
Add a new test for stage 2 faults when using different combinations of guest accesses (e.g., write, S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on hugetlbfs with a hole). The next commits will add different handling methods and more faults (e.g., uffd and dirty logging). This first commit starts by adding two sanity checks for all types of accesses: AF setting by the hardware, and accessing memslots with holes.