diff mbox series

[v2,07/11] KVM: selftests: aarch64: Add aarch64/page_fault_test

Message ID 20220323225405.267155-8-ricarkol@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: selftests: Add aarch64/page_fault_test | expand

Commit Message

Ricardo Koller March 23, 2022, 10:54 p.m. UTC
Add a new test for stage 2 faults when using different combinations of
guest accesses (e.g., write, S1PTW), backing source type (e.g., anon)
and types of faults (e.g., read on hugetlbfs with a hole). The next
commits will add different handling methods and more faults (e.g., uffd
and dirty logging). This first commit starts by adding two sanity checks
for all types of accesses: AF setting by the hw, and accessing memslots
with holes.

Note that this commit borrows some code from kvm-unit-tests: RET,
MOV_X0, and flush_tlb_page.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
---
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/aarch64/page_fault_test.c   | 667 ++++++++++++++++++
 2 files changed, 668 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/aarch64/page_fault_test.c

Comments

Oliver Upton March 25, 2022, 9:39 p.m. UTC | #1
Hi Ricardo,

On Wed, Mar 23, 2022 at 03:54:01PM -0700, Ricardo Koller wrote:
> Add a new test for stage 2 faults when using different combinations of
> guest accesses (e.g., write, S1PTW), backing source type (e.g., anon)
> and types of faults (e.g., read on hugetlbfs with a hole). The next
> commits will add different handling methods and more faults (e.g., uffd
> and dirty logging). This first commit starts by adding two sanity checks
> for all types of accesses: AF setting by the hw, and accessing memslots
> with holes.
> 
> Note that this commit borrows some code from kvm-unit-tests: RET,
> MOV_X0, and flush_tlb_page.
> 
> Signed-off-by: Ricardo Koller <ricarkol@google.com>
> ---
>  tools/testing/selftests/kvm/Makefile          |   1 +
>  .../selftests/kvm/aarch64/page_fault_test.c   | 667 ++++++++++++++++++
>  2 files changed, 668 insertions(+)
>  create mode 100644 tools/testing/selftests/kvm/aarch64/page_fault_test.c
> 
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index bc5f89b3700e..6a192798b217 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -103,6 +103,7 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test
>  TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
>  TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
>  TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
> +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
>  TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
>  TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
>  TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
> diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> new file mode 100644
> index 000000000000..00477a4f10cb
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> @@ -0,0 +1,667 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * page_fault_test.c - Test stage 2 faults.
> + *
> + * This test tries different combinations of guest accesses (e.g., write,
> + * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
> + * hugetlbfs with a hole). It checks that the expected handling method is
> + * called (e.g., uffd faults with the right address and write/read flag).
> + */
> +
> +#define _GNU_SOURCE

I don't think this is necessary, defining this in tests is mostly
leftover from Google's old internal test implementation :)

http://lore.kernel.org/r/YjgYh89k8s+w34FQ@google.com

[...]

> +/* Access flag */
> +#define PTE_AF					(1ULL << 10)
> +
> +/* Access flag update enable/disable */
> +#define TCR_EL1_HA				(1ULL << 39)

Should these be lifted into/come from a shared header file?

[...]

> +static const uint64_t test_gva = GUEST_TEST_GVA;
> +static const uint64_t test_exec_gva = GUEST_TEST_EXEC_GVA;
> +static const uint64_t pte_gva = GUEST_TEST_PTE_GVA;

Could you just use the macros directly?

> +uint64_t pte_gpa;
> +
> +enum { PT, TEST, NR_MEMSLOTS};

While it doesn't appear you need to directly use this type by name, I
think it would be best to give it a name still and/or a clarifying
comment.

> +struct memslot_desc {
> +	void *hva;
> +	uint64_t gpa;
> +	uint64_t size;
> +	uint64_t guest_pages;
> +	uint64_t backing_pages;
> +	enum vm_mem_backing_src_type src_type;
> +	uint32_t idx;
> +} memslot[NR_MEMSLOTS] = {
> +	{
> +		.idx = TEST_PT_SLOT_INDEX,
> +		.backing_pages = PT_MEMSLOT_BACKING_SRC_NPAGES,
> +	},
> +	{
> +		.idx = TEST_MEM_SLOT_INDEX,
> +		.backing_pages = TEST_MEMSLOT_BACKING_SRC_NPAGES,
> +	},
> +};
> +
> +static struct event_cnt {
> +	int aborts;
> +	int fail_vcpu_runs;
> +} events;

nit: for static structs I'd recommend keeping the type name and variable
name the same.

[...]

> +/* Check the system for atomic instructions. */
> +static bool guest_check_lse(void)
> +{
> +	uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
> +	uint64_t atomic = (isar0 >> 20) & 7;

Is it possible to do:

  FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0)

> +	return atomic >= 2;
> +}
> +
> +/* Compare and swap instruction. */
> +static void guest_test_cas(void)
> +{
> +	uint64_t val;
> +	uint64_t addr = test_gva;
> +
> +	GUEST_ASSERT_EQ(guest_check_lse(), 1);
> +	asm volatile(".arch_extension lse\n"
> +		     "casal %0, %1, [%2]\n"
> +			:: "r" (0), "r" (0x0123456789ABCDEF), "r" (addr));

Please put the test data in a macro :)

[...]

> +static void guest_test_dc_zva(void)
> +{
> +	/* The smallest guaranteed block size (bs) is a word. */
> +	uint16_t val;

There's also an assumption that the maximal block size (2 << 9 bytes) is
also safe, since it is within the bounds of the test page. It might be a
good idea to surface that as well.

> +	asm volatile("dc zva, %0\n"

this depends on DCZID_EL1.DZP=0b0, right?

[...]

> +static void guest_test_ld_preidx(void)
> +{
> +	uint64_t val;
> +	uint64_t addr = test_gva - 8;
> +
> +	/*
> +	 * This ends up accessing "test_gva + 8 - 8", where "test_gva - 8"
> +	 * is not backed by a memslot.
> +	 */
> +	asm volatile("ldr %0, [%1, #8]!"
> +			: "=r" (val), "+r" (addr));
> +	GUEST_ASSERT_EQ(val, 0);
> +	GUEST_ASSERT_EQ(addr, test_gva);
> +}
> +
> +static void guest_test_st_preidx(void)
> +{
> +	uint64_t val = 0x0123456789ABCDEF;
> +	uint64_t addr = test_gva - 8;
> +
> +	asm volatile("str %0, [%1, #8]!"
> +			: "+r" (val), "+r" (addr));
> +
> +	GUEST_ASSERT_EQ(addr, test_gva);
> +	val = READ_ONCE(*(uint64_t *)test_gva);
> +}

What is the reason for testing pre-indexing instructions? These
instructions already have a bad rap under virtualization given that we
completely bail if the IPA isn't backed by a memslot. Given that, I
think you should state up front the expectations around these
instructions.

Now, I agree that KVM is on the hook for handling this correctly if the
IPA is backed, but a clarifying comment would be helpful.

It seems to me these tests assert we don't freak out about
ESR_EL2.ISV=0b0 unless we absolutely must.

> +static bool guest_set_ha(void)
> +{
> +	uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
> +	uint64_t hadbs = mmfr1 & 6;

See suggestion on FIELD_GET(...)

> +static void load_exec_code_for_test(void)
> +{
> +	uint32_t *code;
> +
> +	/* Write this "code" into test_exec_gva */
> +	assert(test_exec_gva - test_gva);
> +	code = memslot[TEST].hva + 8;
> +
> +	code[0] = MOV_X0(0x77);
> +	code[1] = RET;

It might be nicer to use naked 'asm' and memcpy() that into the test
memslot. That way, there is zero question if this hand assembly is
correct or not :)

> +}
> +
> +static void setup_guest_args(struct kvm_vm *vm, struct test_desc *test)
> +{
> +	vm_vaddr_t test_desc_gva;
> +
> +	test_desc_gva = vm_vaddr_alloc_page(vm);
> +	memcpy(addr_gva2hva(vm, test_desc_gva), test,
> +			sizeof(struct test_desc));

Aren't the test descriptors already visible in the guest's address
space? The only caveat with globals is that if userspace tweaks a global
we must explicitly sync it to the guest.

So I think you could just tell the guest the test index or a direct
pointer, right?

[...]

> +static void setup_memslots(struct kvm_vm *vm, enum vm_guest_mode mode,
> +		struct test_params *p)
> +{
> +	uint64_t large_page_size = get_backing_src_pagesz(p->src_type);

nit: large_page_size seems a bit confusing to me. Theoretically this
could be a 4k page from anon memory, right?

> +	uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
> +	struct test_desc *test = p->test_desc;
> +	uint64_t hole_gpa;
> +	uint64_t alignment;
> +	int i;
> +
> +	/* Calculate the test and PT memslot sizes */
> +	for (i = 0; i < NR_MEMSLOTS; i++) {
> +		memslot[i].size = large_page_size * memslot[i].backing_pages;
> +		memslot[i].guest_pages = memslot[i].size / guest_page_size;
> +		memslot[i].src_type = p->src_type;
> +	}
> +
> +	TEST_ASSERT(memslot[TEST].size >= guest_page_size,
> +			"The test memslot should have space one guest page.\n");
> +	TEST_ASSERT(memslot[PT].size >= (4 * guest_page_size),
> +			"The PT memslot sould have space for 4 guest pages.\n");
> +
> +	/* Place the memslots GPAs at the end of physical memory */
> +	alignment = max(large_page_size, guest_page_size);
> +	memslot[TEST].gpa = (vm_get_max_gfn(vm) - memslot[TEST].guest_pages) *
> +		guest_page_size;
> +	memslot[TEST].gpa = align_down(memslot[TEST].gpa, alignment);

newline

> +	/* Add a 1-guest_page-hole between the two memslots */
> +	hole_gpa = memslot[TEST].gpa - guest_page_size;
> +	virt_pg_map(vm, test_gva - guest_page_size, hole_gpa);

newline

> +	memslot[PT].gpa = hole_gpa - (memslot[PT].guest_pages *
> +			guest_page_size);
> +	memslot[PT].gpa = align_down(memslot[PT].gpa, alignment);
> +
> +	/* Create memslots for and test data and a PTE. */

nit: for the test data

> +	vm_userspace_mem_region_add(vm, p->src_type, memslot[PT].gpa,
> +			memslot[PT].idx, memslot[PT].guest_pages,
> +			test->pt_memslot_flags);
> +	vm_userspace_mem_region_add(vm, p->src_type, memslot[TEST].gpa,
> +			memslot[TEST].idx, memslot[TEST].guest_pages,
> +			test->test_memslot_flags);
> +
> +	for (i = 0; i < NR_MEMSLOTS; i++)
> +		memslot[i].hva = addr_gpa2hva(vm, memslot[i].gpa);
> +
> +	/* Map the test test_gva using the PT memslot. */
> +	_virt_pg_map(vm, test_gva, memslot[TEST].gpa,
> +			4 /* NORMAL (See DEFAULT_MAIR_EL1) */,

Should we provide an enumeration to give meaningful names to the memory
attribute indices?

> +			TEST_PT_SLOT_INDEX);
> +
> +	/*
> +	 * Find the PTE of the test page and map it in the guest so it can
> +	 * clear the AF.
> +	 */
> +	pte_gpa = vm_get_pte_gpa(vm, test_gva);
> +	TEST_ASSERT(memslot[PT].gpa <= pte_gpa &&
> +			pte_gpa < (memslot[PT].gpa + memslot[PT].size),
> +			"The EPT should be in the PT memslot.");
> +	/* This is an artibrary requirement just to make things simpler. */
> +	TEST_ASSERT(pte_gpa % guest_page_size == 0,
> +			"The pte_gpa (%p) should be aligned to the guest page (%lx).",
> +			(void *)pte_gpa, guest_page_size);
> +	virt_pg_map(vm, pte_gva, pte_gpa);

Curious: if we are going to have more tests that involve guest
inspection of the page tables, should all of the stage-1 paging
structures be made visible to the guest?

[...]

> +
> +static bool vcpu_run_loop(struct kvm_vm *vm, struct test_desc *test)
> +{
> +	bool skip_test = false;
> +	struct ucall uc;
> +	int stage;
> +
> +	for (stage = 0; ; stage++) {
> +		vcpu_run(vm, VCPU_ID);
> +
> +		switch (get_ucall(vm, VCPU_ID, &uc)) {
> +		case UCALL_SYNC:
> +			if (uc.args[1] == CMD_SKIP_TEST) {
> +				pr_debug("Skipped.\n");
> +				skip_test = true;
> +				goto done;
> +			}

Is there a way to do this check from handle_cmd()?

[...]

> +	/* Accessing a hole shouldn't fault (more sanity checks). */
> +	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_ld_preidx),
[...]
> +	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_st_preidx),

I think you may be overloading the 'hole' terminology. The guest's IPA
space is set up with a 1-page hole between the TEST and PT memslots.
Additionally, it would appear that you're hole punching with fallocate()
and madvise().

--
Thanks,
Oliver
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index bc5f89b3700e..6a192798b217 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -103,6 +103,7 @@  TEST_GEN_PROGS_x86_64 += system_counter_offset_test
 TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
+TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
 TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
new file mode 100644
index 000000000000..00477a4f10cb
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -0,0 +1,667 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * page_fault_test.c - Test stage 2 faults.
+ *
+ * This test tries different combinations of guest accesses (e.g., write,
+ * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
+ * hugetlbfs with a hole). It checks that the expected handling method is
+ * called (e.g., uffd faults with the right address and write/read flag).
+ */
+
+#define _GNU_SOURCE
+#include <linux/bitmap.h>
+#include <fcntl.h>
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include "guest_modes.h"
+#include "userfaultfd_util.h"
+
+#define VCPU_ID					0
+
+#define TEST_MEM_SLOT_INDEX			1
+#define TEST_PT_SLOT_INDEX			2
+
+/* Max number of backing pages per guest page */
+#define BACKING_PG_PER_GUEST_PG			(64 / 4)
+
+/* Test memslot in backing source pages */
+#define TEST_MEMSLOT_BACKING_SRC_NPAGES		(1 * BACKING_PG_PER_GUEST_PG)
+
+/* PT memslot size in backing source pages */
+#define PT_MEMSLOT_BACKING_SRC_NPAGES		(4 * BACKING_PG_PER_GUEST_PG)
+
+/* Guest virtual addresses that point to the test page and its PTE. */
+#define GUEST_TEST_GVA				0xc0000000
+#define GUEST_TEST_EXEC_GVA			0xc0000008
+#define GUEST_TEST_PTE_GVA			0xd0000000
+
+/* Access flag */
+#define PTE_AF					(1ULL << 10)
+
+/* Access flag update enable/disable */
+#define TCR_EL1_HA				(1ULL << 39)
+
+#define CMD_SKIP_TEST				(-1LL)
+#define CMD_HOLE_PT				(1ULL << 2)
+#define CMD_HOLE_TEST				(1ULL << 3)
+
+#define PREPARE_FN_NR				10
+#define CHECK_FN_NR				10
+
+static const uint64_t test_gva = GUEST_TEST_GVA;
+static const uint64_t test_exec_gva = GUEST_TEST_EXEC_GVA;
+static const uint64_t pte_gva = GUEST_TEST_PTE_GVA;
+uint64_t pte_gpa;
+
+enum { PT, TEST, NR_MEMSLOTS};
+
+struct memslot_desc {
+	void *hva;
+	uint64_t gpa;
+	uint64_t size;
+	uint64_t guest_pages;
+	uint64_t backing_pages;
+	enum vm_mem_backing_src_type src_type;
+	uint32_t idx;
+} memslot[NR_MEMSLOTS] = {
+	{
+		.idx = TEST_PT_SLOT_INDEX,
+		.backing_pages = PT_MEMSLOT_BACKING_SRC_NPAGES,
+	},
+	{
+		.idx = TEST_MEM_SLOT_INDEX,
+		.backing_pages = TEST_MEMSLOT_BACKING_SRC_NPAGES,
+	},
+};
+
+static struct event_cnt {
+	int aborts;
+	int fail_vcpu_runs;
+} events;
+
+struct test_desc {
+	const char *name;
+	uint64_t mem_mark_cmd;
+	/* Skip the test if any prepare function returns false */
+	bool (*guest_prepare[PREPARE_FN_NR])(void);
+	void (*guest_test)(void);
+	void (*guest_test_check[CHECK_FN_NR])(void);
+	void (*dabt_handler)(struct ex_regs *regs);
+	void (*iabt_handler)(struct ex_regs *regs);
+	uint32_t pt_memslot_flags;
+	uint32_t test_memslot_flags;
+	void (*guest_pre_run)(struct kvm_vm *vm);
+	bool skip;
+	struct event_cnt expected_events;
+};
+
+struct test_params {
+	enum vm_mem_backing_src_type src_type;
+	struct test_desc *test_desc;
+};
+
+
+static inline void flush_tlb_page(uint64_t vaddr)
+{
+	uint64_t page = vaddr >> 12;
+
+	dsb(ishst);
+	asm("tlbi vaae1is, %0" :: "r" (page));
+	dsb(ish);
+	isb();
+}
+
+#define RET			0xd65f03c0
+#define MOV_X0(x)		(0xd2800000 | (((x) & 0xffff) << 5))
+
+static void guest_test_nop(void)
+{}
+
+static void guest_test_write64(void)
+{
+	uint64_t val;
+
+	WRITE_ONCE(*((uint64_t *)test_gva), 0x0123456789ABCDEF);
+	val = READ_ONCE(*(uint64_t *)test_gva);
+	GUEST_ASSERT_EQ(val, 0x0123456789ABCDEF);
+}
+
+/* Check the system for atomic instructions. */
+static bool guest_check_lse(void)
+{
+	uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
+	uint64_t atomic = (isar0 >> 20) & 7;
+
+	return atomic >= 2;
+}
+
+/* Compare and swap instruction. */
+static void guest_test_cas(void)
+{
+	uint64_t val;
+	uint64_t addr = test_gva;
+
+	GUEST_ASSERT_EQ(guest_check_lse(), 1);
+	asm volatile(".arch_extension lse\n"
+		     "casal %0, %1, [%2]\n"
+			:: "r" (0), "r" (0x0123456789ABCDEF), "r" (addr));
+	val = READ_ONCE(*(uint64_t *)(addr));
+	GUEST_ASSERT_EQ(val, 0x0123456789ABCDEF);
+}
+
+static void guest_test_read64(void)
+{
+	uint64_t val;
+
+	val = READ_ONCE(*(uint64_t *)test_gva);
+	GUEST_ASSERT_EQ(val, 0);
+}
+
+/*
+ * Address translation instruction: translate test_gva with a stage 1 EL1
+ * read (AT S1E1R) and check the result reported in PAR_EL1.
+ */
+static void guest_test_at(void)
+{
+	uint64_t par;
+	uint64_t addr;
+
+	asm volatile("at s1e1r, %0" :: "r" (test_gva));
+	/* The AT result is only guaranteed to be visible in PAR_EL1 after an ISB. */
+	isb();
+	par = read_sysreg(par_el1);
+
+	/* Bit 0 (PAR_EL1.F) is clear when the AT was successful */
+	GUEST_ASSERT_EQ(par & 1, 0);
+	/* The PA in bits [51:12] */
+	addr = par & (((1ULL << 40) - 1) << 12);
+	GUEST_ASSERT_EQ(addr, memslot[TEST].gpa);
+}
+
+static void guest_test_dc_zva(void)
+{
+	/* The smallest guaranteed block size (bs) is a word. */
+	uint16_t val;
+
+	asm volatile("dc zva, %0\n"
+			"dsb ish\n"
+			:: "r" (test_gva));
+	val = READ_ONCE(*(uint16_t *)test_gva);
+	GUEST_ASSERT_EQ(val, 0);
+}
+
+static void guest_test_ld_preidx(void)
+{
+	uint64_t val;
+	uint64_t addr = test_gva - 8;
+
+	/*
+	 * This ends up accessing "test_gva + 8 - 8", where "test_gva - 8"
+	 * is not backed by a memslot.
+	 */
+	asm volatile("ldr %0, [%1, #8]!"
+			: "=r" (val), "+r" (addr));
+	GUEST_ASSERT_EQ(val, 0);
+	GUEST_ASSERT_EQ(addr, test_gva);
+}
+
+static void guest_test_st_preidx(void)
+{
+	uint64_t val = 0x0123456789ABCDEF;
+	uint64_t addr = test_gva - 8;
+
+	asm volatile("str %0, [%1, #8]!"
+			: "+r" (val), "+r" (addr));
+
+	GUEST_ASSERT_EQ(addr, test_gva);
+	val = READ_ONCE(*(uint64_t *)test_gva);
+}
+
+/*
+ * Enable hardware updates of the access flag by setting TCR_EL1.HA.
+ *
+ * Returns false (so that the test gets skipped) when ID_AA64MMFR1_EL1 does
+ * not advertise hardware management of the access flag, true otherwise.
+ */
+static bool guest_set_ha(void)
+{
+	uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+	/*
+	 * HAFDBS is ID_AA64MMFR1_EL1[3:0]: 0b0001 means AF updates only and
+	 * 0b0010 adds dirty state, so any non-zero value implies HA support.
+	 */
+	uint64_t hadbs = mmfr1 & 0xf;
+	uint64_t tcr;
+
+	/* Skip if HA is not supported. */
+	if (hadbs == 0)
+		return false;
+
+	tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+	write_sysreg(tcr, tcr_el1);
+	isb();
+
+	return true;
+}
+
+/*
+ * Clear the access flag in the PTE of the test page, which the guest reaches
+ * through the pte_gva alias. Always returns true so that it can be used as a
+ * prepare function.
+ */
+static bool guest_clear_pte_af(void)
+{
+	*((uint64_t *)pte_gva) &= ~PTE_AF;
+	/*
+	 * The modified PTE translates test_gva, so that is the translation
+	 * that has to be dropped from the TLB for the next access to walk
+	 * the page tables again.
+	 */
+	flush_tlb_page(test_gva);
+
+	return true;
+}
+
+static void guest_check_pte_af(void)
+{
+	flush_tlb_page(pte_gva);
+	GUEST_ASSERT_EQ(*((uint64_t *)pte_gva) & PTE_AF, PTE_AF);
+}
+
+static void guest_test_exec(void)
+{
+	int (*code)(void) = (int (*)(void))test_exec_gva;
+	int ret;
+
+	ret = code();
+	GUEST_ASSERT_EQ(ret, 0x77);
+}
+
+static bool guest_prepare(struct test_desc *test)
+{
+	bool (*prepare_fn)(void);
+	int i;
+
+	for (i = 0; i < PREPARE_FN_NR; i++) {
+		prepare_fn = test->guest_prepare[i];
+		if (prepare_fn && !prepare_fn())
+			return false;
+	}
+
+	return true;
+}
+
+static void guest_test_check(struct test_desc *test)
+{
+	void (*check_fn)(void);
+	int i;
+
+	for (i = 0; i < CHECK_FN_NR; i++) {
+		check_fn = test->guest_test_check[i];
+		if (!check_fn)
+			continue;
+		check_fn();
+	}
+}
+
+static void guest_code(struct test_desc *test)
+{
+	if (!test->guest_test)
+		test->guest_test = guest_test_nop;
+
+	if (!guest_prepare(test))
+		GUEST_SYNC(CMD_SKIP_TEST);
+
+	GUEST_SYNC(test->mem_mark_cmd);
+	test->guest_test();
+
+	guest_test_check(test);
+	GUEST_DONE();
+}
+
+static void no_dabt_handler(struct ex_regs *regs)
+{
+	GUEST_ASSERT_1(false, read_sysreg(far_el1));
+}
+
+static void no_iabt_handler(struct ex_regs *regs)
+{
+	GUEST_ASSERT_1(false, regs->pc);
+}
+
+static void punch_hole_in_memslot(struct kvm_vm *vm,
+		struct memslot_desc *memslot)
+{
+	int ret, fd;
+	void *hva;
+
+	fd = vm_mem_region_get_src_fd(vm, memslot->idx);
+	if (fd != -1) {
+		ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				0, memslot->size);
+		TEST_ASSERT(ret == 0, "fallocate failed, errno: %d\n", errno);
+	} else {
+		hva = addr_gpa2hva(vm, memslot->gpa);
+		ret = madvise(hva, memslot->size, MADV_DONTNEED);
+		TEST_ASSERT(ret == 0, "madvise failed, errno: %d\n", errno);
+	}
+}
+
+static void handle_cmd(struct kvm_vm *vm, int cmd)
+{
+	if (cmd & CMD_HOLE_PT)
+		punch_hole_in_memslot(vm, &memslot[PT]);
+	if (cmd & CMD_HOLE_TEST)
+		punch_hole_in_memslot(vm, &memslot[TEST]);
+}
+
+static void sync_stats_from_guest(struct kvm_vm *vm)
+{
+	struct event_cnt *ec = addr_gva2hva(vm, (uint64_t)&events);
+
+	events.aborts += ec->aborts;
+}
+
+void fail_vcpu_run_no_handler(int ret)
+{
+	TEST_FAIL("Unexpected vcpu run failure\n");
+}
+
+static uint64_t get_total_guest_pages(enum vm_guest_mode mode,
+		struct test_params *p)
+{
+	uint64_t large_page_size = get_backing_src_pagesz(p->src_type);
+	uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
+	uint64_t size;
+
+	size = PT_MEMSLOT_BACKING_SRC_NPAGES * large_page_size;
+	size += TEST_MEMSLOT_BACKING_SRC_NPAGES * large_page_size;
+
+	return size / guest_page_size;
+}
+
+/*
+ * Write a tiny function ("mov x0, #0x77; ret") into the test memslot, at the
+ * host address that backs test_exec_gva, for guest_test_exec() to call.
+ */
+static void load_exec_code_for_test(void)
+{
+	uint64_t offset = test_exec_gva - test_gva;
+	uint32_t *code;
+
+	/* The code goes after the test data, on an instruction boundary. */
+	assert(test_exec_gva > test_gva);
+	assert(offset % sizeof(*code) == 0);
+
+	/* Write this "code" into test_exec_gva */
+	code = (uint32_t *)((char *)memslot[TEST].hva + offset);
+
+	code[0] = MOV_X0(0x77);
+	code[1] = RET;
+}
+
+static void setup_guest_args(struct kvm_vm *vm, struct test_desc *test)
+{
+	vm_vaddr_t test_desc_gva;
+
+	test_desc_gva = vm_vaddr_alloc_page(vm);
+	memcpy(addr_gva2hva(vm, test_desc_gva), test,
+			sizeof(struct test_desc));
+	vcpu_args_set(vm, 0, 1, test_desc_gva);
+}
+
+static void setup_abort_handlers(struct kvm_vm *vm, struct test_desc *test)
+{
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	if (!test->dabt_handler)
+		test->dabt_handler = no_dabt_handler;
+	if (!test->iabt_handler)
+		test->iabt_handler = no_iabt_handler;
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+			0x25, test->dabt_handler);
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+			0x21, test->iabt_handler);
+}
+
+/*
+ * Create the PT and TEST memslots at the end of the guest physical address
+ * space, leaving a one-guest-page hole between them, and map both test_gva
+ * (the test data) and pte_gva (the PTE of the test page) in the guest.
+ *
+ * The resulting sizes, GPAs and HVAs are recorded in the global memslot[]
+ * array, and the GPA of the test page's PTE in pte_gpa.
+ */
+static void setup_memslots(struct kvm_vm *vm, enum vm_guest_mode mode,
+		struct test_params *p)
+{
+	/* Page size of the backing source; not necessarily a huge page. */
+	uint64_t backing_page_size = get_backing_src_pagesz(p->src_type);
+	uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
+	struct test_desc *test = p->test_desc;
+	uint64_t hole_gpa;
+	uint64_t alignment;
+	int i;
+
+	/* Calculate the test and PT memslot sizes */
+	for (i = 0; i < NR_MEMSLOTS; i++) {
+		memslot[i].size = backing_page_size * memslot[i].backing_pages;
+		memslot[i].guest_pages = memslot[i].size / guest_page_size;
+		memslot[i].src_type = p->src_type;
+	}
+
+	TEST_ASSERT(memslot[TEST].size >= guest_page_size,
+			"The test memslot should have space for one guest page.\n");
+	TEST_ASSERT(memslot[PT].size >= (4 * guest_page_size),
+			"The PT memslot should have space for 4 guest pages.\n");
+
+	/* Place the memslots GPAs at the end of physical memory */
+	alignment = max(backing_page_size, guest_page_size);
+	memslot[TEST].gpa = (vm_get_max_gfn(vm) - memslot[TEST].guest_pages) *
+		guest_page_size;
+	memslot[TEST].gpa = align_down(memslot[TEST].gpa, alignment);
+
+	/* Add a 1-guest_page-hole between the two memslots */
+	hole_gpa = memslot[TEST].gpa - guest_page_size;
+	virt_pg_map(vm, test_gva - guest_page_size, hole_gpa);
+
+	memslot[PT].gpa = hole_gpa - (memslot[PT].guest_pages *
+			guest_page_size);
+	memslot[PT].gpa = align_down(memslot[PT].gpa, alignment);
+
+	/* Create memslots for the test data and a PTE. */
+	vm_userspace_mem_region_add(vm, p->src_type, memslot[PT].gpa,
+			memslot[PT].idx, memslot[PT].guest_pages,
+			test->pt_memslot_flags);
+	vm_userspace_mem_region_add(vm, p->src_type, memslot[TEST].gpa,
+			memslot[TEST].idx, memslot[TEST].guest_pages,
+			test->test_memslot_flags);
+
+	for (i = 0; i < NR_MEMSLOTS; i++)
+		memslot[i].hva = addr_gpa2hva(vm, memslot[i].gpa);
+
+	/* Map test_gva using the PT memslot. */
+	_virt_pg_map(vm, test_gva, memslot[TEST].gpa,
+			4 /* NORMAL (See DEFAULT_MAIR_EL1) */,
+			TEST_PT_SLOT_INDEX);
+
+	/*
+	 * Find the PTE of the test page and map it in the guest so it can
+	 * clear the AF.
+	 */
+	pte_gpa = vm_get_pte_gpa(vm, test_gva);
+	TEST_ASSERT(memslot[PT].gpa <= pte_gpa &&
+			pte_gpa < (memslot[PT].gpa + memslot[PT].size),
+			"The PTE should be in the PT memslot.");
+	/* This is an arbitrary requirement just to make things simpler. */
+	TEST_ASSERT(pte_gpa % guest_page_size == 0,
+			"The pte_gpa (%p) should be aligned to the guest page (%lx).",
+			(void *)pte_gpa, guest_page_size);
+	virt_pg_map(vm, pte_gva, pte_gpa);
+}
+
+static void check_event_counts(struct test_desc *test)
+{
+	ASSERT_EQ(test->expected_events.aborts,	events.aborts);
+}
+
+static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
+{
+	struct test_desc *test = p->test_desc;
+
+	pr_debug("Test: %s\n", test->name);
+	pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+	pr_debug("Testing memory backing src type: %s\n",
+			vm_mem_backing_src_alias(p->src_type)->name);
+}
+
+static void reset_event_counts(void)
+{
+	memset(&events, 0, sizeof(events));
+}
+
+static bool vcpu_run_loop(struct kvm_vm *vm, struct test_desc *test)
+{
+	bool skip_test = false;
+	struct ucall uc;
+	int stage;
+
+	for (stage = 0; ; stage++) {
+		vcpu_run(vm, VCPU_ID);
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_SYNC:
+			if (uc.args[1] == CMD_SKIP_TEST) {
+				pr_debug("Skipped.\n");
+				skip_test = true;
+				goto done;
+			}
+			handle_cmd(vm, uc.args[1]);
+			break;
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+				(const char *)uc.args[0],
+				__FILE__, uc.args[1], uc.args[2], uc.args[3]);
+			break;
+		case UCALL_DONE:
+			pr_debug("Done.\n");
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	return skip_test;
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+	struct test_params *p = (struct test_params *)arg;
+	struct test_desc *test = p->test_desc;
+	struct kvm_vm *vm;
+	bool skip_test = false;
+
+	print_test_banner(mode, p);
+
+	vm = vm_create_with_vcpus(mode, 1, DEFAULT_GUEST_PHY_PAGES,
+			get_total_guest_pages(mode, p), 0, guest_code, NULL);
+	ucall_init(vm, NULL);
+
+	reset_event_counts();
+	setup_memslots(vm, mode, p);
+
+	load_exec_code_for_test();
+	setup_abort_handlers(vm, test);
+	setup_guest_args(vm, test);
+
+	if (test->guest_pre_run)
+		test->guest_pre_run(vm);
+
+	sync_global_to_guest(vm, memslot);
+
+	skip_test = vcpu_run_loop(vm, test);
+
+	sync_stats_from_guest(vm);
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+
+	if (!skip_test)
+		check_event_counts(test);
+}
+
+static void for_each_test_and_guest_mode(void (*func)(enum vm_guest_mode, void *),
+		enum vm_mem_backing_src_type src_type);
+
+/*
+ * Print the command line usage for the program called name, followed by the
+ * guest mode and backing source help texts.
+ */
+static void help(char *name)
+{
+	puts("");
+	/* Keep in sync with the getopt() string in main(): "hm:s:". */
+	printf("usage: %s [-h] [-m mode] [-s mem-type]\n", name);
+	puts("");
+	guest_modes_help();
+	backing_src_help("-s");
+	puts("");
+}
+
+int main(int argc, char *argv[])
+{
+	enum vm_mem_backing_src_type src_type;
+	int opt;
+
+	setbuf(stdout, NULL);
+
+	src_type = DEFAULT_VM_MEM_SRC;
+
+	guest_modes_append_default();
+
+	while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
+		switch (opt) {
+		case 'm':
+			guest_modes_cmdline(optarg);
+			break;
+		case 's':
+			src_type = parse_backing_src_type(optarg);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			exit(0);
+		}
+	}
+
+	for_each_test_and_guest_mode(run_test, src_type);
+	return 0;
+}
+
+#define SNAME(s)		#s
+#define SCAT(a, b)		SNAME(a ## _ ## b)
+
+#define TEST_BASIC_ACCESS(__a, ...)						\
+{										\
+	.name			= SNAME(BASIC_ACCESS ## _ ## __a),		\
+	.guest_test		= __a,						\
+	.expected_events	= { 0 },					\
+	__VA_ARGS__								\
+}
+
+#define __AF_TEST_ARGS								\
+	.guest_prepare		= { guest_set_ha, guest_clear_pte_af, },	\
+	.guest_test_check	= { guest_check_pte_af, },			\
+
+#define __AF_LSE_TEST_ARGS							\
+	.guest_prepare		= { guest_set_ha, guest_clear_pte_af,		\
+				    guest_check_lse, },				\
+	.guest_test_check	= { guest_check_pte_af, },			\
+
+#define __PREPARE_LSE_TEST_ARGS							\
+	.guest_prepare		= { guest_check_lse, },
+
+#define TEST_HW_ACCESS_FLAG(__a)						\
+	TEST_BASIC_ACCESS(__a, __AF_TEST_ARGS)
+
+#define TEST_ACCESS_ON_HOLE_NO_FAULTS(__a, ...)					\
+{										\
+	.name			= SNAME(ACCESS_ON_HOLE_NO_FAULTS ## _ ## __a),	\
+	.guest_test		= __a,						\
+	.mem_mark_cmd		= CMD_HOLE_TEST,				\
+	.expected_events	= { 0 },					\
+	__VA_ARGS__								\
+}
+
+static struct test_desc tests[] = {
+	/* Check that HW is setting the AF (sanity checks). */
+	TEST_HW_ACCESS_FLAG(guest_test_read64),
+	TEST_HW_ACCESS_FLAG(guest_test_ld_preidx),
+	TEST_BASIC_ACCESS(guest_test_cas, __AF_LSE_TEST_ARGS),
+	TEST_HW_ACCESS_FLAG(guest_test_write64),
+	TEST_HW_ACCESS_FLAG(guest_test_st_preidx),
+	TEST_HW_ACCESS_FLAG(guest_test_dc_zva),
+	TEST_HW_ACCESS_FLAG(guest_test_exec),
+
+	/* Accessing a hole shouldn't fault (more sanity checks). */
+	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_read64),
+	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_cas, __PREPARE_LSE_TEST_ARGS),
+	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_ld_preidx),
+	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_write64),
+	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_at),
+	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_dc_zva),
+	TEST_ACCESS_ON_HOLE_NO_FAULTS(guest_test_st_preidx),
+
+	{ 0 },
+};
+
+/*
+ * Call func, through for_each_guest_mode(), for every entry of tests[] that
+ * is not marked as skipped. tests[] is terminated by an entry with a NULL
+ * name.
+ *
+ * func receives the guest mode and a pointer to a struct test_params that
+ * holds src_type and the test descriptor; the struct only lives for the
+ * duration of the call.
+ */
+static void for_each_test_and_guest_mode(
+		void (*func)(enum vm_guest_mode m, void *a),
+		enum vm_mem_backing_src_type src_type)
+{
+	struct test_desc *t;
+
+	for (t = &tests[0]; t->name; t++) {
+		struct test_params p = {
+			.src_type = src_type,
+			.test_desc = t,
+		};
+
+		if (t->skip)
+			continue;
+
+		/* Run the callback the caller asked for, not a fixed one. */
+		for_each_guest_mode(func, &p);
+	}
+}