diff mbox series

[kvm-unit-tests,3/3] arm64: Add cache code generation test

Message ID 20190930142508.25102-4-alexandru.elisei@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: Add code generation test | expand

Commit Message

Alexandru Elisei Sept. 30, 2019, 2:25 p.m. UTC
Caches are a mysterious creature on arm64, requiring a more hands-on
approach from the programmer than on x86. When generating code, two cache
maintenance operations are generally required: an invalidation for the
stale instruction and a clean to the PoU (Point of Unification) for the new
instruction. Fortunately, the ARM architecture has features to alleviate
some of this overhead, which are advertised via the IDC and DIC bits in
CTR_EL0: if IDC is 1, then the dcache clean is not required, and if DIC is
1, the icache invalidation can be absent. KVM exposes these bits to the
guest.

Until Linux v4.16.1, KVM performed an icache invalidation each time a stage
2 page was mapped. This was then optimized so that the icache invalidation
was performed when the guest tried to execute code from the page for the
first time. And that was optimized again when support for the DIC bit was
added to KVM.

The interactions between a guest that is generating code, the stage 2
tables and the IDC and DIC bits can be subtle, especially when KVM
optimizations come into play. Let's add a test that generates a few
instructions and checks that KVM indeed honors those bits.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 arm/Makefile.arm64 |   1 +
 arm/cache.c        | 122 +++++++++++++++++++++++++++++++++++++++++++++
 arm/unittests.cfg  |   6 +++
 3 files changed, 129 insertions(+)
 create mode 100644 arm/cache.c

Comments

Andrew Jones Sept. 30, 2019, 3:10 p.m. UTC | #1
On Mon, Sep 30, 2019 at 03:25:08PM +0100, Alexandru Elisei wrote:
> Caches are a mysterious creature on arm64, requiring a more hands-on
> approach from the programmer than on x86. When generating code, two cache
> maintenance operations are generally required: an invalidation for the
> stale instruction and a clean to the PoU (Point of Unification) for the new
> instruction. Fortunately, the ARM architecture has features to alleviate
> some of this overhead, which are advertised via the IDC and DIC bits in
> CTR_EL0: if IDC is 1, then the dcache clean is not required, and if DIC is
> 1, the icache invalidation can be absent. KVM exposes these bits to the
> guest.
> 
> Until Linux v4.16.1, KVM performed an icache invalidation each time a stage
> 2 page was mapped. This was then optimized so that the icache invalidation
> was performed when the guest tried to execute code from the page for the
> first time. And that was optimized again when support for the DIC bit was
> added to KVM.
> 
> The interactions between a guest that is generating code, the stage 2
> tables and the IDC and DIC bits can be subtle, especially when KVM
> optimizations come into play. Let's add a test that generates a few
> instructions and checks that KVM indeed honors those bits.
> 
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> ---
>  arm/Makefile.arm64 |   1 +
>  arm/cache.c        | 122 +++++++++++++++++++++++++++++++++++++++++++++
>  arm/unittests.cfg  |   6 +++
>  3 files changed, 129 insertions(+)
>  create mode 100644 arm/cache.c
> 
> diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
> index 35de5ea333b4..6d3dc2c4a464 100644
> --- a/arm/Makefile.arm64
> +++ b/arm/Makefile.arm64
> @@ -25,6 +25,7 @@ OBJDIRS += lib/arm64
>  # arm64 specific tests
>  tests = $(TEST_DIR)/timer.flat
>  tests += $(TEST_DIR)/micro-bench.flat
> +tests += $(TEST_DIR)/cache.flat
>  
>  include $(SRCDIR)/$(TEST_DIR)/Makefile.common
>  
> diff --git a/arm/cache.c b/arm/cache.c
> new file mode 100644
> index 000000000000..2939b85a8c9a
> --- /dev/null
> +++ b/arm/cache.c
> @@ -0,0 +1,122 @@
> +#include <libcflat.h>
> +#include <alloc_page.h>
> +#include <asm/mmu.h>
> +#include <asm/processor.h>
> +
> +#define NTIMES			(1 << 16)
> +
> +#define CTR_DIC			(1UL << 29)
> +#define CTR_IDC			(1UL << 28)
> +
> +#define CLIDR_LOC_SHIFT		24
> +#define CLIDR_LOC_MASK		(7UL << CLIDR_LOC_SHIFT)
> +#define CLIDR_LOUU_SHIFT	27
> +#define CLIDR_LOUU_MASK		(7UL << CLIDR_LOUU_SHIFT)
> +#define CLIDR_LOUIS_SHIFT	21
> +#define CLIDR_LOUIS_MASK	(7UL << CLIDR_LOUIS_SHIFT)
> +
> +#define RET			0xd65f03c0
> +#define MOV_X0(x)		(0xd2800000 | (((x) & 0xffff) << 5))
> +
> +#define clean_dcache_pou(addr)			\
> +	asm volatile("dc cvau, %0\n" :: "r" (addr) : "memory")
> +#define inval_icache_pou(addr)			\
> +	asm volatile("ic ivau, %0\n" :: "r" (addr) : "memory")
> +
> +typedef int (*fn_t)(void);
> +
> +static inline void prime_icache(u32 *code, u32 insn)
> +{
> +	*code = insn;
> +	/* This is the sequence recommended in ARM DDI 0487E.a, page B2-136. */
> +	clean_dcache_pou(code);
> +	dsb(ish);
> +	inval_icache_pou(code);
> +	dsb(ish);
> +	isb();
> +
> +	((fn_t)code)();
> +}
> +
> +static void check_code_generation(bool dcache_clean, bool icache_inval)
> +{
> +	u32 fn[] = {MOV_X0(0x42), RET};
> +	u32 *code = alloc_page();
> +	unsigned long sctlr;
> +	int i, ret;
> +	bool success;
> +
> +	/* Make sure we can execute from a writable page */
> +	mmu_clear_user((unsigned long)code);
> +
> +	sctlr = read_sysreg(sctlr_el1);
> +	if (sctlr & SCTLR_EL1_WXN) {
> +		sctlr &= ~SCTLR_EL1_WXN;
> +		write_sysreg(sctlr, sctlr_el1);
> +		isb();
> +		/* SCTLR_EL1.WXN is permitted to be cached in a TLB. */
> +		flush_tlb_all();
> +	}
> +
> +	for (i = 0; i < ARRAY_SIZE(fn); i++) {
> +		*(code + i) = fn[i];
> +		clean_dcache_pou(code + i);
> +		dsb(ish);
> +		inval_icache_pou(code + i);
> +	}
> +	dsb(ish);
> +	isb();
> +
> +	/* Sanity check */
> +	((fn_t)code)();
> +
> +	success = true;
> +	for (i = 0; i < NTIMES; i++) {
> +		prime_icache(code, MOV_X0(0x42));
> +		*code = MOV_X0(0x66);
> +		if (dcache_clean)
> +			clean_dcache_pou(code);
> +		if (icache_inval) {
> +			if (dcache_clean)
> +				dsb(ish);
> +			inval_icache_pou(code);
> +		}
> +		dsb(ish);
> +		isb();
> +
> +		ret = ((fn_t)code)();
> +		success &= (ret == 0x66);
> +	}
> +
> +	report("code generation", success);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	u64 ctr, clidr;
> +	bool dcache_clean, icache_inval;
> +
> +	report_prefix_push("IDC-DIC");
> +
> +	ctr = read_sysreg(ctr_el0);
> +	dcache_clean = !(ctr & CTR_IDC);
> +	icache_inval = !(ctr & CTR_DIC);
> +
> +	if (dcache_clean) {
> +		clidr = read_sysreg(clidr_el1);
> +		if ((clidr & CLIDR_LOC_MASK) == 0)
> +			dcache_clean = false;
> +		if ((clidr & CLIDR_LOUU_MASK) == 0 &&
> +		    (clidr & CLIDR_LOUIS_MASK) == 0)
> +			dcache_clean = false;
> +	}
> +
> +	if (dcache_clean)
> +		report_info("dcache clean to PoU required");
> +	if (icache_inval)
> +		report_info("icache invalidation to PoU required");
> +
> +	check_code_generation(dcache_clean, icache_inval);
> +
> +	return report_summary();
> +}
> diff --git a/arm/unittests.cfg b/arm/unittests.cfg
> index 6d3df92a4e28..37f07788c5f0 100644
> --- a/arm/unittests.cfg
> +++ b/arm/unittests.cfg
> @@ -142,3 +142,9 @@ smp = 2
>  groups = nodefault,micro-bench
>  accel = kvm
>  arch = arm64
> +
> +# Cache emulation tests
> +[cache]
> +file = cache.flat
> +arch = arm64
> +group = cache

s/group/groups/

Besides this 'groups' typo

Reviewed-by: Andrew Jones <drjones@redhat.com>
diff mbox series

Patch

diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
index 35de5ea333b4..6d3dc2c4a464 100644
--- a/arm/Makefile.arm64
+++ b/arm/Makefile.arm64
@@ -25,6 +25,7 @@  OBJDIRS += lib/arm64
 # arm64 specific tests
 tests = $(TEST_DIR)/timer.flat
 tests += $(TEST_DIR)/micro-bench.flat
+tests += $(TEST_DIR)/cache.flat
 
 include $(SRCDIR)/$(TEST_DIR)/Makefile.common
 
diff --git a/arm/cache.c b/arm/cache.c
new file mode 100644
index 000000000000..2939b85a8c9a
--- /dev/null
+++ b/arm/cache.c
@@ -0,0 +1,122 @@ 
+#include <libcflat.h>
+#include <alloc_page.h>
+#include <asm/mmu.h>
+#include <asm/processor.h>
+
+#define NTIMES			(1 << 16)
+
+#define CTR_DIC			(1UL << 29)
+#define CTR_IDC			(1UL << 28)
+
+#define CLIDR_LOC_SHIFT		24
+#define CLIDR_LOC_MASK		(7UL << CLIDR_LOC_SHIFT)
+#define CLIDR_LOUU_SHIFT	27
+#define CLIDR_LOUU_MASK		(7UL << CLIDR_LOUU_SHIFT)
+#define CLIDR_LOUIS_SHIFT	21
+#define CLIDR_LOUIS_MASK	(7UL << CLIDR_LOUIS_SHIFT)
+
+#define RET			0xd65f03c0
+#define MOV_X0(x)		(0xd2800000 | (((x) & 0xffff) << 5))
+
+#define clean_dcache_pou(addr)			\
+	asm volatile("dc cvau, %0\n" :: "r" (addr) : "memory")
+#define inval_icache_pou(addr)			\
+	asm volatile("ic ivau, %0\n" :: "r" (addr) : "memory")
+
+typedef int (*fn_t)(void);
+
+static inline void prime_icache(u32 *code, u32 insn)
+{
+	*code = insn;
+	/* This is the sequence recommended in ARM DDI 0487E.a, page B2-136. */
+	clean_dcache_pou(code);
+	dsb(ish);
+	inval_icache_pou(code);
+	dsb(ish);
+	isb();
+
+	((fn_t)code)();
+}
+
+static void check_code_generation(bool dcache_clean, bool icache_inval)
+{
+	u32 fn[] = {MOV_X0(0x42), RET};
+	u32 *code = alloc_page();
+	unsigned long sctlr;
+	int i, ret;
+	bool success;
+
+	/* Make sure we can execute from a writable page */
+	mmu_clear_user((unsigned long)code);
+
+	sctlr = read_sysreg(sctlr_el1);
+	if (sctlr & SCTLR_EL1_WXN) {
+		sctlr &= ~SCTLR_EL1_WXN;
+		write_sysreg(sctlr, sctlr_el1);
+		isb();
+		/* SCTLR_EL1.WXN is permitted to be cached in a TLB. */
+		flush_tlb_all();
+	}
+
+	for (i = 0; i < ARRAY_SIZE(fn); i++) {
+		*(code + i) = fn[i];
+		clean_dcache_pou(code + i);
+		dsb(ish);
+		inval_icache_pou(code + i);
+	}
+	dsb(ish);
+	isb();
+
+	/* Sanity check */
+	((fn_t)code)();
+
+	success = true;
+	for (i = 0; i < NTIMES; i++) {
+		prime_icache(code, MOV_X0(0x42));
+		*code = MOV_X0(0x66);
+		if (dcache_clean)
+			clean_dcache_pou(code);
+		if (icache_inval) {
+			if (dcache_clean)
+				dsb(ish);
+			inval_icache_pou(code);
+		}
+		dsb(ish);
+		isb();
+
+		ret = ((fn_t)code)();
+		success &= (ret == 0x66);
+	}
+
+	report("code generation", success);
+}
+
+int main(int argc, char **argv)
+{
+	u64 ctr, clidr;
+	bool dcache_clean, icache_inval;
+
+	report_prefix_push("IDC-DIC");
+
+	ctr = read_sysreg(ctr_el0);
+	dcache_clean = !(ctr & CTR_IDC);
+	icache_inval = !(ctr & CTR_DIC);
+
+	if (dcache_clean) {
+		clidr = read_sysreg(clidr_el1);
+		if ((clidr & CLIDR_LOC_MASK) == 0)
+			dcache_clean = false;
+		if ((clidr & CLIDR_LOUU_MASK) == 0 &&
+		    (clidr & CLIDR_LOUIS_MASK) == 0)
+			dcache_clean = false;
+	}
+
+	if (dcache_clean)
+		report_info("dcache clean to PoU required");
+	if (icache_inval)
+		report_info("icache invalidation to PoU required");
+
+	check_code_generation(dcache_clean, icache_inval);
+
+	return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index 6d3df92a4e28..37f07788c5f0 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -142,3 +142,9 @@  smp = 2
 groups = nodefault,micro-bench
 accel = kvm
 arch = arm64
+
+# Cache emulation tests
+[cache]
+file = cache.flat
+arch = arm64
+groups = cache