@@ -10,9 +10,17 @@ arch_LDFLAGS = -pie -n
arch_LDFLAGS += -z notext
CFLAGS += -mstrict-align
+sve_flag := $(call cc-option, -march=armv8.5-a+sve, "")
+ifneq ($(strip $(sve_flag)),)
+# Don't pass the option to the compiler, we don't
+# want the compiler to generate SVE instructions.
+CFLAGS += -DCC_HAS_SVE
+endif
+
mno_outline_atomics := $(call cc-option, -mno-outline-atomics, "")
CFLAGS += $(mno_outline_atomics)
CFLAGS += -DCONFIG_RELOC
+CFLAGS += -mgeneral-regs-only
define arch_elf_check =
$(if $(shell ! $(READELF) -rW $(1) >&/dev/null && echo "nok"),
@@ -49,6 +57,7 @@ tests = $(TEST_DIR)/timer.$(exe)
tests += $(TEST_DIR)/micro-bench.$(exe)
tests += $(TEST_DIR)/cache.$(exe)
tests += $(TEST_DIR)/debug.$(exe)
+tests += $(TEST_DIR)/fpu.$(exe)
tests += $(TEST_DIR)/realm-rsi.$(exe)
include $(SRCDIR)/$(TEST_DIR)/Makefile.common
@@ -12,6 +12,7 @@
#include <asm/ptrace.h>
#include <asm/page.h>
#include <asm/pgtable-hwdef.h>
+#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/sysreg.h>
#include <asm/smc-rsi.h>
new file mode 100644
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Limited.
+ * All rights reserved.
+ */
+
+#include <libcflat.h>
+#include <asm/smp.h>
+#include <stdlib.h>
+
+#include <asm/rsi.h>
+
+#define CPU0_ID 0 /* primary (boot) test CPU */
+#define CPU1_ID (CPU0_ID + 1) /* secondary CPU used as the remote side */
+#define CPUS_MAX (CPU1_ID + 1) /* the test requires exactly two CPUs */
+#define FPU_QREG_MAX 32 /* number of Q (and Z) registers checked */
+#define FPU_RESULT_PASS (-1U) /* all 32 per-register pass bits set */
+
+/*
+ * Write 8 bytes of random data in random. Returns true on success, false on
+ * failure.
+ */
+static inline bool arch_collect_entropy(uint64_t *random)
+{
+	unsigned long ret;
+
+	asm volatile(
+	"	mrs  %[ptr], " xstr(RNDR) "\n" /* FEAT_RNG random-number register; gate on system_supports_rndr() */
+	"	cset %[ret], ne\n" /* RNDR sets NZCV to 0b0100 on failure */
+	:
+	  [ret] "=r" (ret),
+	  [ptr] "=r" (*random)
+	:
+	: "cc" /* NZCV written by RNDR, consumed by cset */
+	);
+
+	return ret == 1; /* cset wrote 1 on success, 0 on failure */
+}
+
+/* Store q0..q31 (512 bytes) to the 128-bit-aligned buffer @val. */
+#define fpu_reg_read(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile("stp q0, q1, [%0], #32\n\t"	\
+		     "stp q2, q3, [%0], #32\n\t"	\
+		     "stp q4, q5, [%0], #32\n\t"	\
+		     "stp q6, q7, [%0], #32\n\t"	\
+		     "stp q8, q9, [%0], #32\n\t"	\
+		     "stp q10, q11, [%0], #32\n\t"	\
+		     "stp q12, q13, [%0], #32\n\t"	\
+		     "stp q14, q15, [%0], #32\n\t"	\
+		     "stp q16, q17, [%0], #32\n\t"	\
+		     "stp q18, q19, [%0], #32\n\t"	\
+		     "stp q20, q21, [%0], #32\n\t"	\
+		     "stp q22, q23, [%0], #32\n\t"	\
+		     "stp q24, q25, [%0], #32\n\t"	\
+		     "stp q26, q27, [%0], #32\n\t"	\
+		     "stp q28, q29, [%0], #32\n\t"	\
+		     "stp q30, q31, [%0], #32\n\t"	\
+		     : "+r" (__val) /* "+r": base address is read AND post-incremented */ \
+		     :					\
+		     : "q0", "q1", "q2", "q3",		\
+		       "q4", "q5", "q6", "q7",		\
+		       "q8", "q9", "q10", "q11",	\
+		       "q12", "q13", "q14",		\
+		       "q15", "q16", "q17",		\
+		       "q18", "q19", "q20",		\
+		       "q21", "q22", "q23",		\
+		       "q24", "q25", "q26",		\
+		       "q27", "q28", "q29",		\
+		       "q30", "q31", "memory");		\
+})
+
+/* Load q0..q31 (512 bytes) from the 128-bit-aligned buffer @val. */
+#define fpu_reg_write(val)				\
+do {							\
+	uint64_t *__val = (val);			\
+	asm volatile("ldp q0, q1, [%0], #32\n\t"	\
+		     "ldp q2, q3, [%0], #32\n\t"	\
+		     "ldp q4, q5, [%0], #32\n\t"	\
+		     "ldp q6, q7, [%0], #32\n\t"	\
+		     "ldp q8, q9, [%0], #32\n\t"	\
+		     "ldp q10, q11, [%0], #32\n\t"	\
+		     "ldp q12, q13, [%0], #32\n\t"	\
+		     "ldp q14, q15, [%0], #32\n\t"	\
+		     "ldp q16, q17, [%0], #32\n\t"	\
+		     "ldp q18, q19, [%0], #32\n\t"	\
+		     "ldp q20, q21, [%0], #32\n\t"	\
+		     "ldp q22, q23, [%0], #32\n\t"	\
+		     "ldp q24, q25, [%0], #32\n\t"	\
+		     "ldp q26, q27, [%0], #32\n\t"	\
+		     "ldp q28, q29, [%0], #32\n\t"	\
+		     "ldp q30, q31, [%0], #32\n\t"	\
+		     : "+r" (__val) /* "+r": register is post-incremented, not a pure input */ \
+		     :					\
+		     : "q0", "q1", "q2", "q3",		\
+		       "q4", "q5", "q6", "q7",		\
+		       "q8", "q9", "q10", "q11",	\
+		       "q12", "q13", "q14",		\
+		       "q15", "q16", "q17",		\
+		       "q18", "q19", "q20",		\
+		       "q21", "q22", "q23",		\
+		       "q24", "q25", "q26",		\
+		       "q27", "q28", "q29",		\
+		       "q30", "q31", "memory");		\
+} while (0)
+
+#ifdef CC_HAS_SVE
+/* Store z0..z31 to @val; offsets scale with the runtime vector length. */
+#define sve_reg_read(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile(".arch_extension sve\n"		\
+		     "str z0, [%0, #0, MUL VL]\n"	\
+		     "str z1, [%0, #1, MUL VL]\n"	\
+		     "str z2, [%0, #2, MUL VL]\n"	\
+		     "str z3, [%0, #3, MUL VL]\n"	\
+		     "str z4, [%0, #4, MUL VL]\n"	\
+		     "str z5, [%0, #5, MUL VL]\n"	\
+		     "str z6, [%0, #6, MUL VL]\n"	\
+		     "str z7, [%0, #7, MUL VL]\n"	\
+		     "str z8, [%0, #8, MUL VL]\n"	\
+		     "str z9, [%0, #9, MUL VL]\n"	\
+		     "str z10, [%0, #10, MUL VL]\n"	\
+		     "str z11, [%0, #11, MUL VL]\n"	\
+		     "str z12, [%0, #12, MUL VL]\n"	\
+		     "str z13, [%0, #13, MUL VL]\n"	\
+		     "str z14, [%0, #14, MUL VL]\n"	\
+		     "str z15, [%0, #15, MUL VL]\n"	\
+		     "str z16, [%0, #16, MUL VL]\n"	\
+		     "str z17, [%0, #17, MUL VL]\n"	\
+		     "str z18, [%0, #18, MUL VL]\n"	\
+		     "str z19, [%0, #19, MUL VL]\n"	\
+		     "str z20, [%0, #20, MUL VL]\n"	\
+		     "str z21, [%0, #21, MUL VL]\n"	\
+		     "str z22, [%0, #22, MUL VL]\n"	\
+		     "str z23, [%0, #23, MUL VL]\n"	\
+		     "str z24, [%0, #24, MUL VL]\n"	\
+		     "str z25, [%0, #25, MUL VL]\n"	\
+		     "str z26, [%0, #26, MUL VL]\n"	\
+		     "str z27, [%0, #27, MUL VL]\n"	\
+		     "str z28, [%0, #28, MUL VL]\n"	\
+		     "str z29, [%0, #29, MUL VL]\n"	\
+		     "str z30, [%0, #30, MUL VL]\n"	\
+		     "str z31, [%0, #31, MUL VL]\n"	\
+		     : /* no outputs: MUL VL addressing leaves %0 unmodified */ \
+		     : "r" (__val)			\
+		     : "z0", "z1", "z2", "z3",		\
+		       "z4", "z5", "z6", "z7",		\
+		       "z8", "z9", "z10", "z11",	\
+		       "z12", "z13", "z14",		\
+		       "z15", "z16", "z17",		\
+		       "z18", "z19", "z20",		\
+		       "z21", "z22", "z23",		\
+		       "z24", "z25", "z26",		\
+		       "z27", "z28", "z29",		\
+		       "z30", "z31", "memory");		\
+})
+
+#define sve_reg_write(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile(".arch_extension sve\n" /* assembler-only; CFLAGS never enables SVE codegen */ \
+		     "ldr z0, [%0, #0, MUL VL]\n"	\
+		     "ldr z1, [%0, #1, MUL VL]\n"	\
+		     "ldr z2, [%0, #2, MUL VL]\n"	\
+		     "ldr z3, [%0, #3, MUL VL]\n"	\
+		     "ldr z4, [%0, #4, MUL VL]\n"	\
+		     "ldr z5, [%0, #5, MUL VL]\n"	\
+		     "ldr z6, [%0, #6, MUL VL]\n"	\
+		     "ldr z7, [%0, #7, MUL VL]\n"	\
+		     "ldr z8, [%0, #8, MUL VL]\n"	\
+		     "ldr z9, [%0, #9, MUL VL]\n"	\
+		     "ldr z10, [%0, #10, MUL VL]\n"	\
+		     "ldr z11, [%0, #11, MUL VL]\n"	\
+		     "ldr z12, [%0, #12, MUL VL]\n"	\
+		     "ldr z13, [%0, #13, MUL VL]\n"	\
+		     "ldr z14, [%0, #14, MUL VL]\n"	\
+		     "ldr z15, [%0, #15, MUL VL]\n"	\
+		     "ldr z16, [%0, #16, MUL VL]\n"	\
+		     "ldr z17, [%0, #17, MUL VL]\n"	\
+		     "ldr z18, [%0, #18, MUL VL]\n"	\
+		     "ldr z19, [%0, #19, MUL VL]\n"	\
+		     "ldr z20, [%0, #20, MUL VL]\n"	\
+		     "ldr z21, [%0, #21, MUL VL]\n"	\
+		     "ldr z22, [%0, #22, MUL VL]\n"	\
+		     "ldr z23, [%0, #23, MUL VL]\n"	\
+		     "ldr z24, [%0, #24, MUL VL]\n"	\
+		     "ldr z25, [%0, #25, MUL VL]\n"	\
+		     "ldr z26, [%0, #26, MUL VL]\n"	\
+		     "ldr z27, [%0, #27, MUL VL]\n"	\
+		     "ldr z28, [%0, #28, MUL VL]\n"	\
+		     "ldr z29, [%0, #29, MUL VL]\n"	\
+		     "ldr z30, [%0, #30, MUL VL]\n"	\
+		     "ldr z31, [%0, #31, MUL VL]\n"	\
+		     :					\
+		     : "r" (__val) /* base only; MUL VL offsets leave %0 unmodified */ \
+		     : "z0", "z1", "z2", "z3",		\
+		       "z4", "z5", "z6", "z7",		\
+		       "z8", "z9", "z10", "z11",	\
+		       "z12", "z13", "z14",		\
+		       "z15", "z16", "z17",		\
+		       "z18", "z19", "z20",		\
+		       "z21", "z22", "z23",		\
+		       "z24", "z25", "z26",		\
+		       "z27", "z28", "z29",		\
+		       "z30", "z31", "memory");		\
+})
+#else
+#define sve_reg_read(val)	report_abort("SVE: not supported") /* toolchain lacks an SVE assembler */
+#define sve_reg_write(val)	report_abort("SVE: not supported")
+#endif
+
+static void nr_cpu_check(int required)
+{
+	if (nr_cpus < required) /* the tests migrate state between two CPUs */
+		report_abort("At least %d cpus required", required);
+}
+
+/**
+ * @brief check if the FPU/SIMD/SVE register contents are the same as
+ * the input data provided. Returns a bitmask with bit N set when
+ * register N matched; FPU_RESULT_PASS means all 32 registers matched.
+ */
+static uint32_t __fpuregs_testall(uint64_t *indata, int sve)
+{
+	/* 128b aligned array to read data into */
+	uint64_t outdata[FPU_QREG_MAX * 2]
+			 __attribute__((aligned(sizeof(__uint128_t)))) = {
+			[0 ... ((FPU_QREG_MAX * 2) - 1)] = 0 };
+	uint8_t regcnt	= 0;
+	uint32_t result	= 0;
+
+	if (indata == NULL)
+		report_abort("invalid data pointer received");
+
+	/* Read data from FPU/SVE registers */
+	if (sve)
+		sve_reg_read(outdata);
+	else
+		fpu_reg_read(outdata);
+
+	/* Check if the data is the same */
+	for (regcnt = 0; regcnt < (FPU_QREG_MAX * 2); regcnt += 2) {
+		if ((outdata[regcnt] != indata[regcnt]) ||
+			(outdata[regcnt + 1] != indata[regcnt + 1])) {
+			report_info(
+			"%s save/restore failed for reg: %c%u expected: %lx_%lx received: %lx_%lx\n",
+			sve ? "SVE" : "FPU/SIMD",
+			sve ? 'z' : 'q',
+			regcnt / 2,
+			indata[regcnt + 1], indata[regcnt],
+			outdata[regcnt + 1], outdata[regcnt]);
+		} else {
+			/* populate a bitmask indicating which
+			 * registers passed/failed
+			 */
+			result |= (1U << (regcnt / 2)); /* 1U: bit 31 would overflow a signed int */
+		}
+	}
+
+	return result;
+}
+
+/**
+ * @brief writes randomly sampled data into the FPU/SIMD registers.
+ * Allocates *indata (caller frees) and loads it into q0..q31 / z0..z31.
+ */
+static void __fpuregs_writeall_random(uint64_t **indata, int sve)
+{
+	/* 128b aligned buffer: FPU_QREG_MAX regs x 2 u64 (128 bits) each */
+	*indata = memalign(sizeof(__uint128_t), sizeof(uint64_t) * FPU_QREG_MAX * 2);
+
+	if (system_supports_rndr()) {
+		/* Populate memory with random data */
+		for (unsigned int i = 0; i < (FPU_QREG_MAX * 2); i++)
+			while (!arch_collect_entropy(&(*indata)[i])) {}
+	} else {
+		/* Populate memory with data from the counter register */
+		for (unsigned int i = 0; i < (FPU_QREG_MAX * 2); i++)
+			(*indata)[i] = get_cntvct();
+	}
+
+	/* Write data into FPU registers */
+	if (sve)
+		sve_reg_write(*indata);
+	else
+		fpu_reg_write(*indata);
+}
+
+/* on_cpu() trampoline: @data carries a uint64_t ** out-parameter. */
+static void fpuregs_writeall_run(void *data)
+{
+	uint64_t **randbuf = (uint64_t **)data;
+	__fpuregs_writeall_random(randbuf, 0);
+}
+
+/* on_cpu() trampoline: @data carries a uint64_t ** out-parameter. */
+static void sveregs_writeall_run(void *data)
+{
+	uint64_t **randbuf = (uint64_t **)data;
+	__fpuregs_writeall_random(randbuf, 1);
+}
+
+/* on_cpu() trampoline: compare FPU/SIMD registers against @data. */
+static void fpuregs_testall_run(void *data)
+{
+	uint32_t mask;
+
+	mask = __fpuregs_testall((uint64_t *)data, 0);
+	report((mask == FPU_RESULT_PASS),
+	       "FPU/SIMD register save/restore mask: 0x%x", mask);
+}
+
+/* on_cpu() trampoline: compare SVE registers against @data. */
+static void sveregs_testall_run(void *data)
+{
+	uint32_t mask;
+
+	mask = __fpuregs_testall((uint64_t *)data, 1);
+	report((mask == FPU_RESULT_PASS),
+	       "SVE register save/restore mask: 0x%x", mask);
+}
+
+/**
+ * @brief This test uses two CPUs to test FPU/SIMD save/restore
+ * @details CPU1 writes random data into FPU/SIMD registers,
+ * CPU0 corrupts/overwrites the data and finally CPU1 checks
+ * if the data remains unchanged in its context.
+ */
+static void fpuregs_context_switch_cpu1(int sve)
+{
+	int target = CPU1_ID; /* CPU whose register context is under test */
+	uint64_t *indata_remote = NULL; /* allocated on CPU1 via on_cpu() */
+	uint64_t *indata_local = NULL; /* allocated here on CPU0 */
+
+	/* write data from CPU1 */
+	on_cpu(target, sve ? sveregs_writeall_run
+			   : fpuregs_writeall_run,
+	       &indata_remote);
+
+	/* Overwrite from CPU0 */
+	__fpuregs_writeall_random(&indata_local, sve);
+
+	/* Check data consistency */
+	on_cpu(target, sve ? sveregs_testall_run
+			   : fpuregs_testall_run,
+	       indata_remote);
+
+	free(indata_remote); /* heap is shared: CPU0 may free CPU1's buffer */
+	free(indata_local);
+}
+
+/**
+ * @brief This test uses two CPUs to test FPU/SIMD save/restore
+ * @details CPU0 writes random data into FPU/SIMD registers,
+ * CPU1 corrupts/overwrites the data and finally CPU0 checks if
+ * the data remains unchanged in its context.
+ */
+static void fpuregs_context_switch_cpu0(int sve)
+{
+	int target = CPU1_ID; /* remote CPU doing the overwriting */
+	uint64_t *indata_local = NULL; /* CPU0's reference data */
+	uint64_t *indata_remote = NULL; /* clobber data written by CPU1 */
+	uint32_t result = 0;
+
+	/* write data from CPU0 */
+	__fpuregs_writeall_random(&indata_local, sve);
+
+	/* Overwrite from CPU1 */
+	on_cpu(target, sve ? sveregs_writeall_run
+			   : fpuregs_writeall_run,
+	       &indata_remote);
+
+	/* Check data consistency */
+	result = __fpuregs_testall(indata_local, sve);
+	report((result == FPU_RESULT_PASS),
+	       "%s register save/restore mask: 0x%x", sve ? "SVE" : "FPU/SIMD", result);
+
+	free(indata_remote); /* heap is shared: CPU0 may free CPU1's buffer */
+	free(indata_local);
+}
+
+/**
+ * Checks if during context switch, FPU/SIMD registers
+ * are saved/restored, with each CPU taking a turn as the
+ * side that verifies its own register contents.
+ */
+static void fpuregs_context_switch(void)
+{
+	fpuregs_context_switch_cpu0(0);
+	fpuregs_context_switch_cpu1(0);
+}
+
+/* Checks SVE register save/restore across realm context switch. The
+ * data buffers assume a 128-bit VL, so limit VL on BOTH CPUs first. */
+static void sve_limit_vl(void *unused)
+{
+	write_sysreg(read_sysreg(ZCR_EL1) & ~ZCR_EL1_LEN, ZCR_EL1);
+}
+
+static void sveregs_context_switch(void)
+{
+	on_cpu(CPU1_ID, sve_limit_vl, NULL); /* CPU1's VL was unconstrained */
+	sve_limit_vl(NULL);
+	fpuregs_context_switch_cpu0(1);
+	fpuregs_context_switch_cpu1(1);
+}
+
+static bool should_run_sve_tests(void)
+{
+#ifdef CC_HAS_SVE
+	return system_supports_sve();
+#else
+	return false;
+#endif
+}
+
+int main(int argc, char **argv)
+{
+	report_prefix_pushf("fpu");
+	/* both context-switch tests need a second CPU to migrate against */
+	nr_cpu_check(CPUS_MAX);
+	fpuregs_context_switch();
+	/* SVE variant only when both the toolchain and the CPU support it */
+	if (should_run_sve_tests())
+		sveregs_context_switch();
+
+	return report_summary();
+}
@@ -303,3 +303,11 @@ groups = nodefault realms
extra_params = -append 'hvc'
accel = kvm
arch = arm64
+
+# FPU/SIMD test
+[fpu-context]
+file = fpu.flat
+smp = 2
+groups = nodefault realms
+accel = kvm
+arch = arm64
@@ -122,6 +122,8 @@ static inline unsigned long get_id_aa64pfr0_el1(void)
#define ID_AA64PFR0_EL1_EL3 (0xf << 12)
#define ID_AA64PFR0_EL1_EL3_NI (0x0 << 12)
+#define ID_AA64PFR0_EL1_SVE_SHIFT 32
+
static inline bool system_supports_granule(size_t granule)
{
u32 shift;
@@ -145,5 +147,29 @@ static inline bool system_supports_granule(size_t granule)
return ((mmfr0 >> shift) & 0xf) == val;
}
+static inline bool system_supports_sve(void)
+{
+	return ((get_id_aa64pfr0_el1() >> ID_AA64PFR0_EL1_SVE_SHIFT) & 0xf) != 0; /* ID_AA64PFR0_EL1.SVE != 0 */
+}
+
+static inline int sve_vl(void)
+{
+	int vl;
+
+	asm volatile(".arch_extension sve\n"
+		     "rdvl %0, #8" /* rdvl #8 = 8 * VL-in-bytes, i.e. VL in bits — NOTE(review): confirm bits (not bytes) is the intended unit */
+		     : "=r" (vl));
+
+	return vl;
+}
+
+
+static inline bool system_supports_rndr(void)
+{
+	u64 id_aa64isar0_el1 = read_sysreg(ID_AA64ISAR0_EL1);
+
+	return ((id_aa64isar0_el1 >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf) != 0; /* ISAR0.RNDR != 0 => FEAT_RNG present */
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASMARM64_PROCESSOR_H_ */
@@ -73,6 +73,8 @@ asm(
);
#endif /* __ASSEMBLY__ */
+#define ID_AA64ISAR0_EL1_RNDR_SHIFT 60
+
#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
@@ -102,4 +104,9 @@ asm(
SCTLR_EL1_TSCXT | SCTLR_EL1_EIS | SCTLR_EL1_SPAN | \
SCTLR_EL1_NTLSMD | SCTLR_EL1_LSMAOE)
+#define ZCR_EL1 S3_0_C1_C2_0
+#define ZCR_EL1_LEN GENMASK(3, 0)
+
+#define RNDR S3_3_C2_C4_0
+
#endif /* _ASMARM64_SYSREG_H_ */