@@ -37,7 +37,7 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
$(TEST_DIR)/kvmclock_test.flat $(TEST_DIR)/eventinj.flat \
$(TEST_DIR)/s3.flat $(TEST_DIR)/pmu.flat \
$(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
- $(TEST_DIR)/init.flat
+ $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat
ifdef API
tests-common += api/api-sample
@@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
$(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
+$(TEST_DIR)/smap.elf: $(cstart.o) $(TEST_DIR)/smap.o
+
$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o $(TEST_DIR)/vmx_tests.o
$(TEST_DIR)/debug.elf: $(cstart.o) $(TEST_DIR)/debug.o
@@ -25,6 +25,7 @@
#define X86_CR4_PSE 0x00000010
#define X86_CR4_PAE 0x00000020
#define X86_CR4_PCIDE 0x00020000
+#define X86_CR4_SMAP 0x00200000
#define X86_IA32_EFER 0xc0000080
#define X86_EFER_LMA (1UL << 8)
@@ -39,6 +40,16 @@ static inline void barrier(void)
asm volatile ("" : : : "memory");
}
+static inline void clac(void)
+{
+ asm volatile (".byte 0x0f, 0x01, 0xca" : : : "memory"); /* CLAC (clear EFLAGS.AC), emitted as raw opcode bytes */
+}
+
+static inline void stac(void)
+{
+ asm volatile (".byte 0x0f, 0x01, 0xcb" : : : "memory"); /* STAC (set EFLAGS.AC), emitted as raw opcode bytes */
+}
+
static inline u16 read_cs(void)
{
unsigned val;
@@ -330,7 +341,7 @@ static inline void irq_enable(void)
asm volatile("sti");
}
-static inline void invlpg(void *va)
+static inline void invlpg(volatile void *va)
{
asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}
new file mode 100644
@@ -0,0 +1,156 @@
+#include "libcflat.h"
+#include "lib/x86/desc.h"
+#include "lib/x86/processor.h"
+#include "lib/x86/vm.h"
+
+#define X86_FEATURE_SMAP 20 // bit index tested in cpuid_indexed(7, 0).b by main()
+#define X86_EFLAGS_AC (1 << 18) // EFLAGS.AC mask; OR-ed into the saved flags by the #PF handler
+
+volatile int pf_count = 0; // number of page faults taken; incremented by do_pf_tss()
+volatile int save; // value of `test` captured by do_pf_tss() at fault time
+volatile unsigned test; // variable accessed through its normal address and through USER_VAR()
+
+
+// When doing ring 3 tests, page fault handlers will always run on a
+// separate stack (the ring 0 stack). Seems easier to use the alt_stack
+// mechanism for both ring 0 and ring 3.
+
+void do_pf_tss(unsigned long error_code) // C half of the #PF handler, called from pf_tss; error_code is not used
+{
+ pf_count++; // record that a fault was taken
+ save = test; // snapshot `test` so main() can check what it held when the fault hit
+// 32-bit only: set AC in tss.eflags here; the x86_64 path sets it in the iret frame inside pf_tss instead
+#ifndef __x86_64__
+ tss.eflags |= X86_EFLAGS_AC;
+#endif
+}
+
+extern void pf_tss(void);	// asm #PF entry stub; main() installs it with set_intr_alt_stack(14, pf_tss)
+asm ("pf_tss:\n"
+#ifdef __x86_64__
+	// no task on x86_64, save/restore caller-save regs
+	"push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n"
+	"push %r8; push %r9; push %r10; push %r11\n"
+	"mov 9*8(%rsp),%rdi\n"	// error code sits above the 9 saved regs; %rdi carries do_pf_tss()'s first argument
+#endif
+	"call do_pf_tss\n"
+#ifdef __x86_64__
+	"pop %r11; pop %r10; pop %r9; pop %r8\n"
+	"pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n"
+#endif
+	"add $"S", %"R "sp\n"	// step over the error code so sp points at the iret frame
+#ifdef __x86_64__
+	"orl $" xstr(X86_EFLAGS_AC) ", 2*"S"(%"R "sp)\n" // set EFLAGS.AC and retry
+#endif
+	"iret"W" \n\t"
+	"jmp pf_tss\n\t");	// NOTE(review): presumably the re-entry point for the 32-bit task-based handler -- confirm
+
+
+#define USER_BASE (1 << 24) // 16MB; main() presents the first 16MB again as user pages starting at this address
+#define USER_VAR(v) (*((__typeof__(&(v))) (((unsigned long)&v) + USER_BASE))) // lvalue for v at its address + USER_BASE
+
+static void init_test(int i) // reset state before each sub-test; nonzero i selects the INVLPG variant
+{
+ pf_count = 0;
+ if (i) {
+ invlpg(&test); // invalidate the TLB entry for the normal address of `test`
+ invlpg(&USER_VAR(test)); // ...and for its alias USER_BASE bytes higher
+ }
+}
+
+int main(int ac, char **av)	// SMAP test driver: AC=0/AC=1 accesses to supervisor and user pages at CPL=0
+{
+	unsigned long i;
+	// Bail out unless CPUID leaf 7, subleaf 0 reports SMAP in EBX
+	if (!(cpuid_indexed(7, 0).b & (1 << X86_FEATURE_SMAP))) {
+		printf("SMAP not enabled, exiting\n");
+		exit(1);
+	}
+	// Paging on, then route vector 14 (#PF) to pf_tss on the alternate stack
+	setup_vm();
+	setup_alt_stack();
+	set_intr_alt_stack(14, pf_tss);
+
+	// Map first 16MB as supervisor pages
+	for (i = 0; i < USER_BASE; i += PAGE_SIZE) {
+		*get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~PTE_USER;
+		invlpg((void *)i);
+	}
+
+	// Present the same 16MB as user pages in the 16MB-32MB range
+	for (i = USER_BASE; i < 2 * USER_BASE; i += PAGE_SIZE) {
+		*get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~USER_BASE;	// clear bit 24 in the PTE
+		invlpg((void *)i);
+	}
+	// Start from AC=0, turn SMAP on, then reload CR3
+	clac();
+	write_cr4(read_cr4() | X86_CR4_SMAP);
+	write_cr3(read_cr3());
+	// Pass 0 runs every sub-test without INVLPG, pass 1 with INVLPG (see init_test)
+	for (i = 0; i < 2; i++) {
+		if (i)
+			printf("testing with INVLPG\n");
+		else
+			printf("testing without INVLPG\n");
+		// Supervisor page, AC=0: no fault expected
+		init_test(i);
+		clac();
+		test = 42;
+		report("write to supervisor page", pf_count == 0 && test == 42);
+		// User page read, AC=1: no fault expected
+		init_test(i);
+		stac();
+		(void)USER_VAR(test);
+		report("read from user page with AC=1", pf_count == 0);
+		// User page read, AC=0: exactly one fault; the handler sets AC and the access is retried
+		init_test(i);
+		clac();
+		(void)USER_VAR(test);
+		report("read from user page with AC=0", pf_count == 1 && save == 42);
+		// User page write, AC=1: no fault, and the store is visible through the supervisor address
+		init_test(i);
+		stac();
+		save = 0;
+		USER_VAR(test) = 43;
+		report("write to user page with AC=1", pf_count == 0 && test == 43);
+		// User page write, AC=0: one fault (save holds the old 43), then the retried store lands
+		init_test(i);
+		clac();
+		USER_VAR(test) = 44;
+		report("write to user page with AC=0", pf_count == 1 && test == 44 && save == 43);
+		// Push with sp moved into the user alias, AC=1: no fault expected
+		init_test(i);
+		stac();
+		test = -1;
+		asm("or $(" xstr(USER_BASE) "), %"R "sp \n"
+		    "push $44 \n "
+		    "decl test\n"
+		    "and $~(" xstr(USER_BASE) "), %"R "sp \n"
+		    "pop %"R "ax\n"
+		    "movl %eax, test");	// NOTE(review): basic asm, so no clobbers (eax, memory) are declared to the compiler
+		report("write to user stack with AC=1", pf_count == 0 && test == 44);
+		// Same push with AC=0: one fault; save == -1 means the handler ran before "decl test"
+		init_test(i);
+		clac();
+		test = -1;
+		asm("or $(" xstr(USER_BASE) "), %"R "sp \n"
+		    "push $45 \n "
+		    "decl test\n"
+		    "and $~(" xstr(USER_BASE) "), %"R "sp \n"
+		    "pop %"R "ax\n"
+		    "movl %eax, test");
+		report("write to user stack with AC=0", pf_count == 1 && test == 45 && save == -1);
+		// Jump into the user alias and back, AC=0: no fault expected
+		/* This would be trapped by SMEP */
+		init_test(i);
+		clac();
+		asm("jmp 1f + "xstr(USER_BASE)" \n"
+		    "1: jmp 2f - "xstr(USER_BASE)" \n"
+		    "2:");
+		report("executing on user page with AC=0", pf_count == 0);
+	}
+
+	// TODO: implicit kernel access from ring 3 (e.g. int)
+
+	return report_summary();
+}
Test various combinations of the AC bit and reading/writing into user pages at CPL=0. One notable missing test is implicit kernel reads and writes (e.g. reading the IDT/GDT/LDT/TSS). The interesting part of this is that AC must be ignored in ring 3; the processor always behaves as if AC=0. I skipped this because QEMU doesn't emulate this correctly, and because right now there's no kvm-unit-tests infrastructure to run code in ring 3 at all. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- config-x86-common.mak | 4 +- lib/x86/processor.h | 13 ++++- x86/smap.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 2 deletions(-) create mode 100644 x86/smap.c