diff mbox series

[kvm-unit-tests] x86: Add tests for PKS

Message ID 20201105081805.5674-9-chenyi.qiang@intel.com (mailing list archive)
State New, archived
Headers show
Series [kvm-unit-tests] x86: Add tests for PKS | expand

Commit Message

Chenyi Qiang Nov. 5, 2020, 8:18 a.m. UTC
This unit test exercises the KVM support for Protection Keys for
Supervisor Pages (PKS). If CR4.PKS is set in long mode, supervisor
pkeys are checked in addition to the normal paging protections, and
access or write permission can be disabled via an MSR update, without
requiring a TLB flush when the permissions change.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
 lib/x86/msr.h       |   1 +
 lib/x86/processor.h |   2 +
 x86/Makefile.x86_64 |   1 +
 x86/pks.c           | 146 ++++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg   |   5 ++
 5 files changed, 155 insertions(+)
 create mode 100644 x86/pks.c

Comments

Thomas Huth Jan. 18, 2021, 5:45 p.m. UTC | #1
On 05/11/2020 09.18, Chenyi Qiang wrote:
> This unit-test is intended to test the KVM support for Protection Keys
> for Supervisor Pages (PKS). If CR4.PKS is set in long mode, supervisor
> pkeys are checked in addition to normal paging protections and Access or
> Write can be disabled via a MSR update without TLB flushes when
> permissions change.
> 
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
>   lib/x86/msr.h       |   1 +
>   lib/x86/processor.h |   2 +
>   x86/Makefile.x86_64 |   1 +
>   x86/pks.c           | 146 ++++++++++++++++++++++++++++++++++++++++++++
>   x86/unittests.cfg   |   5 ++
>   5 files changed, 155 insertions(+)
>   create mode 100644 x86/pks.c
> 
> diff --git a/lib/x86/msr.h b/lib/x86/msr.h
> index 6ef5502..e36934b 100644
> --- a/lib/x86/msr.h
> +++ b/lib/x86/msr.h
> @@ -209,6 +209,7 @@
>   #define MSR_IA32_EBL_CR_POWERON		0x0000002a
>   #define MSR_IA32_FEATURE_CONTROL        0x0000003a
>   #define MSR_IA32_TSC_ADJUST		0x0000003b
> +#define MSR_IA32_PKRS			0x000006e1
>   
>   #define FEATURE_CONTROL_LOCKED				(1<<0)
>   #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> index 74a2498..985fdd0 100644
> --- a/lib/x86/processor.h
> +++ b/lib/x86/processor.h
> @@ -50,6 +50,7 @@
>   #define X86_CR4_SMEP   0x00100000
>   #define X86_CR4_SMAP   0x00200000
>   #define X86_CR4_PKE    0x00400000
> +#define X86_CR4_PKS    0x01000000
>   
>   #define X86_EFLAGS_CF    0x00000001
>   #define X86_EFLAGS_FIXED 0x00000002
> @@ -157,6 +158,7 @@ static inline u8 cpuid_maxphyaddr(void)
>   #define	X86_FEATURE_RDPID		(CPUID(0x7, 0, ECX, 22))
>   #define	X86_FEATURE_SPEC_CTRL		(CPUID(0x7, 0, EDX, 26))
>   #define	X86_FEATURE_ARCH_CAPABILITIES	(CPUID(0x7, 0, EDX, 29))
> +#define	X86_FEATURE_PKS			(CPUID(0x7, 0, ECX, 31))
>   #define	X86_FEATURE_NX			(CPUID(0x80000001, 0, EDX, 20))
>   #define	X86_FEATURE_RDPRU		(CPUID(0x80000008, 0, EBX, 4))
>   
> diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
> index af61d85..3a353df 100644
> --- a/x86/Makefile.x86_64
> +++ b/x86/Makefile.x86_64
> @@ -20,6 +20,7 @@ tests += $(TEST_DIR)/tscdeadline_latency.flat
>   tests += $(TEST_DIR)/intel-iommu.flat
>   tests += $(TEST_DIR)/vmware_backdoors.flat
>   tests += $(TEST_DIR)/rdpru.flat
> +tests += $(TEST_DIR)/pks.flat
>   
>   include $(SRCDIR)/$(TEST_DIR)/Makefile.common
>   
> diff --git a/x86/pks.c b/x86/pks.c
> new file mode 100644
> index 0000000..a3044cf
> --- /dev/null
> +++ b/x86/pks.c
> @@ -0,0 +1,146 @@
> +#include "libcflat.h"
> +#include "x86/desc.h"
> +#include "x86/processor.h"
> +#include "x86/vm.h"
> +#include "x86/msr.h"
> +
> +#define CR0_WP_MASK      (1UL << 16)
> +#define PTE_PKEY_BIT     59
> +#define SUPER_BASE        (1 << 24)
> +#define SUPER_VAR(v)      (*((__typeof__(&(v))) (((unsigned long)&v) + SUPER_BASE)))
> +
> +volatile int pf_count = 0;
> +volatile unsigned save;
> +volatile unsigned test;
> +
> +static void set_cr0_wp(int wp)
> +{
> +    unsigned long cr0 = read_cr0();
> +
> +    cr0 &= ~CR0_WP_MASK;
> +    if (wp)
> +        cr0 |= CR0_WP_MASK;
> +    write_cr0(cr0);
> +}
> +
> +void do_pf_tss(unsigned long error_code);
> +void do_pf_tss(unsigned long error_code)
> +{
> +    printf("#PF handler, error code: 0x%lx\n", error_code);
> +    pf_count++;
> +    save = test;
> +    wrmsr(MSR_IA32_PKRS, 0);
> +}
> +
> +extern void pf_tss(void);
> +
> +asm ("pf_tss: \n\t"
> +#ifdef __x86_64__
> +    // no task on x86_64, save/restore caller-save regs
> +    "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n"
> +    "push %r8; push %r9; push %r10; push %r11\n"
> +    "mov 9*8(%rsp), %rdi\n"
> +#endif
> +    "call do_pf_tss \n\t"
> +#ifdef __x86_64__
> +    "pop %r11; pop %r10; pop %r9; pop %r8\n"
> +    "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n"
> +#endif
> +    "add $"S", %"R "sp\n\t" // discard error code
> +    "iret"W" \n\t"
> +    "jmp pf_tss\n\t"
> +    );
> +
> +static void init_test(void)
> +{
> +    pf_count = 0;
> +
> +    invlpg(&test);
> +    invlpg(&SUPER_VAR(test));
> +    wrmsr(MSR_IA32_PKRS, 0);
> +    set_cr0_wp(0);
> +}
> +
> +int main(int ac, char **av)
> +{
> +    unsigned long i;
> +    unsigned int pkey = 0x2;
> +    unsigned int pkrs_ad = 0x10;
> +    unsigned int pkrs_wd = 0x20;
> +
> +    if (!this_cpu_has(X86_FEATURE_PKS)) {
> +        printf("PKS not enabled\n");
> +        return report_summary();
> +    }
> +
> +    setup_vm();
> +    setup_alt_stack();
> +    set_intr_alt_stack(14, pf_tss);
> +    wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA);
> +
> +    // First 16MB are user pages
> +    for (i = 0; i < SUPER_BASE; i += PAGE_SIZE) {
> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT);
> +        invlpg((void *)i);
> +    }
> +
> +    // Present the same 16MB as supervisor pages in the 16MB-32MB range
> +    for (i = SUPER_BASE; i < 2 * SUPER_BASE; i += PAGE_SIZE) {
> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~SUPER_BASE;
> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~PT_USER_MASK;
> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT);
> +        invlpg((void *)i);
> +    }
> +
> +    write_cr4(read_cr4() | X86_CR4_PKS);
> +    write_cr3(read_cr3());
> +
> +    init_test();
> +    set_cr0_wp(1);
> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
> +    SUPER_VAR(test) = 21;
> +    report(pf_count == 1 && test == 21 && save == 0,
> +           "write to supervisor page when pkrs is ad and wp == 1");
> +
> +    init_test();
> +    set_cr0_wp(0);
> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
> +    SUPER_VAR(test) = 22;
> +    report(pf_count == 1 && test == 22 && save == 21,
> +           "write to supervisor page when pkrs is ad and wp == 0");
> +
> +    init_test();
> +    set_cr0_wp(1);
> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
> +    SUPER_VAR(test) = 23;
> +    report(pf_count == 1 && test == 23 && save == 22,
> +           "write to supervisor page when pkrs is wd and wp == 1");
> +
> +    init_test();
> +    set_cr0_wp(0);
> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
> +    SUPER_VAR(test) = 24;
> +    report(pf_count == 0 && test == 24,
> +           "write to supervisor page when pkrs is wd and wp == 0");
> +
> +    init_test();
> +    set_cr0_wp(0);
> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
> +    test = 25;
> +    report(pf_count == 0 && test == 25,
> +           "write to user page when pkrs is wd and wp == 0");
> +
> +    init_test();
> +    set_cr0_wp(1);
> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
> +    test = 26;
> +    report(pf_count == 0 && test == 26,
> +           "write to user page when pkrs is wd and wp == 1");
> +
> +    init_test();
> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
> +    (void)((__typeof__(&(test))) (((unsigned long)&test)));
> +    report(pf_count == 0, "read from user page when pkrs is ad");
> +
> +    return report_summary();
> +}
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> index 3a79151..b75419e 100644
> --- a/x86/unittests.cfg
> +++ b/x86/unittests.cfg
> @@ -127,6 +127,11 @@ file = pku.flat
>   arch = x86_64
>   extra_params = -cpu host
>   
> +[pks]
> +file = pks.flat
> +arch = x86_64
> +extra_params = -cpu host
> +
>   [asyncpf]
>   file = asyncpf.flat
>   extra_params = -m 2048
> 

Ping? ... Paolo, I think this one fell through the cracks?

  Thomas
Paolo Bonzini Jan. 18, 2021, 6:27 p.m. UTC | #2
On 18/01/21 18:45, Thomas Huth wrote:
> On 05/11/2020 09.18, Chenyi Qiang wrote:
>> This unit-test is intended to test the KVM support for Protection Keys
>> for Supervisor Pages (PKS). If CR4.PKS is set in long mode, supervisor
>> pkeys are checked in addition to normal paging protections and Access or
>> Write can be disabled via a MSR update without TLB flushes when
>> permissions change.
>>
>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>> ---
>>   lib/x86/msr.h       |   1 +
>>   lib/x86/processor.h |   2 +
>>   x86/Makefile.x86_64 |   1 +
>>   x86/pks.c           | 146 ++++++++++++++++++++++++++++++++++++++++++++
>>   x86/unittests.cfg   |   5 ++
>>   5 files changed, 155 insertions(+)
>>   create mode 100644 x86/pks.c
>>
>> diff --git a/lib/x86/msr.h b/lib/x86/msr.h
>> index 6ef5502..e36934b 100644
>> --- a/lib/x86/msr.h
>> +++ b/lib/x86/msr.h
>> @@ -209,6 +209,7 @@
>>   #define MSR_IA32_EBL_CR_POWERON        0x0000002a
>>   #define MSR_IA32_FEATURE_CONTROL        0x0000003a
>>   #define MSR_IA32_TSC_ADJUST        0x0000003b
>> +#define MSR_IA32_PKRS            0x000006e1
>>   #define FEATURE_CONTROL_LOCKED                (1<<0)
>>   #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX    (1<<1)
>> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
>> index 74a2498..985fdd0 100644
>> --- a/lib/x86/processor.h
>> +++ b/lib/x86/processor.h
>> @@ -50,6 +50,7 @@
>>   #define X86_CR4_SMEP   0x00100000
>>   #define X86_CR4_SMAP   0x00200000
>>   #define X86_CR4_PKE    0x00400000
>> +#define X86_CR4_PKS    0x01000000
>>   #define X86_EFLAGS_CF    0x00000001
>>   #define X86_EFLAGS_FIXED 0x00000002
>> @@ -157,6 +158,7 @@ static inline u8 cpuid_maxphyaddr(void)
>>   #define    X86_FEATURE_RDPID        (CPUID(0x7, 0, ECX, 22))
>>   #define    X86_FEATURE_SPEC_CTRL        (CPUID(0x7, 0, EDX, 26))
>>   #define    X86_FEATURE_ARCH_CAPABILITIES    (CPUID(0x7, 0, EDX, 29))
>> +#define    X86_FEATURE_PKS            (CPUID(0x7, 0, ECX, 31))
>>   #define    X86_FEATURE_NX            (CPUID(0x80000001, 0, EDX, 20))
>>   #define    X86_FEATURE_RDPRU        (CPUID(0x80000008, 0, EBX, 4))
>> diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
>> index af61d85..3a353df 100644
>> --- a/x86/Makefile.x86_64
>> +++ b/x86/Makefile.x86_64
>> @@ -20,6 +20,7 @@ tests += $(TEST_DIR)/tscdeadline_latency.flat
>>   tests += $(TEST_DIR)/intel-iommu.flat
>>   tests += $(TEST_DIR)/vmware_backdoors.flat
>>   tests += $(TEST_DIR)/rdpru.flat
>> +tests += $(TEST_DIR)/pks.flat
>>   include $(SRCDIR)/$(TEST_DIR)/Makefile.common
>> diff --git a/x86/pks.c b/x86/pks.c
>> new file mode 100644
>> index 0000000..a3044cf
>> --- /dev/null
>> +++ b/x86/pks.c
>> @@ -0,0 +1,146 @@
>> +#include "libcflat.h"
>> +#include "x86/desc.h"
>> +#include "x86/processor.h"
>> +#include "x86/vm.h"
>> +#include "x86/msr.h"
>> +
>> +#define CR0_WP_MASK      (1UL << 16)
>> +#define PTE_PKEY_BIT     59
>> +#define SUPER_BASE        (1 << 24)
>> +#define SUPER_VAR(v)      (*((__typeof__(&(v))) (((unsigned long)&v) 
>> + SUPER_BASE)))
>> +
>> +volatile int pf_count = 0;
>> +volatile unsigned save;
>> +volatile unsigned test;
>> +
>> +static void set_cr0_wp(int wp)
>> +{
>> +    unsigned long cr0 = read_cr0();
>> +
>> +    cr0 &= ~CR0_WP_MASK;
>> +    if (wp)
>> +        cr0 |= CR0_WP_MASK;
>> +    write_cr0(cr0);
>> +}
>> +
>> +void do_pf_tss(unsigned long error_code);
>> +void do_pf_tss(unsigned long error_code)
>> +{
>> +    printf("#PF handler, error code: 0x%lx\n", error_code);
>> +    pf_count++;
>> +    save = test;
>> +    wrmsr(MSR_IA32_PKRS, 0);
>> +}
>> +
>> +extern void pf_tss(void);
>> +
>> +asm ("pf_tss: \n\t"
>> +#ifdef __x86_64__
>> +    // no task on x86_64, save/restore caller-save regs
>> +    "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n"
>> +    "push %r8; push %r9; push %r10; push %r11\n"
>> +    "mov 9*8(%rsp), %rdi\n"
>> +#endif
>> +    "call do_pf_tss \n\t"
>> +#ifdef __x86_64__
>> +    "pop %r11; pop %r10; pop %r9; pop %r8\n"
>> +    "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n"
>> +#endif
>> +    "add $"S", %"R "sp\n\t" // discard error code
>> +    "iret"W" \n\t"
>> +    "jmp pf_tss\n\t"
>> +    );
>> +
>> +static void init_test(void)
>> +{
>> +    pf_count = 0;
>> +
>> +    invlpg(&test);
>> +    invlpg(&SUPER_VAR(test));
>> +    wrmsr(MSR_IA32_PKRS, 0);
>> +    set_cr0_wp(0);
>> +}
>> +
>> +int main(int ac, char **av)
>> +{
>> +    unsigned long i;
>> +    unsigned int pkey = 0x2;
>> +    unsigned int pkrs_ad = 0x10;
>> +    unsigned int pkrs_wd = 0x20;
>> +
>> +    if (!this_cpu_has(X86_FEATURE_PKS)) {
>> +        printf("PKS not enabled\n");
>> +        return report_summary();
>> +    }
>> +
>> +    setup_vm();
>> +    setup_alt_stack();
>> +    set_intr_alt_stack(14, pf_tss);
>> +    wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA);
>> +
>> +    // First 16MB are user pages
>> +    for (i = 0; i < SUPER_BASE; i += PAGE_SIZE) {
>> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= 
>> ((unsigned long)pkey << PTE_PKEY_BIT);
>> +        invlpg((void *)i);
>> +    }
>> +
>> +    // Present the same 16MB as supervisor pages in the 16MB-32MB range
>> +    for (i = SUPER_BASE; i < 2 * SUPER_BASE; i += PAGE_SIZE) {
>> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= 
>> ~SUPER_BASE;
>> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= 
>> ~PT_USER_MASK;
>> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= 
>> ((unsigned long)pkey << PTE_PKEY_BIT);
>> +        invlpg((void *)i);
>> +    }
>> +
>> +    write_cr4(read_cr4() | X86_CR4_PKS);
>> +    write_cr3(read_cr3());
>> +
>> +    init_test();
>> +    set_cr0_wp(1);
>> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
>> +    SUPER_VAR(test) = 21;
>> +    report(pf_count == 1 && test == 21 && save == 0,
>> +           "write to supervisor page when pkrs is ad and wp == 1");
>> +
>> +    init_test();
>> +    set_cr0_wp(0);
>> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
>> +    SUPER_VAR(test) = 22;
>> +    report(pf_count == 1 && test == 22 && save == 21,
>> +           "write to supervisor page when pkrs is ad and wp == 0");
>> +
>> +    init_test();
>> +    set_cr0_wp(1);
>> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
>> +    SUPER_VAR(test) = 23;
>> +    report(pf_count == 1 && test == 23 && save == 22,
>> +           "write to supervisor page when pkrs is wd and wp == 1");
>> +
>> +    init_test();
>> +    set_cr0_wp(0);
>> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
>> +    SUPER_VAR(test) = 24;
>> +    report(pf_count == 0 && test == 24,
>> +           "write to supervisor page when pkrs is wd and wp == 0");
>> +
>> +    init_test();
>> +    set_cr0_wp(0);
>> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
>> +    test = 25;
>> +    report(pf_count == 0 && test == 25,
>> +           "write to user page when pkrs is wd and wp == 0");
>> +
>> +    init_test();
>> +    set_cr0_wp(1);
>> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
>> +    test = 26;
>> +    report(pf_count == 0 && test == 26,
>> +           "write to user page when pkrs is wd and wp == 1");
>> +
>> +    init_test();
>> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
>> +    (void)((__typeof__(&(test))) (((unsigned long)&test)));
>> +    report(pf_count == 0, "read from user page when pkrs is ad");
>> +
>> +    return report_summary();
>> +}
>> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
>> index 3a79151..b75419e 100644
>> --- a/x86/unittests.cfg
>> +++ b/x86/unittests.cfg
>> @@ -127,6 +127,11 @@ file = pku.flat
>>   arch = x86_64
>>   extra_params = -cpu host
>> +[pks]
>> +file = pks.flat
>> +arch = x86_64
>> +extra_params = -cpu host
>> +
>>   [asyncpf]
>>   file = asyncpf.flat
>>   extra_params = -m 2048
>>
> 
> Ping? ... Paolo, I think this one fell through the cracks?
> 
>   Thomas
> 

No, it's just that the KVM patches haven't been merged yet (and there's 
no QEMU implementation yet).  But I'm getting to it.

Paolo
Chenyi Qiang Jan. 19, 2021, 7:41 a.m. UTC | #3
On 1/19/2021 2:27 AM, Paolo Bonzini wrote:
> On 18/01/21 18:45, Thomas Huth wrote:
>> On 05/11/2020 09.18, Chenyi Qiang wrote:
>>> This unit-test is intended to test the KVM support for Protection Keys
>>> for Supervisor Pages (PKS). If CR4.PKS is set in long mode, supervisor
>>> pkeys are checked in addition to normal paging protections and Access or
>>> Write can be disabled via a MSR update without TLB flushes when
>>> permissions change.
>>>
>>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>> ---
>>>   lib/x86/msr.h       |   1 +
>>>   lib/x86/processor.h |   2 +
>>>   x86/Makefile.x86_64 |   1 +
>>>   x86/pks.c           | 146 ++++++++++++++++++++++++++++++++++++++++++++
>>>   x86/unittests.cfg   |   5 ++
>>>   5 files changed, 155 insertions(+)
>>>   create mode 100644 x86/pks.c
>>>
>>> diff --git a/lib/x86/msr.h b/lib/x86/msr.h
>>> index 6ef5502..e36934b 100644
>>> --- a/lib/x86/msr.h
>>> +++ b/lib/x86/msr.h
>>> @@ -209,6 +209,7 @@
>>>   #define MSR_IA32_EBL_CR_POWERON        0x0000002a
>>>   #define MSR_IA32_FEATURE_CONTROL        0x0000003a
>>>   #define MSR_IA32_TSC_ADJUST        0x0000003b
>>> +#define MSR_IA32_PKRS            0x000006e1
>>>   #define FEATURE_CONTROL_LOCKED                (1<<0)
>>>   #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX    (1<<1)
>>> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
>>> index 74a2498..985fdd0 100644
>>> --- a/lib/x86/processor.h
>>> +++ b/lib/x86/processor.h
>>> @@ -50,6 +50,7 @@
>>>   #define X86_CR4_SMEP   0x00100000
>>>   #define X86_CR4_SMAP   0x00200000
>>>   #define X86_CR4_PKE    0x00400000
>>> +#define X86_CR4_PKS    0x01000000
>>>   #define X86_EFLAGS_CF    0x00000001
>>>   #define X86_EFLAGS_FIXED 0x00000002
>>> @@ -157,6 +158,7 @@ static inline u8 cpuid_maxphyaddr(void)
>>>   #define    X86_FEATURE_RDPID        (CPUID(0x7, 0, ECX, 22))
>>>   #define    X86_FEATURE_SPEC_CTRL        (CPUID(0x7, 0, EDX, 26))
>>>   #define    X86_FEATURE_ARCH_CAPABILITIES    (CPUID(0x7, 0, EDX, 29))
>>> +#define    X86_FEATURE_PKS            (CPUID(0x7, 0, ECX, 31))
>>>   #define    X86_FEATURE_NX            (CPUID(0x80000001, 0, EDX, 20))
>>>   #define    X86_FEATURE_RDPRU        (CPUID(0x80000008, 0, EBX, 4))
>>> diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
>>> index af61d85..3a353df 100644
>>> --- a/x86/Makefile.x86_64
>>> +++ b/x86/Makefile.x86_64
>>> @@ -20,6 +20,7 @@ tests += $(TEST_DIR)/tscdeadline_latency.flat
>>>   tests += $(TEST_DIR)/intel-iommu.flat
>>>   tests += $(TEST_DIR)/vmware_backdoors.flat
>>>   tests += $(TEST_DIR)/rdpru.flat
>>> +tests += $(TEST_DIR)/pks.flat
>>>   include $(SRCDIR)/$(TEST_DIR)/Makefile.common
>>> diff --git a/x86/pks.c b/x86/pks.c
>>> new file mode 100644
>>> index 0000000..a3044cf
>>> --- /dev/null
>>> +++ b/x86/pks.c
>>> @@ -0,0 +1,146 @@
>>> +#include "libcflat.h"
>>> +#include "x86/desc.h"
>>> +#include "x86/processor.h"
>>> +#include "x86/vm.h"
>>> +#include "x86/msr.h"
>>> +
>>> +#define CR0_WP_MASK      (1UL << 16)
>>> +#define PTE_PKEY_BIT     59
>>> +#define SUPER_BASE        (1 << 24)
>>> +#define SUPER_VAR(v)      (*((__typeof__(&(v))) (((unsigned long)&v) 
>>> + SUPER_BASE)))
>>> +
>>> +volatile int pf_count = 0;
>>> +volatile unsigned save;
>>> +volatile unsigned test;
>>> +
>>> +static void set_cr0_wp(int wp)
>>> +{
>>> +    unsigned long cr0 = read_cr0();
>>> +
>>> +    cr0 &= ~CR0_WP_MASK;
>>> +    if (wp)
>>> +        cr0 |= CR0_WP_MASK;
>>> +    write_cr0(cr0);
>>> +}
>>> +
>>> +void do_pf_tss(unsigned long error_code);
>>> +void do_pf_tss(unsigned long error_code)
>>> +{
>>> +    printf("#PF handler, error code: 0x%lx\n", error_code);
>>> +    pf_count++;
>>> +    save = test;
>>> +    wrmsr(MSR_IA32_PKRS, 0);
>>> +}
>>> +
>>> +extern void pf_tss(void);
>>> +
>>> +asm ("pf_tss: \n\t"
>>> +#ifdef __x86_64__
>>> +    // no task on x86_64, save/restore caller-save regs
>>> +    "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n"
>>> +    "push %r8; push %r9; push %r10; push %r11\n"
>>> +    "mov 9*8(%rsp), %rdi\n"
>>> +#endif
>>> +    "call do_pf_tss \n\t"
>>> +#ifdef __x86_64__
>>> +    "pop %r11; pop %r10; pop %r9; pop %r8\n"
>>> +    "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n"
>>> +#endif
>>> +    "add $"S", %"R "sp\n\t" // discard error code
>>> +    "iret"W" \n\t"
>>> +    "jmp pf_tss\n\t"
>>> +    );
>>> +
>>> +static void init_test(void)
>>> +{
>>> +    pf_count = 0;
>>> +
>>> +    invlpg(&test);
>>> +    invlpg(&SUPER_VAR(test));
>>> +    wrmsr(MSR_IA32_PKRS, 0);
>>> +    set_cr0_wp(0);
>>> +}
>>> +
>>> +int main(int ac, char **av)
>>> +{
>>> +    unsigned long i;
>>> +    unsigned int pkey = 0x2;
>>> +    unsigned int pkrs_ad = 0x10;
>>> +    unsigned int pkrs_wd = 0x20;
>>> +
>>> +    if (!this_cpu_has(X86_FEATURE_PKS)) {
>>> +        printf("PKS not enabled\n");
>>> +        return report_summary();
>>> +    }
>>> +
>>> +    setup_vm();
>>> +    setup_alt_stack();
>>> +    set_intr_alt_stack(14, pf_tss);
>>> +    wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA);
>>> +
>>> +    // First 16MB are user pages
>>> +    for (i = 0; i < SUPER_BASE; i += PAGE_SIZE) {
>>> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= 
>>> ((unsigned long)pkey << PTE_PKEY_BIT);
>>> +        invlpg((void *)i);
>>> +    }
>>> +
>>> +    // Present the same 16MB as supervisor pages in the 16MB-32MB range
>>> +    for (i = SUPER_BASE; i < 2 * SUPER_BASE; i += PAGE_SIZE) {
>>> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= 
>>> ~SUPER_BASE;
>>> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= 
>>> ~PT_USER_MASK;
>>> +        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= 
>>> ((unsigned long)pkey << PTE_PKEY_BIT);
>>> +        invlpg((void *)i);
>>> +    }
>>> +
>>> +    write_cr4(read_cr4() | X86_CR4_PKS);
>>> +    write_cr3(read_cr3());
>>> +
>>> +    init_test();
>>> +    set_cr0_wp(1);
>>> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
>>> +    SUPER_VAR(test) = 21;
>>> +    report(pf_count == 1 && test == 21 && save == 0,
>>> +           "write to supervisor page when pkrs is ad and wp == 1");
>>> +
>>> +    init_test();
>>> +    set_cr0_wp(0);
>>> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
>>> +    SUPER_VAR(test) = 22;
>>> +    report(pf_count == 1 && test == 22 && save == 21,
>>> +           "write to supervisor page when pkrs is ad and wp == 0");
>>> +
>>> +    init_test();
>>> +    set_cr0_wp(1);
>>> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
>>> +    SUPER_VAR(test) = 23;
>>> +    report(pf_count == 1 && test == 23 && save == 22,
>>> +           "write to supervisor page when pkrs is wd and wp == 1");
>>> +
>>> +    init_test();
>>> +    set_cr0_wp(0);
>>> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
>>> +    SUPER_VAR(test) = 24;
>>> +    report(pf_count == 0 && test == 24,
>>> +           "write to supervisor page when pkrs is wd and wp == 0");
>>> +
>>> +    init_test();
>>> +    set_cr0_wp(0);
>>> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
>>> +    test = 25;
>>> +    report(pf_count == 0 && test == 25,
>>> +           "write to user page when pkrs is wd and wp == 0");
>>> +
>>> +    init_test();
>>> +    set_cr0_wp(1);
>>> +    wrmsr(MSR_IA32_PKRS, pkrs_wd);
>>> +    test = 26;
>>> +    report(pf_count == 0 && test == 26,
>>> +           "write to user page when pkrs is wd and wp == 1");
>>> +
>>> +    init_test();
>>> +    wrmsr(MSR_IA32_PKRS, pkrs_ad);
>>> +    (void)((__typeof__(&(test))) (((unsigned long)&test)));
>>> +    report(pf_count == 0, "read from user page when pkrs is ad");
>>> +
>>> +    return report_summary();
>>> +}
>>> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
>>> index 3a79151..b75419e 100644
>>> --- a/x86/unittests.cfg
>>> +++ b/x86/unittests.cfg
>>> @@ -127,6 +127,11 @@ file = pku.flat
>>>   arch = x86_64
>>>   extra_params = -cpu host
>>> +[pks]
>>> +file = pks.flat
>>> +arch = x86_64
>>> +extra_params = -cpu host
>>> +
>>>   [asyncpf]
>>>   file = asyncpf.flat
>>>   extra_params = -m 2048
>>>
>>
>> Ping? ... Paolo, I think this one fell through the cracks?
>>
>>   Thomas
>>
> 
> No, it's just that the KVM patches haven't been merged yet (and there's 
> no QEMU implementation yet).  But I'm getting to it.
> 
> Paolo
> 

Hi Paolo,

Thank you for your time. I was just thinking about resending this patch 
series to ping you, even though it would contain no changes. I would 
really appreciate your comments.

Would you like me to send a new non-RFC version together with the QEMU 
implementation, or would you prefer to review this RFC series first?

Thanks
Chenyi
Paolo Bonzini Jan. 27, 2021, 9:46 a.m. UTC | #4
On 19/01/21 08:41, Chenyi Qiang wrote:
>>
> 
> Hi Paolo,
> 
> Thank you for your time. I was just thinking about resending this patch 
> series to ping you although no changes will be added. I really hope to 
> get the comments from you.
> 
> Do you want me to resend a new non-RFC version as well as the QEMU 
> implementation? or you review this RFC series first?
> 
> Thanks
> Chenyi

Hi,

I have reviewed the KVM implementation and actually I have also 
implemented PKRS in QEMU's binary translation.  I'll send a patch and 
commit this one too, since it can be tested.

For QEMU you'll still have to post the KVM parts (getting/setting the 
PKRS MSR) since I don't have a way to test those.

Paolo
diff mbox series

Patch

diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 6ef5502..e36934b 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -209,6 +209,7 @@ 
 #define MSR_IA32_EBL_CR_POWERON		0x0000002a
 #define MSR_IA32_FEATURE_CONTROL        0x0000003a
 #define MSR_IA32_TSC_ADJUST		0x0000003b
+#define MSR_IA32_PKRS			0x000006e1
 
 #define FEATURE_CONTROL_LOCKED				(1<<0)
 #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 74a2498..985fdd0 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -50,6 +50,7 @@ 
 #define X86_CR4_SMEP   0x00100000
 #define X86_CR4_SMAP   0x00200000
 #define X86_CR4_PKE    0x00400000
+#define X86_CR4_PKS    0x01000000
 
 #define X86_EFLAGS_CF    0x00000001
 #define X86_EFLAGS_FIXED 0x00000002
@@ -157,6 +158,7 @@  static inline u8 cpuid_maxphyaddr(void)
 #define	X86_FEATURE_RDPID		(CPUID(0x7, 0, ECX, 22))
 #define	X86_FEATURE_SPEC_CTRL		(CPUID(0x7, 0, EDX, 26))
 #define	X86_FEATURE_ARCH_CAPABILITIES	(CPUID(0x7, 0, EDX, 29))
+#define	X86_FEATURE_PKS			(CPUID(0x7, 0, ECX, 31))
 #define	X86_FEATURE_NX			(CPUID(0x80000001, 0, EDX, 20))
 #define	X86_FEATURE_RDPRU		(CPUID(0x80000008, 0, EBX, 4))
 
diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
index af61d85..3a353df 100644
--- a/x86/Makefile.x86_64
+++ b/x86/Makefile.x86_64
@@ -20,6 +20,7 @@  tests += $(TEST_DIR)/tscdeadline_latency.flat
 tests += $(TEST_DIR)/intel-iommu.flat
 tests += $(TEST_DIR)/vmware_backdoors.flat
 tests += $(TEST_DIR)/rdpru.flat
+tests += $(TEST_DIR)/pks.flat
 
 include $(SRCDIR)/$(TEST_DIR)/Makefile.common
 
diff --git a/x86/pks.c b/x86/pks.c
new file mode 100644
index 0000000..a3044cf
--- /dev/null
+++ b/x86/pks.c
@@ -0,0 +1,180 @@ 
+#include "libcflat.h"
+#include "x86/desc.h"
+#include "x86/processor.h"
+#include "x86/vm.h"
+#include "x86/msr.h"
+
+/*
+ * Test of Protection Keys for Supervisor Pages (PKS).
+ *
+ * The low 16MB are tagged with a protection key and aliased as supervisor
+ * pages at 16MB-32MB.  Each case programs IA32_PKRS and CR0.WP, accesses
+ * "test" through one of the two mappings and checks how many page faults
+ * the #PF handler counted.
+ */
+
+#define CR0_WP_MASK      (1UL << 16)
+/* Position of the lowest protection-key bit in a page-table entry. */
+#define PTE_PKEY_BIT     59
+/* Unsigned long, so that ~SUPER_BASE is a full-width mask in PTE updates. */
+#define SUPER_BASE        (1UL << 24)
+/* Access v through its alias located SUPER_BASE bytes above its address. */
+#define SUPER_VAR(v)      (*((__typeof__(&(v))) (((unsigned long)&(v)) + SUPER_BASE)))
+
+/* Page faults taken since the last init_test(). */
+volatile int pf_count = 0;
+/* Value of "test" as seen by the #PF handler. */
+volatile unsigned save;
+/* Target of every access, either directly or through SUPER_VAR(). */
+volatile unsigned test;
+
+/* Set CR0.WP when wp is non-zero and clear it otherwise. */
+static void set_cr0_wp(int wp)
+{
+    unsigned long cr0 = read_cr0();
+
+    cr0 &= ~CR0_WP_MASK;
+    if (wp)
+        cr0 |= CR0_WP_MASK;
+    write_cr0(cr0);
+}
+
+/*
+ * C part of the #PF handler: log the error code, count the fault, sample
+ * "test" and clear IA32_PKRS so that the access is no longer blocked by
+ * a protection key when it is retried.
+ */
+void do_pf_tss(unsigned long error_code);
+void do_pf_tss(unsigned long error_code)
+{
+    printf("#PF handler, error code: 0x%lx\n", error_code);
+    pf_count++;
+    save = test;
+    wrmsr(MSR_IA32_PKRS, 0);
+}
+
+extern void pf_tss(void);
+
+/*
+ * Assembly entry point for #PF.  On x86_64 nine registers are pushed, so
+ * the error code is found at 9*8(%rsp) and handed to do_pf_tss() in %rdi.
+ */
+asm ("pf_tss: \n\t"
+#ifdef __x86_64__
+    // no task on x86_64, save/restore caller-save regs
+    "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n"
+    "push %r8; push %r9; push %r10; push %r11\n"
+    "mov 9*8(%rsp), %rdi\n"
+#endif
+    "call do_pf_tss \n\t"
+#ifdef __x86_64__
+    "pop %r11; pop %r10; pop %r9; pop %r8\n"
+    "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n"
+#endif
+    "add $"S", %"R "sp\n\t" // discard error code
+    "iret"W" \n\t"
+    "jmp pf_tss\n\t"
+    );
+
+/*
+ * Bring the state back to a known baseline before each case: no faults
+ * counted, both mappings of "test" flushed from the TLB, IA32_PKRS
+ * cleared and CR0.WP off.
+ */
+static void init_test(void)
+{
+    pf_count = 0;
+
+    invlpg(&test);
+    invlpg(&SUPER_VAR(test));
+    wrmsr(MSR_IA32_PKRS, 0);
+    set_cr0_wp(0);
+}
+
+int main(int ac, char **av)
+{
+    unsigned long i;
+    unsigned int val;
+    unsigned int pkey = 0x2;
+    /* IA32_PKRS has two bits per key: AD at bit 2 * pkey, WD right above it. */
+    unsigned int pkrs_ad = 0x10;
+    unsigned int pkrs_wd = 0x20;
+
+    if (!this_cpu_has(X86_FEATURE_PKS)) {
+        printf("PKS not enabled\n");
+        return report_summary();
+    }
+
+    setup_vm();
+    setup_alt_stack();
+    set_intr_alt_stack(14, pf_tss);
+    wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA);
+
+    // First 16MB are user pages
+    for (i = 0; i < SUPER_BASE; i += PAGE_SIZE) {
+        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT);
+        invlpg((void *)i);
+    }
+
+    // Present the same 16MB as supervisor pages in the 16MB-32MB range
+    for (i = SUPER_BASE; i < 2 * SUPER_BASE; i += PAGE_SIZE) {
+        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~SUPER_BASE;
+        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~PT_USER_MASK;
+        *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT);
+        invlpg((void *)i);
+    }
+
+    write_cr4(read_cr4() | X86_CR4_PKS);
+    write_cr3(read_cr3());
+
+    init_test();
+    set_cr0_wp(1);
+    wrmsr(MSR_IA32_PKRS, pkrs_ad);
+    SUPER_VAR(test) = 21;
+    report(pf_count == 1 && test == 21 && save == 0,
+           "write to supervisor page when pkrs is ad and wp == 1");
+
+    init_test();
+    set_cr0_wp(0);
+    wrmsr(MSR_IA32_PKRS, pkrs_ad);
+    SUPER_VAR(test) = 22;
+    report(pf_count == 1 && test == 22 && save == 21,
+           "write to supervisor page when pkrs is ad and wp == 0");
+
+    init_test();
+    set_cr0_wp(1);
+    wrmsr(MSR_IA32_PKRS, pkrs_wd);
+    SUPER_VAR(test) = 23;
+    report(pf_count == 1 && test == 23 && save == 22,
+           "write to supervisor page when pkrs is wd and wp == 1");
+
+    init_test();
+    set_cr0_wp(0);
+    wrmsr(MSR_IA32_PKRS, pkrs_wd);
+    SUPER_VAR(test) = 24;
+    report(pf_count == 0 && test == 24,
+           "write to supervisor page when pkrs is wd and wp == 0");
+
+    init_test();
+    set_cr0_wp(0);
+    wrmsr(MSR_IA32_PKRS, pkrs_wd);
+    test = 25;
+    report(pf_count == 0 && test == 25,
+           "write to user page when pkrs is wd and wp == 0");
+
+    init_test();
+    set_cr0_wp(1);
+    wrmsr(MSR_IA32_PKRS, pkrs_wd);
+    test = 26;
+    report(pf_count == 0 && test == 26,
+           "write to user page when pkrs is wd and wp == 1");
+
+    init_test();
+    wrmsr(MSR_IA32_PKRS, pkrs_ad);
+    /* Load from the volatile "test" so that a read really takes place. */
+    val = test;
+    (void)val;
+    report(pf_count == 0, "read from user page when pkrs is ad");
+
+    return report_summary();
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 3a79151..b75419e 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -127,6 +127,11 @@  file = pku.flat
 arch = x86_64
 extra_params = -cpu host
 
+[pks]
+file = pks.flat
+arch = x86_64
+extra_params = -cpu host
+
 [asyncpf]
 file = asyncpf.flat
 extra_params = -m 2048