Message ID | 20200713043908.39605-1-namit@vmware.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [kvm-unit-tests] x86: svm: low CR3 bits are not MBZ | expand |
On 7/12/20 9:39 PM, Nadav Amit wrote: > The low CR3 bits are reserved but not MBZ according to tha APM. The > tests should therefore not check that they cause failed VM-entry. Tests > on bare-metal show they do not. > > Signed-off-by: Nadav Amit <namit@vmware.com> > --- > x86/svm.h | 4 +--- > x86/svm_tests.c | 26 +------------------------- > 2 files changed, 2 insertions(+), 28 deletions(-) > > diff --git a/x86/svm.h b/x86/svm.h > index f8e7429..15e0f18 100644 > --- a/x86/svm.h > +++ b/x86/svm.h > @@ -325,9 +325,7 @@ struct __attribute__ ((__packed__)) vmcb { > #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) > > #define SVM_CR0_RESERVED_MASK 0xffffffff00000000U > -#define SVM_CR3_LEGACY_RESERVED_MASK 0xfe7U > -#define SVM_CR3_LEGACY_PAE_RESERVED_MASK 0x7U > -#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U > +#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000000U > #define SVM_CR4_LEGACY_RESERVED_MASK 0xff88f000U > #define SVM_CR4_RESERVED_MASK 0xffffffffff88f000U > #define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U > diff --git a/x86/svm_tests.c b/x86/svm_tests.c > index 3b0d019..1908c7c 100644 > --- a/x86/svm_tests.c > +++ b/x86/svm_tests.c > @@ -2007,38 +2007,14 @@ static void test_cr3(void) > { > /* > * CR3 MBZ bits based on different modes: > - * [2:0] - legacy PAE > - * [2:0], [11:5] - legacy non-PAE > - * [2:0], [11:5], [63:52] - long mode > + * [63:52] - long mode > */ > u64 cr3_saved = vmcb->save.cr3; > - u64 cr4_saved = vmcb->save.cr4; > - u64 cr4 = cr4_saved; > - u64 efer_saved = vmcb->save.efer; > - u64 efer = efer_saved; > > - efer &= ~EFER_LME; > - vmcb->save.efer = efer; > - cr4 |= X86_CR4_PAE; > - vmcb->save.cr4 = cr4; > - SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved, > - SVM_CR3_LEGACY_PAE_RESERVED_MASK); > - > - cr4 = cr4_saved & ~X86_CR4_PAE; > - vmcb->save.cr4 = cr4; > - SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved, > - SVM_CR3_LEGACY_RESERVED_MASK); > - > - cr4 |= X86_CR4_PAE; > - vmcb->save.cr4 = cr4; > - efer |= EFER_LME; > - vmcb->save.efer = efer; > SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved, > SVM_CR3_LONG_RESERVED_MASK); > > - vmcb->save.cr4 = cr4_saved; > vmcb->save.cr3 = cr3_saved; > - vmcb->save.efer = efer_saved; > } > > static void test_cr4(void) APM says, "Reserved Bits. Reserved fields should be cleared to 0 by software when writing CR3." If processor allows these bits to be left non-zero, "should be cleared to 0" means it's not mandatory then. I am wondering what this "should be" actually means :-) !
> On Jul 13, 2020, at 4:06 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: > > > On 7/12/20 9:39 PM, Nadav Amit wrote: >> The low CR3 bits are reserved but not MBZ according to tha APM. The >> tests should therefore not check that they cause failed VM-entry. Tests >> on bare-metal show they do not. >> >> Signed-off-by: Nadav Amit <namit@vmware.com> >> --- >> x86/svm.h | 4 +--- >> x86/svm_tests.c | 26 +------------------------- >> 2 files changed, 2 insertions(+), 28 deletions(-) >> >> diff --git a/x86/svm.h b/x86/svm.h >> index f8e7429..15e0f18 100644 >> --- a/x86/svm.h >> +++ b/x86/svm.h >> @@ -325,9 +325,7 @@ struct __attribute__ ((__packed__)) vmcb { >> #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) >> #define SVM_CR0_RESERVED_MASK 0xffffffff00000000U >> -#define SVM_CR3_LEGACY_RESERVED_MASK 0xfe7U >> -#define SVM_CR3_LEGACY_PAE_RESERVED_MASK 0x7U >> -#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U >> +#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000000U >> #define SVM_CR4_LEGACY_RESERVED_MASK 0xff88f000U >> #define SVM_CR4_RESERVED_MASK 0xffffffffff88f000U >> #define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U >> diff --git a/x86/svm_tests.c b/x86/svm_tests.c >> index 3b0d019..1908c7c 100644 >> --- a/x86/svm_tests.c >> +++ b/x86/svm_tests.c >> @@ -2007,38 +2007,14 @@ static void test_cr3(void) >> { >> /* >> * CR3 MBZ bits based on different modes: >> - * [2:0] - legacy PAE >> - * [2:0], [11:5] - legacy non-PAE >> - * [2:0], [11:5], [63:52] - long mode >> + * [63:52] - long mode >> */ >> u64 cr3_saved = vmcb->save.cr3; >> - u64 cr4_saved = vmcb->save.cr4; >> - u64 cr4 = cr4_saved; >> - u64 efer_saved = vmcb->save.efer; >> - u64 efer = efer_saved; >> - efer &= ~EFER_LME; >> - vmcb->save.efer = efer; >> - cr4 |= X86_CR4_PAE; >> - vmcb->save.cr4 = cr4; >> - SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved, >> - SVM_CR3_LEGACY_PAE_RESERVED_MASK); >> - >> - cr4 = cr4_saved & ~X86_CR4_PAE; >> - vmcb->save.cr4 = cr4; >> - SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved, >> - SVM_CR3_LEGACY_RESERVED_MASK); >> - >> - cr4 |= X86_CR4_PAE; >> - vmcb->save.cr4 = cr4; >> - efer |= EFER_LME; >> - vmcb->save.efer = efer; >> SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved, >> SVM_CR3_LONG_RESERVED_MASK); >> - vmcb->save.cr4 = cr4_saved; >> vmcb->save.cr3 = cr3_saved; >> - vmcb->save.efer = efer_saved; >> } >> static void test_cr4(void) > > APM says, > > "Reserved Bits. Reserved fields should be cleared to 0 by software when writing CR3." > > If processor allows these bits to be left non-zero, "should be cleared to 0" means it's not mandatory then. I am wondering what this "should be" actually means :-) ! I really tested it, so I guess we “should” not argue about it. ;-) Anyhow, according to APM Figure 5-16 (“Control Register 3 (CR3)-Long Mode”), bits 52:63 are “reserved, MBZ” and others are just marked as “Reserved”. So it seems they are not the same.
On 7/13/20 4:11 PM, Nadav Amit wrote: >> On Jul 13, 2020, at 4:06 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >> >> >> On 7/12/20 9:39 PM, Nadav Amit wrote: >>> The low CR3 bits are reserved but not MBZ according to tha APM. The >>> tests should therefore not check that they cause failed VM-entry. Tests >>> on bare-metal show they do not. >>> >>> Signed-off-by: Nadav Amit <namit@vmware.com> >>> --- >>> x86/svm.h | 4 +--- >>> x86/svm_tests.c | 26 +------------------------- >>> 2 files changed, 2 insertions(+), 28 deletions(-) >>> >>> diff --git a/x86/svm.h b/x86/svm.h >>> index f8e7429..15e0f18 100644 >>> --- a/x86/svm.h >>> +++ b/x86/svm.h >>> @@ -325,9 +325,7 @@ struct __attribute__ ((__packed__)) vmcb { >>> #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) >>> #define SVM_CR0_RESERVED_MASK 0xffffffff00000000U >>> -#define SVM_CR3_LEGACY_RESERVED_MASK 0xfe7U >>> -#define SVM_CR3_LEGACY_PAE_RESERVED_MASK 0x7U >>> -#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U >>> +#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000000U >>> #define SVM_CR4_LEGACY_RESERVED_MASK 0xff88f000U >>> #define SVM_CR4_RESERVED_MASK 0xffffffffff88f000U >>> #define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U >>> diff --git a/x86/svm_tests.c b/x86/svm_tests.c >>> index 3b0d019..1908c7c 100644 >>> --- a/x86/svm_tests.c >>> +++ b/x86/svm_tests.c >>> @@ -2007,38 +2007,14 @@ static void test_cr3(void) >>> { >>> /* >>> * CR3 MBZ bits based on different modes: >>> - * [2:0] - legacy PAE >>> - * [2:0], [11:5] - legacy non-PAE >>> - * [2:0], [11:5], [63:52] - long mode >>> + * [63:52] - long mode >>> */ >>> u64 cr3_saved = vmcb->save.cr3; >>> - u64 cr4_saved = vmcb->save.cr4; >>> - u64 cr4 = cr4_saved; >>> - u64 efer_saved = vmcb->save.efer; >>> - u64 efer = efer_saved; >>> - efer &= ~EFER_LME; >>> - vmcb->save.efer = efer; >>> - cr4 |= X86_CR4_PAE; >>> - vmcb->save.cr4 = cr4; >>> - SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved, >>> - SVM_CR3_LEGACY_PAE_RESERVED_MASK); >>> - >>> - cr4 = cr4_saved & ~X86_CR4_PAE; >>> - vmcb->save.cr4 = cr4; >>> - SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved, >>> - SVM_CR3_LEGACY_RESERVED_MASK); >>> - >>> - cr4 |= X86_CR4_PAE; >>> - vmcb->save.cr4 = cr4; >>> - efer |= EFER_LME; >>> - vmcb->save.efer = efer; >>> SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved, >>> SVM_CR3_LONG_RESERVED_MASK); >>> - vmcb->save.cr4 = cr4_saved; >>> vmcb->save.cr3 = cr3_saved; >>> - vmcb->save.efer = efer_saved; >>> } >>> static void test_cr4(void) >> APM says, >> >> "Reserved Bits. Reserved fields should be cleared to 0 by software when writing CR3." >> >> If processor allows these bits to be left non-zero, "should be cleared to 0" means it's not mandatory then. I am wondering what this "should be" actually means :-) ! > I really tested it, so I guess we “should” not argue about it. ;-) No, I am not arguing over your test results. :-) > > Anyhow, according to APM Figure 5-16 (“Control Register 3 (CR3)-Long Mode”), > bits 52:63 are “reserved, MBZ” and others are just marked as “Reserved”. So > it seems they are not the same. > I am just saying that the APM language "should be cleared to 0" is misleading if the processor doesn't enforce it.
> On Jul 13, 2020, at 4:17 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: > > > On 7/13/20 4:11 PM, Nadav Amit wrote: >>> On Jul 13, 2020, at 4:06 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >>> >>> >>> On 7/12/20 9:39 PM, Nadav Amit wrote: >>>> The low CR3 bits are reserved but not MBZ according to tha APM. The >>>> tests should therefore not check that they cause failed VM-entry. Tests >>>> on bare-metal show they do not. >>>> >>>> Signed-off-by: Nadav Amit <namit@vmware.com> >>>> --- >>>> x86/svm.h | 4 +--- >>>> x86/svm_tests.c | 26 +------------------------- >>>> 2 files changed, 2 insertions(+), 28 deletions(-) >>>> >>>> diff --git a/x86/svm.h b/x86/svm.h >>>> index f8e7429..15e0f18 100644 >>>> --- a/x86/svm.h >>>> +++ b/x86/svm.h >>>> @@ -325,9 +325,7 @@ struct __attribute__ ((__packed__)) vmcb { >>>> #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) >>>> #define SVM_CR0_RESERVED_MASK 0xffffffff00000000U >>>> -#define SVM_CR3_LEGACY_RESERVED_MASK 0xfe7U >>>> -#define SVM_CR3_LEGACY_PAE_RESERVED_MASK 0x7U >>>> -#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U >>>> +#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000000U >>>> #define SVM_CR4_LEGACY_RESERVED_MASK 0xff88f000U >>>> #define SVM_CR4_RESERVED_MASK 0xffffffffff88f000U >>>> #define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U >>>> diff --git a/x86/svm_tests.c b/x86/svm_tests.c >>>> index 3b0d019..1908c7c 100644 >>>> --- a/x86/svm_tests.c >>>> +++ b/x86/svm_tests.c >>>> @@ -2007,38 +2007,14 @@ static void test_cr3(void) >>>> { >>>> /* >>>> * CR3 MBZ bits based on different modes: >>>> - * [2:0] - legacy PAE >>>> - * [2:0], [11:5] - legacy non-PAE >>>> - * [2:0], [11:5], [63:52] - long mode >>>> + * [63:52] - long mode >>>> */ >>>> u64 cr3_saved = vmcb->save.cr3; >>>> - u64 cr4_saved = vmcb->save.cr4; >>>> - u64 cr4 = cr4_saved; >>>> - u64 efer_saved = vmcb->save.efer; >>>> - u64 efer = efer_saved; >>>> - efer &= ~EFER_LME; >>>> - vmcb->save.efer = efer; >>>> - cr4 |= X86_CR4_PAE; >>>> - vmcb->save.cr4 = cr4; >>>> - SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved, >>>> - SVM_CR3_LEGACY_PAE_RESERVED_MASK); >>>> - >>>> - cr4 = cr4_saved & ~X86_CR4_PAE; >>>> - vmcb->save.cr4 = cr4; >>>> - SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved, >>>> - SVM_CR3_LEGACY_RESERVED_MASK); >>>> - >>>> - cr4 |= X86_CR4_PAE; >>>> - vmcb->save.cr4 = cr4; >>>> - efer |= EFER_LME; >>>> - vmcb->save.efer = efer; >>>> SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved, >>>> SVM_CR3_LONG_RESERVED_MASK); >>>> - vmcb->save.cr4 = cr4_saved; >>>> vmcb->save.cr3 = cr3_saved; >>>> - vmcb->save.efer = efer_saved; >>>> } >>>> static void test_cr4(void) >>> APM says, >>> >>> "Reserved Bits. Reserved fields should be cleared to 0 by software when writing CR3." >>> >>> If processor allows these bits to be left non-zero, "should be cleared to 0" means it's not mandatory then. I am wondering what this "should be" actually means :-) ! >> I really tested it, so I guess we “should” not argue about it. ;-) > No, I am not arguing over your test results. :-) >> Anyhow, according to APM Figure 5-16 (“Control Register 3 (CR3)-Long Mode”), >> bits 52:63 are “reserved, MBZ” and others are just marked as “Reserved”. So >> it seems they are not the same. > I am just saying that the APM language "should be cleared to 0" is misleading if the processor doesn't enforce it. Just to ensure I am clear - I am not blaming you in any way. I also found the phrasing confusing. Having said that, if you (or anyone else) reintroduces “positive” tests, in which the VM CR3 is modified to ensure VM-entry succeeds when the reserved non-MBZ bits are set, please ensure the tests fails gracefully. The non-long-mode CR3 tests crashed since the VM page-tables were incompatible with the paging mode. In other words, instead of setting a VMMCALL instruction in the VM to trap immediately after entry, consider clearing the present-bits in the high levels of the NPT; or injecting some exception that would trigger exit during vectoring or something like that. P.S.: If it wasn’t clear, I am not going to fix KVM itself for some obvious reasons.
On 7/13/20 4:30 PM, Nadav Amit wrote: >> On Jul 13, 2020, at 4:17 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >> >> >> On 7/13/20 4:11 PM, Nadav Amit wrote: >>>> On Jul 13, 2020, at 4:06 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >>>> >>>> >>>> On 7/12/20 9:39 PM, Nadav Amit wrote: >>>>> The low CR3 bits are reserved but not MBZ according to tha APM. The >>>>> tests should therefore not check that they cause failed VM-entry. Tests >>>>> on bare-metal show they do not. >>>>> >>>>> Signed-off-by: Nadav Amit <namit@vmware.com> >>>>> --- >>>>> x86/svm.h | 4 +--- >>>>> x86/svm_tests.c | 26 +------------------------- >>>>> 2 files changed, 2 insertions(+), 28 deletions(-) >>>>> >>>>> diff --git a/x86/svm.h b/x86/svm.h >>>>> index f8e7429..15e0f18 100644 >>>>> --- a/x86/svm.h >>>>> +++ b/x86/svm.h >>>>> @@ -325,9 +325,7 @@ struct __attribute__ ((__packed__)) vmcb { >>>>> #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) >>>>> #define SVM_CR0_RESERVED_MASK 0xffffffff00000000U >>>>> -#define SVM_CR3_LEGACY_RESERVED_MASK 0xfe7U >>>>> -#define SVM_CR3_LEGACY_PAE_RESERVED_MASK 0x7U >>>>> -#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U >>>>> +#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000000U >>>>> #define SVM_CR4_LEGACY_RESERVED_MASK 0xff88f000U >>>>> #define SVM_CR4_RESERVED_MASK 0xffffffffff88f000U >>>>> #define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U >>>>> diff --git a/x86/svm_tests.c b/x86/svm_tests.c >>>>> index 3b0d019..1908c7c 100644 >>>>> --- a/x86/svm_tests.c >>>>> +++ b/x86/svm_tests.c >>>>> @@ -2007,38 +2007,14 @@ static void test_cr3(void) >>>>> { >>>>> /* >>>>> * CR3 MBZ bits based on different modes: >>>>> - * [2:0] - legacy PAE >>>>> - * [2:0], [11:5] - legacy non-PAE >>>>> - * [2:0], [11:5], [63:52] - long mode >>>>> + * [63:52] - long mode >>>>> */ >>>>> u64 cr3_saved = vmcb->save.cr3; >>>>> - u64 cr4_saved = vmcb->save.cr4; >>>>> - u64 cr4 = cr4_saved; >>>>> - u64 efer_saved = vmcb->save.efer; >>>>> - u64 efer = efer_saved; >>>>> - efer &= ~EFER_LME; >>>>> - vmcb->save.efer = efer; >>>>> - cr4 |= X86_CR4_PAE; >>>>> - vmcb->save.cr4 = cr4; >>>>> - SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved, >>>>> - SVM_CR3_LEGACY_PAE_RESERVED_MASK); >>>>> - >>>>> - cr4 = cr4_saved & ~X86_CR4_PAE; >>>>> - vmcb->save.cr4 = cr4; >>>>> - SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved, >>>>> - SVM_CR3_LEGACY_RESERVED_MASK); >>>>> - >>>>> - cr4 |= X86_CR4_PAE; >>>>> - vmcb->save.cr4 = cr4; >>>>> - efer |= EFER_LME; >>>>> - vmcb->save.efer = efer; >>>>> SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved, >>>>> SVM_CR3_LONG_RESERVED_MASK); >>>>> - vmcb->save.cr4 = cr4_saved; >>>>> vmcb->save.cr3 = cr3_saved; >>>>> - vmcb->save.efer = efer_saved; >>>>> } >>>>> static void test_cr4(void) >>>> APM says, >>>> >>>> "Reserved Bits. Reserved fields should be cleared to 0 by software when writing CR3." >>>> >>>> If processor allows these bits to be left non-zero, "should be cleared to 0" means it's not mandatory then. I am wondering what this "should be" actually means :-) ! >>> I really tested it, so I guess we “should” not argue about it. ;-) >> No, I am not arguing over your test results. :-) >>> Anyhow, according to APM Figure 5-16 (“Control Register 3 (CR3)-Long Mode”), >>> bits 52:63 are “reserved, MBZ” and others are just marked as “Reserved”. So >>> it seems they are not the same. >> I am just saying that the APM language "should be cleared to 0" is misleading if the processor doesn't enforce it. > Just to ensure I am clear - I am not blaming you in any way. I also found > the phrasing confusing. > > Having said that, if you (or anyone else) reintroduces “positive” tests, in > which the VM CR3 is modified to ensure VM-entry succeeds when the reserved > non-MBZ bits are set, please ensure the tests fails gracefully. The > non-long-mode CR3 tests crashed since the VM page-tables were incompatible > with the paging mode. > > In other words, instead of setting a VMMCALL instruction in the VM to trap > immediately after entry, consider clearing the present-bits in the high > levels of the NPT; or injecting some exception that would trigger exit > during vectoring or something like that. > > P.S.: If it wasn’t clear, I am not going to fix KVM itself for some obvious > reasons. > I think since the APM is not clear, re-adding any test that tests those bits, is like adding a test with "undefined behavior" to me. Paolo, Should I send a KVM patch to remove checks for those non-MBZ reserved bits ?
> On Jul 15, 2020, at 3:21 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: > > > On 7/13/20 4:30 PM, Nadav Amit wrote: >>> On Jul 13, 2020, at 4:17 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >>> >>> [snip] >>> I am just saying that the APM language "should be cleared to 0" is misleading if the processor doesn't enforce it. >> Just to ensure I am clear - I am not blaming you in any way. I also found >> the phrasing confusing. >> >> Having said that, if you (or anyone else) reintroduces “positive” tests, in >> which the VM CR3 is modified to ensure VM-entry succeeds when the reserved >> non-MBZ bits are set, please ensure the tests fails gracefully. The >> non-long-mode CR3 tests crashed since the VM page-tables were incompatible >> with the paging mode. >> >> In other words, instead of setting a VMMCALL instruction in the VM to trap >> immediately after entry, consider clearing the present-bits in the high >> levels of the NPT; or injecting some exception that would trigger exit >> during vectoring or something like that. >> >> P.S.: If it wasn’t clear, I am not going to fix KVM itself for some obvious >> reasons. > I think since the APM is not clear, re-adding any test that tests those bits, is like adding a test with "undefined behavior" to me. > > > Paolo, Should I send a KVM patch to remove checks for those non-MBZ reserved bits ? Which non-MBZ reserved bits (other than those that I addressed) do you refer to?
On 7/15/20 3:27 PM, Nadav Amit wrote: >> On Jul 15, 2020, at 3:21 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >> >> >> On 7/13/20 4:30 PM, Nadav Amit wrote: >>>> On Jul 13, 2020, at 4:17 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >>>> >>>> > [snip] > >>>> I am just saying that the APM language "should be cleared to 0" is misleading if the processor doesn't enforce it. >>> Just to ensure I am clear - I am not blaming you in any way. I also found >>> the phrasing confusing. >>> >>> Having said that, if you (or anyone else) reintroduces “positive” tests, in >>> which the VM CR3 is modified to ensure VM-entry succeeds when the reserved >>> non-MBZ bits are set, please ensure the tests fails gracefully. The >>> non-long-mode CR3 tests crashed since the VM page-tables were incompatible >>> with the paging mode. >>> >>> In other words, instead of setting a VMMCALL instruction in the VM to trap >>> immediately after entry, consider clearing the present-bits in the high >>> levels of the NPT; or injecting some exception that would trigger exit >>> during vectoring or something like that. >>> >>> P.S.: If it wasn’t clear, I am not going to fix KVM itself for some obvious >>> reasons. >> I think since the APM is not clear, re-adding any test that tests those bits, is like adding a test with "undefined behavior" to me. >> >> >> Paolo, Should I send a KVM patch to remove checks for those non-MBZ reserved bits ? > Which non-MBZ reserved bits (other than those that I addressed) do you refer > to? > I am referring to, "[PATCH 2/3 v4] KVM: nSVM: Check that MBZ bits in CR3 and CR4 are not set on vmrun of nested guests" in which I added the following: +#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U +#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U +#define MSR_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U
> On Jul 15, 2020, at 3:39 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: > > > On 7/15/20 3:27 PM, Nadav Amit wrote: >>> On Jul 15, 2020, at 3:21 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >>> >>> >>> On 7/13/20 4:30 PM, Nadav Amit wrote: >>>>> On Jul 13, 2020, at 4:17 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >> [snip] >> >>>>> I am just saying that the APM language "should be cleared to 0" is misleading if the processor doesn't enforce it. >>>> Just to ensure I am clear - I am not blaming you in any way. I also found >>>> the phrasing confusing. >>>> >>>> Having said that, if you (or anyone else) reintroduces “positive” tests, in >>>> which the VM CR3 is modified to ensure VM-entry succeeds when the reserved >>>> non-MBZ bits are set, please ensure the tests fails gracefully. The >>>> non-long-mode CR3 tests crashed since the VM page-tables were incompatible >>>> with the paging mode. >>>> >>>> In other words, instead of setting a VMMCALL instruction in the VM to trap >>>> immediately after entry, consider clearing the present-bits in the high >>>> levels of the NPT; or injecting some exception that would trigger exit >>>> during vectoring or something like that. >>>> >>>> P.S.: If it wasn’t clear, I am not going to fix KVM itself for some obvious >>>> reasons. >>> I think since the APM is not clear, re-adding any test that tests those bits, is like adding a test with "undefined behavior" to me. >>> >>> >>> Paolo, Should I send a KVM patch to remove checks for those non-MBZ reserved bits ? >> Which non-MBZ reserved bits (other than those that I addressed) do you refer >> to? > I am referring to, > > "[PATCH 2/3 v4] KVM: nSVM: Check that MBZ bits in CR3 and CR4 are not set on vmrun of nested guests" > > in which I added the following: > > > +#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U > +#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U > +#define MSR_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U Oh, you refer to KVM, not kvm-unit-tests... That’s out of my scope ;-)
On Wed, Jul 15, 2020 at 3:40 PM Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: > > > On 7/15/20 3:27 PM, Nadav Amit wrote: > >> On Jul 15, 2020, at 3:21 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: > >> > >> > >> On 7/13/20 4:30 PM, Nadav Amit wrote: > >>>> On Jul 13, 2020, at 4:17 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: > >>>> > >>>> > > [snip] > > > >>>> I am just saying that the APM language "should be cleared to 0" is misleading if the processor doesn't enforce it. > >>> Just to ensure I am clear - I am not blaming you in any way. I also found > >>> the phrasing confusing. > >>> > >>> Having said that, if you (or anyone else) reintroduces “positive” tests, in > >>> which the VM CR3 is modified to ensure VM-entry succeeds when the reserved > >>> non-MBZ bits are set, please ensure the tests fails gracefully. The > >>> non-long-mode CR3 tests crashed since the VM page-tables were incompatible > >>> with the paging mode. > >>> > >>> In other words, instead of setting a VMMCALL instruction in the VM to trap > >>> immediately after entry, consider clearing the present-bits in the high > >>> levels of the NPT; or injecting some exception that would trigger exit > >>> during vectoring or something like that. > >>> > >>> P.S.: If it wasn’t clear, I am not going to fix KVM itself for some obvious > >>> reasons. > >> I think since the APM is not clear, re-adding any test that tests those bits, is like adding a test with "undefined behavior" to me. > >> > >> > >> Paolo, Should I send a KVM patch to remove checks for those non-MBZ reserved bits ? > > Which non-MBZ reserved bits (other than those that I addressed) do you refer > > to? > > > I am referring to, > > "[PATCH 2/3 v4] KVM: nSVM: Check that MBZ bits in CR3 and CR4 are > not set on vmrun of nested guests" > > in which I added the following: > > > +#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U > +#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U > +#define MSR_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U In my experience, the APM generally distinguishes between "reserved" and "reserved, MBZ." The low bits you have indicated for CR3 are marked only as "reserved" in Figures 3-4, 3-5, and 3-6 of the APM, volume 2. Only bits 63:52 are marked as "reserved, MBZ." (In fact, Figure 3-6 of the May 2020 version of the APM, revision 3.35, also calls out bits 11:0 as the PCID when CR4.PCIDE is set.) Of course, you could always test the behavior. :-)
On 13/07/20 06:39, Nadav Amit wrote: > The low CR3 bits are reserved but not MBZ according to tha APM. The > tests should therefore not check that they cause failed VM-entry. Tests > on bare-metal show they do not. > > Signed-off-by: Nadav Amit <namit@vmware.com> > --- > x86/svm.h | 4 +--- > x86/svm_tests.c | 26 +------------------------- > 2 files changed, 2 insertions(+), 28 deletions(-) > > diff --git a/x86/svm.h b/x86/svm.h > index f8e7429..15e0f18 100644 > --- a/x86/svm.h > +++ b/x86/svm.h > @@ -325,9 +325,7 @@ struct __attribute__ ((__packed__)) vmcb { > #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) > > #define SVM_CR0_RESERVED_MASK 0xffffffff00000000U > -#define SVM_CR3_LEGACY_RESERVED_MASK 0xfe7U > -#define SVM_CR3_LEGACY_PAE_RESERVED_MASK 0x7U > -#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U > +#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000000U > #define SVM_CR4_LEGACY_RESERVED_MASK 0xff88f000U > #define SVM_CR4_RESERVED_MASK 0xffffffffff88f000U > #define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U > diff --git a/x86/svm_tests.c b/x86/svm_tests.c > index 3b0d019..1908c7c 100644 > --- a/x86/svm_tests.c > +++ b/x86/svm_tests.c > @@ -2007,38 +2007,14 @@ static void test_cr3(void) > { > /* > * CR3 MBZ bits based on different modes: > - * [2:0] - legacy PAE > - * [2:0], [11:5] - legacy non-PAE > - * [2:0], [11:5], [63:52] - long mode > + * [63:52] - long mode > */ > u64 cr3_saved = vmcb->save.cr3; > - u64 cr4_saved = vmcb->save.cr4; > - u64 cr4 = cr4_saved; > - u64 efer_saved = vmcb->save.efer; > - u64 efer = efer_saved; > > - efer &= ~EFER_LME; > - vmcb->save.efer = efer; > - cr4 |= X86_CR4_PAE; > - vmcb->save.cr4 = cr4; > - SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved, > - SVM_CR3_LEGACY_PAE_RESERVED_MASK); > - > - cr4 = cr4_saved & ~X86_CR4_PAE; > - vmcb->save.cr4 = cr4; > - SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved, > - SVM_CR3_LEGACY_RESERVED_MASK); > - > - cr4 |= X86_CR4_PAE; > - vmcb->save.cr4 = cr4; > - efer |= EFER_LME; > - vmcb->save.efer = efer; > SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved, > SVM_CR3_LONG_RESERVED_MASK); > > - vmcb->save.cr4 = cr4_saved; > vmcb->save.cr3 = cr3_saved; > - vmcb->save.efer = efer_saved; > } > > static void test_cr4(void) > Queued, thanks. Paolo
On 16/07/20 00:21, Krish Sadhukhan wrote: > Paolo, Should I send a KVM patch to remove checks for those non-MBZ > reserved bits ? Yes, please. Paolo
On 7/15/20 4:12 PM, Jim Mattson wrote: > On Wed, Jul 15, 2020 at 3:40 PM Krish Sadhukhan > <krish.sadhukhan@oracle.com> wrote: >> >> On 7/15/20 3:27 PM, Nadav Amit wrote: >>>> On Jul 15, 2020, at 3:21 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >>>> >>>> >>>> On 7/13/20 4:30 PM, Nadav Amit wrote: >>>>>> On Jul 13, 2020, at 4:17 PM, Krish Sadhukhan <krish.sadhukhan@oracle.com> wrote: >>>>>> >>>>>> >>> [snip] >>> >>>>>> I am just saying that the APM language "should be cleared to 0" is misleading if the processor doesn't enforce it. >>>>> Just to ensure I am clear - I am not blaming you in any way. I also found >>>>> the phrasing confusing. >>>>> >>>>> Having said that, if you (or anyone else) reintroduces “positive” tests, in >>>>> which the VM CR3 is modified to ensure VM-entry succeeds when the reserved >>>>> non-MBZ bits are set, please ensure the tests fails gracefully. The >>>>> non-long-mode CR3 tests crashed since the VM page-tables were incompatible >>>>> with the paging mode. >>>>> >>>>> In other words, instead of setting a VMMCALL instruction in the VM to trap >>>>> immediately after entry, consider clearing the present-bits in the high >>>>> levels of the NPT; or injecting some exception that would trigger exit >>>>> during vectoring or something like that. >>>>> >>>>> P.S.: If it wasn’t clear, I am not going to fix KVM itself for some obvious >>>>> reasons. >>>> I think since the APM is not clear, re-adding any test that tests those bits, is like adding a test with "undefined behavior" to me. >>>> >>>> >>>> Paolo, Should I send a KVM patch to remove checks for those non-MBZ reserved bits ? >>> Which non-MBZ reserved bits (other than those that I addressed) do you refer >>> to? >>> >> I am referring to, >> >> "[PATCH 2/3 v4] KVM: nSVM: Check that MBZ bits in CR3 and CR4 are >> not set on vmrun of nested guests" >> >> in which I added the following: >> >> >> +#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U >> +#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U >> +#define MSR_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U > In my experience, the APM generally distinguishes between "reserved" > and "reserved, MBZ." The low bits you have indicated for CR3 are > marked only as "reserved" in Figures 3-4, 3-5, and 3-6 of the APM, > volume 2. Only bits 63:52 are marked as "reserved, MBZ." (In fact, > Figure 3-6 of the May 2020 version of the APM, revision 3.35, also > calls out bits 11:0 as the PCID when CR4.PCIDE is set.) > > Of course, you could always test the behavior. :-) I did some experiments on the processor behavior on an Epyc 2 system via KVM: 1. MBZ bits: VMRUN passes even if these bits are set to 1 and guest is exiting with exit code of SVM_EXIT_VMMCALL. According to the APM, this settting should constitute an invalid guest state and hence I should get and exit code of SVM_EXIT_ERR. There's no KVM check in place for these CR3 bits, so the check is all done in hardware. 2. non-MBZ reserved bits: Based on Nadav Amit's suggestion, I set the 'not present' bit in an upper level NPT in order to trigger an NPF and I did get an exit code of SVM_EXIT_NPF when I set any of these bits. I am hoping that the processor has done the consistency check before it tripped on NPF and not the other way around, so that our test is useful : In PAE-legacy and non-PAE-legacy modes, the guest doesn't exit with SVM_EXIT_VMMCALL when these bits are set to 0. I am not sure if I am missing any special setting for the PAE-legacy and non-PAE-legacy modes. In long-mode, however, the processor seems to behave as per APM, i.e., guest exits with SVM_EXIT_VMMCALL when these bits are set to 0.
On 05/08/20 01:13, Krish Sadhukhan wrote: >> > > I did some experiments on the processor behavior on an Epyc 2 system via > KVM: > >   1. MBZ bits: VMRUN passes even if these bits are set to 1 and > guest is exiting with exit code of         SVM_EXIT_VMMCALL. > According to the APM, this settting should constitute an invalid guest > state and hence I should get and exit code of SVM_EXIT_ERR. There's no > KVM check in place for these CR3 bits, so the check is all done in > hardware. > >   2. non-MBZ reserved bits: Based on Nadav Amit's suggestion, I set > the 'not present' bit in an upper level NPT in order to trigger an NPF > and I did get an exit code of SVM_EXIT_NPF when I set any of these bits. > I am hoping that the processor has done the consistency check before it > tripped on NPF and not the other way around, so that our test is useful : > >    In PAE-legacy and non-PAE-legacy modes, the guest doesn't exit > with SVM_EXIT_VMMCALL when these bits are set to 0. I am not sure if I > am missing any special setting for the PAE-legacy and non-PAE-legacy > modes. In long-mode, however, the processor seems to behave as per APM, > i.e., guest exits with SVM_EXIT_VMMCALL when these bits are set to 0. Are you going to send patches for this? Thanks, Paolo
On 8/17/20 11:38 PM, Paolo Bonzini wrote: > On 05/08/20 01:13, Krish Sadhukhan wrote: >> I did some experiments on the processor behavior on an Epyc 2 system via >> KVM: >> >>   1. MBZ bits: VMRUN passes even if these bits are set to 1 and >> guest is exiting with exit code of         SVM_EXIT_VMMCALL. >> According to the APM, this settting should constitute an invalid guest >> state and hence I should get and exit code of SVM_EXIT_ERR. There's no >> KVM check in place for these CR3 bits, so the check is all done in >> hardware. >> >>   2. non-MBZ reserved bits: Based on Nadav Amit's suggestion, I set >> the 'not present' bit in an upper level NPT in order to trigger an NPF >> and I did get an exit code of SVM_EXIT_NPF when I set any of these bits. >> I am hoping that the processor has done the consistency check before it >> tripped on NPF and not the other way around, so that our test is useful : >> >>    In PAE-legacy and non-PAE-legacy modes, the guest doesn't exit >> with SVM_EXIT_VMMCALL when these bits are set to 0. I am not sure if I >> am missing any special setting for the PAE-legacy and non-PAE-legacy >> modes. In long-mode, however, the processor seems to behave as per APM, >> i.e., guest exits with SVM_EXIT_VMMCALL when these bits are set to 0. > Are you going to send patches for this? Yes, I am working on it. I need to complete some more investigation. > > Thanks, > > Paolo >
On 8/18/20 11:25 AM, Krish Sadhukhan wrote: > > On 8/17/20 11:38 PM, Paolo Bonzini wrote: >> On 05/08/20 01:13, Krish Sadhukhan wrote: >>> I did some experiments on the processor behavior on an Epyc 2 system >>> via >>> KVM: >>> >>>   1. MBZ bits: VMRUN passes even if these bits are set to 1 and >>> guest is exiting with exit code of         SVM_EXIT_VMMCALL. >>> According to the APM, this settting should constitute an invalid guest >>> state and hence I should get and exit code of SVM_EXIT_ERR. There's no >>> KVM check in place for these CR3 bits, so the check is all done in >>> hardware. >>> >>>   2. non-MBZ reserved bits: Based on Nadav Amit's suggestion, I >>> set >>> the 'not present' bit in an upper level NPT in order to trigger an NPF >>> and I did get an exit code of SVM_EXIT_NPF when I set any of these >>> bits. >>> I am hoping that the processor has done the consistency check before it >>> tripped on NPF and not the other way around, so that our test is >>> useful : >>> >>>    In PAE-legacy and non-PAE-legacy modes, the guest doesn't exit >>> with SVM_EXIT_VMMCALL when these bits are set to 0. I am not sure if I >>> am missing any special setting for the PAE-legacy and non-PAE-legacy >>> modes. In long-mode, however, the processor seems to behave as per APM, >>> i.e., guest exits with SVM_EXIT_VMMCALL when these bits are set to 0. >> Are you going to send patches for this? > > > Yes, I am working on it. I need to complete some more investigation. I have sent out a patch for testing the non-MBZ reserved bits in long mode. I haven't been able to find a reliable way to test the non-MBZ reserved bits in legacy (PAE and non-PAE) modes. In long mode if I set any MBZ bit and an in valid NPT entry, I get VMEXIT_INVALID before VMEXIT_NPF. But I am not sure if this same method of testing is working when a non-MBZ reserved bit is set. It seems that consistency checking is not enforced on these low-order reserved bits. My goal is to get past the consistency checking phase and then trigger a VMEXIT_NPF during translation of guest pages in NPT. I created a 3-level page table for legacy PAE mode (as per APM) and tried VMRUN with a non-MBZ reserved bit set, I am getting VMEXIT_NPF but the EXITINFO1 field contains the nested guest's CR3. So I am not entirely sure if I have gotten past the consistency checking phase. If there's a better way to test these bits, please let me know. >> >> Thanks, >> >> Paolo >>
diff --git a/x86/svm.h b/x86/svm.h index f8e7429..15e0f18 100644 --- a/x86/svm.h +++ b/x86/svm.h @@ -325,9 +325,7 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) #define SVM_CR0_RESERVED_MASK 0xffffffff00000000U -#define SVM_CR3_LEGACY_RESERVED_MASK 0xfe7U -#define SVM_CR3_LEGACY_PAE_RESERVED_MASK 0x7U -#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U +#define SVM_CR3_LONG_RESERVED_MASK 0xfff0000000000000U #define SVM_CR4_LEGACY_RESERVED_MASK 0xff88f000U #define SVM_CR4_RESERVED_MASK 0xffffffffff88f000U #define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U diff --git a/x86/svm_tests.c b/x86/svm_tests.c index 3b0d019..1908c7c 100644 --- a/x86/svm_tests.c +++ b/x86/svm_tests.c @@ -2007,38 +2007,14 @@ static void test_cr3(void) { /* * CR3 MBZ bits based on different modes: - * [2:0] - legacy PAE - * [2:0], [11:5] - legacy non-PAE - * [2:0], [11:5], [63:52] - long mode + * [63:52] - long mode */ u64 cr3_saved = vmcb->save.cr3; - u64 cr4_saved = vmcb->save.cr4; - u64 cr4 = cr4_saved; - u64 efer_saved = vmcb->save.efer; - u64 efer = efer_saved; - efer &= ~EFER_LME; - vmcb->save.efer = efer; - cr4 |= X86_CR4_PAE; - vmcb->save.cr4 = cr4; - SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved, - SVM_CR3_LEGACY_PAE_RESERVED_MASK); - - cr4 = cr4_saved & ~X86_CR4_PAE; - vmcb->save.cr4 = cr4; - SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved, - SVM_CR3_LEGACY_RESERVED_MASK); - - cr4 |= X86_CR4_PAE; - vmcb->save.cr4 = cr4; - efer |= EFER_LME; - vmcb->save.efer = efer; SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved, SVM_CR3_LONG_RESERVED_MASK); - vmcb->save.cr4 = cr4_saved; vmcb->save.cr3 = cr3_saved; - vmcb->save.efer = efer_saved; } static void test_cr4(void)
The low CR3 bits are reserved but not MBZ according to tha APM. The tests should therefore not check that they cause failed VM-entry. Tests on bare-metal show they do not. Signed-off-by: Nadav Amit <namit@vmware.com> --- x86/svm.h | 4 +--- x86/svm_tests.c | 26 +------------------------- 2 files changed, 2 insertions(+), 28 deletions(-)