diff mbox

[4/4] SVM: clean up svm_vmcb_isvalid()

Message ID 592E8B74020000780015DFCC@prv-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Beulich May 31, 2017, 7:23 a.m. UTC
- correct CR3 and CR4 checks
- add vcpu parameter (to include in log messages) and constify vmcb one
- use bool/true/false
- use accessors
- adjust formatting

Signed-off-by: Jan Beulich <jbeulich@suse.com>
SVM: clean up svm_vmcb_isvalid()

- correct CR3 and CR4 checks
- add vcpu parameter (to include in log messages) and constify vmcb one
- use bool/true/false
- use accessors
- adjust formatting

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -658,13 +658,13 @@ static int nsvm_vmcb_prepare4vmrun(struc
     /* Cleanbits */
     n2vmcb->cleanbits.bytes = 0;
 
-    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
+    rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
     if (rc) {
         gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
         return NSVM_ERROR_VVMCB;
     }
 
-    rc = svm_vmcb_isvalid(__func__, n2vmcb, 1);
+    rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);
     if (rc) {
         gdprintk(XENLOG_ERR, "n2vmcb invalid\n");
         return NSVM_ERROR_VMENTRY;
--- a/xen/arch/x86/hvm/svm/svmdebug.c
+++ b/xen/arch/x86/hvm/svm/svmdebug.c
@@ -16,6 +16,7 @@
  *
  */
 
+#include <xen/sched.h>
 #include <asm/processor.h>
 #include <asm/msr-index.h>
 #include <asm/hvm/svm/svmdebug.h>
@@ -87,93 +88,76 @@ void svm_vmcb_dump(const char *from, con
     svm_dump_sel("  TR", &vmcb->tr);
 }
 
-bool_t
-svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
-                 bool_t verbose)
+bool svm_vmcb_isvalid(const char *from, const struct vmcb_struct *vmcb,
+                      const struct vcpu *v, bool verbose)
 {
-    bool_t ret = 0; /* ok */
+    bool ret = false; /* ok */
 
-#define PRINTF(...) \
-    if (verbose) { ret = 1; printk("%s: ", from); printk(__VA_ARGS__); \
-    } else return 1;
+#define PRINTF(fmt, args...) do { \
+    if ( !verbose ) return true; \
+    ret = true; \
+    printk(XENLOG_GUEST "%pv[%s]: " fmt, v, from, ## args); \
+} while (0)
 
-    if ((vmcb->_efer & EFER_SVME) == 0) {
-        PRINTF("EFER: SVME bit not set (%#"PRIx64")\n", vmcb->_efer);
-    }
+    if ( !(vmcb_get_efer(vmcb) & EFER_SVME) )
+        PRINTF("EFER: SVME bit not set (%#"PRIx64")\n", vmcb_get_efer(vmcb));
 
-    if ((vmcb->_cr0 & X86_CR0_CD) == 0 && (vmcb->_cr0 & X86_CR0_NW) != 0) {
+    if ( !(vmcb_get_cr0(vmcb) & X86_CR0_CD) && (vmcb_get_cr0(vmcb) & X86_CR0_NW) )
         PRINTF("CR0: CD bit is zero and NW bit set (%#"PRIx64")\n",
-                vmcb->_cr0);
-    }
+                vmcb_get_cr0(vmcb));
 
-    if ((vmcb->_cr0 >> 32U) != 0) {
+    if ( vmcb_get_cr0(vmcb) >> 32 )
         PRINTF("CR0: bits [63:32] are not zero (%#"PRIx64")\n",
-                vmcb->_cr0);
-    }
+                vmcb_get_cr0(vmcb));
 
-    if ((vmcb->_cr3 & 0x7) != 0) {
-        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb->_cr3);
-    }
-    if ((vmcb->_efer & EFER_LMA) && (vmcb->_cr3 & 0xfe) != 0) {
-        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb->_cr3);
-    }
+    if ( (vmcb_get_cr3(vmcb) & 0x7) ||
+         ((!(vmcb_get_cr4(vmcb) & X86_CR4_PAE) ||
+           (vmcb_get_efer(vmcb) & EFER_LMA)) &&
+          (vmcb_get_cr3(vmcb) & 0xfe0)) ||
+         ((vmcb_get_efer(vmcb) & EFER_LMA) &&
+          (vmcb_get_cr3(vmcb) >> v->domain->arch.cpuid->extd.maxphysaddr)) )
+        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb_get_cr3(vmcb));
 
-    if ((vmcb->_cr4 >> 19U) != 0) {
-        PRINTF("CR4: bits [63:19] are not zero (%#"PRIx64")\n",
-                vmcb->_cr4);
-    }
-
-    if (((vmcb->_cr4 >> 11U) & 0x7fU) != 0) {
-        PRINTF("CR4: bits [17:11] are not zero (%#"PRIx64")\n",
-                vmcb->_cr4);
-    }
+    if ( vmcb_get_cr4(vmcb) & ~hvm_cr4_guest_valid_bits(v, false) )
+        PRINTF("CR4: MBZ bits are set (%#"PRIx64")\n", vmcb_get_cr4(vmcb));
 
-    if ((vmcb->_dr6 >> 32U) != 0) {
+    if ( vmcb_get_dr6(vmcb) >> 32 )
         PRINTF("DR6: bits [63:32] are not zero (%#"PRIx64")\n",
-                vmcb->_dr6);
-    }
+                vmcb_get_dr6(vmcb));
 
-    if ((vmcb->_dr7 >> 32U) != 0) {
+    if ( vmcb_get_dr7(vmcb) >> 32 )
         PRINTF("DR7: bits [63:32] are not zero (%#"PRIx64")\n",
-                vmcb->_dr7);
-    }
+                vmcb_get_dr7(vmcb));
 
-    if ((vmcb->_efer >> 15U) != 0) {
+    if ( vmcb_get_efer(vmcb) >> 15U )
         PRINTF("EFER: bits [63:15] are not zero (%#"PRIx64")\n",
-                vmcb->_efer);
-    }
-
-    if ((vmcb->_efer & EFER_LME) != 0 && ((vmcb->_cr0 & X86_CR0_PG) != 0)) {
-        if ((vmcb->_cr4 & X86_CR4_PAE) == 0) {
-            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero.\n");
-        }
-        if ((vmcb->_cr0 & X86_CR0_PE) == 0) {
-            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero.\n");
-        }
-    }
+                vmcb_get_efer(vmcb));
 
-    if ((vmcb->_efer & EFER_LME) != 0
-        && (vmcb->_cr0 & X86_CR0_PG) != 0
-        && (vmcb->_cr4 & X86_CR4_PAE) != 0
-        && (vmcb->cs.attr.fields.l != 0)
-        && (vmcb->cs.attr.fields.db != 0))
+    if ( (vmcb_get_efer(vmcb) & EFER_LME) && (vmcb_get_cr0(vmcb) & X86_CR0_PG) )
     {
-        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero.\n");
+        if ( !(vmcb_get_cr4(vmcb) & X86_CR4_PAE) )
+            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero\n");
+        if ( !(vmcb_get_cr0(vmcb) & X86_CR0_PE) )
+            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero\n");
     }
 
-    if ((vmcb->_general2_intercepts & GENERAL2_INTERCEPT_VMRUN) == 0) {
+    if ( (vmcb_get_efer(vmcb) & EFER_LME) &&
+         (vmcb_get_cr0(vmcb) & X86_CR0_PG) &&
+         (vmcb_get_cr4(vmcb) & X86_CR4_PAE) &&
+         vmcb->cs.attr.fields.l &&
+         vmcb->cs.attr.fields.db )
+        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero\n");
+
+    if ( !(vmcb_get_general2_intercepts(vmcb) & GENERAL2_INTERCEPT_VMRUN) )
         PRINTF("GENERAL2_INTERCEPT: VMRUN intercept bit is clear (%#"PRIx32")\n",
-            vmcb->_general2_intercepts);
-    }
+            vmcb_get_general2_intercepts(vmcb));
 
-    if (vmcb->eventinj.fields.resvd1 != 0) {
+    if ( vmcb->eventinj.fields.resvd1 )
         PRINTF("eventinj: MBZ bits are set (%#"PRIx64")\n",
                 vmcb->eventinj.bytes);
-    }
 
-    if (vmcb->_np_enable && vmcb->_h_cr3 == 0) {
+    if ( vmcb_get_np_enable(vmcb) && !vmcb_get_h_cr3(vmcb) )
         PRINTF("nested paging enabled but host cr3 is 0\n");
-    }
 
 #undef PRINTF
     return ret;
--- a/xen/include/asm-x86/hvm/svm/svmdebug.h
+++ b/xen/include/asm-x86/hvm/svm/svmdebug.h
@@ -23,7 +23,7 @@
 #include <asm/hvm/svm/vmcb.h>
 
 void svm_vmcb_dump(const char *from, const struct vmcb_struct *vmcb);
-bool_t svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
-                        bool_t verbose);
+bool svm_vmcb_isvalid(const char *from, const struct vmcb_struct *vmcb,
+                      const struct vcpu *v, bool verbose);
 
 #endif /* __ASM_X86_HVM_SVM_SVMDEBUG_H__ */

Comments

Andrew Cooper May 31, 2017, 12:14 p.m. UTC | #1
On 31/05/17 08:23, Jan Beulich wrote:
> - correct CR3 and CR4 checks
> - add vcpu parameter (to include in log messages) and constify vmcb one
> - use bool/true/false
> - use accessors
> - adjust formatting
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
> @@ -658,13 +658,13 @@ static int nsvm_vmcb_prepare4vmrun(struc
>      /* Cleanbits */
>      n2vmcb->cleanbits.bytes = 0;
>  
> -    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
> +    rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
>      if (rc) {
>          gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
>          return NSVM_ERROR_VVMCB;
>      }
>  
> -    rc = svm_vmcb_isvalid(__func__, n2vmcb, 1);
> +    rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);

As these are the only two callsites, I don't think the __func__ or
verbose parameters are useful.  I'd just drop them.

>      if (rc) {
>          gdprintk(XENLOG_ERR, "n2vmcb invalid\n");
>          return NSVM_ERROR_VMENTRY;
> --- a/xen/arch/x86/hvm/svm/svmdebug.c
> +++ b/xen/arch/x86/hvm/svm/svmdebug.c
> @@ -16,6 +16,7 @@
>   *
>   */
>  
> +#include <xen/sched.h>
>  #include <asm/processor.h>
>  #include <asm/msr-index.h>
>  #include <asm/hvm/svm/svmdebug.h>
> @@ -87,93 +88,76 @@ void svm_vmcb_dump(const char *from, con
>      svm_dump_sel("  TR", &vmcb->tr);
>  }
>  
> -bool_t
> -svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
> -                 bool_t verbose)
> +bool svm_vmcb_isvalid(const char *from, const struct vmcb_struct *vmcb,
> +                      const struct vcpu *v, bool verbose)
>  {
> -    bool_t ret = 0; /* ok */
> +    bool ret = false; /* ok */
>  
> -#define PRINTF(...) \
> -    if (verbose) { ret = 1; printk("%s: ", from); printk(__VA_ARGS__); \
> -    } else return 1;
> +#define PRINTF(fmt, args...) do { \
> +    if ( !verbose ) return true; \
> +    ret = true; \
> +    printk(XENLOG_GUEST "%pv[%s]: " fmt, v, from, ## args); \
> +} while (0)
>  
> -    if ((vmcb->_efer & EFER_SVME) == 0) {
> -        PRINTF("EFER: SVME bit not set (%#"PRIx64")\n", vmcb->_efer);
> -    }
> +    if ( !(vmcb_get_efer(vmcb) & EFER_SVME) )
> +        PRINTF("EFER: SVME bit not set (%#"PRIx64")\n", vmcb_get_efer(vmcb));
>  
> -    if ((vmcb->_cr0 & X86_CR0_CD) == 0 && (vmcb->_cr0 & X86_CR0_NW) != 0) {
> +    if ( !(vmcb_get_cr0(vmcb) & X86_CR0_CD) && (vmcb_get_cr0(vmcb) & X86_CR0_NW) )
>          PRINTF("CR0: CD bit is zero and NW bit set (%#"PRIx64")\n",
> -                vmcb->_cr0);
> -    }
> +                vmcb_get_cr0(vmcb));
>  
> -    if ((vmcb->_cr0 >> 32U) != 0) {
> +    if ( vmcb_get_cr0(vmcb) >> 32 )
>          PRINTF("CR0: bits [63:32] are not zero (%#"PRIx64")\n",
> -                vmcb->_cr0);
> -    }
> +                vmcb_get_cr0(vmcb));
>  
> -    if ((vmcb->_cr3 & 0x7) != 0) {
> -        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb->_cr3);
> -    }
> -    if ((vmcb->_efer & EFER_LMA) && (vmcb->_cr3 & 0xfe) != 0) {
> -        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb->_cr3);
> -    }
> +    if ( (vmcb_get_cr3(vmcb) & 0x7) ||
> +         ((!(vmcb_get_cr4(vmcb) & X86_CR4_PAE) ||
> +           (vmcb_get_efer(vmcb) & EFER_LMA)) &&
> +          (vmcb_get_cr3(vmcb) & 0xfe0)) ||
> +         ((vmcb_get_efer(vmcb) & EFER_LMA) &&
> +          (vmcb_get_cr3(vmcb) >> v->domain->arch.cpuid->extd.maxphysaddr)) )
> +        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb_get_cr3(vmcb));

Is any of this correct if CR0.PG is clear?  It was my understanding that
outside of paged mode, all bits are software available.

This is certainly the behaviour of hvm_set_cr3() (which itself has
further knock-on bugs resulting in vmentry failures, due to insufficient
CR3 checks when enabling CR0.PG)

>  
> -    if ((vmcb->_cr4 >> 19U) != 0) {
> -        PRINTF("CR4: bits [63:19] are not zero (%#"PRIx64")\n",
> -                vmcb->_cr4);
> -    }
> -
> -    if (((vmcb->_cr4 >> 11U) & 0x7fU) != 0) {
> -        PRINTF("CR4: bits [17:11] are not zero (%#"PRIx64")\n",
> -                vmcb->_cr4);
> -    }
> +    if ( vmcb_get_cr4(vmcb) & ~hvm_cr4_guest_valid_bits(v, false) )
> +        PRINTF("CR4: MBZ bits are set (%#"PRIx64")\n", vmcb_get_cr4(vmcb));

This isn't quite MBZ bits.  It also includes bits disallowed by the
chosen CPUID policy.  I'd suggest "CR4: Invalid bits are set
(%#"PRIx64", valid: %#"PRIx64")\n" and print both values.

>  
> -    if ((vmcb->_dr6 >> 32U) != 0) {
> +    if ( vmcb_get_dr6(vmcb) >> 32 )
>          PRINTF("DR6: bits [63:32] are not zero (%#"PRIx64")\n",
> -                vmcb->_dr6);
> -    }
> +                vmcb_get_dr6(vmcb));
>  
> -    if ((vmcb->_dr7 >> 32U) != 0) {
> +    if ( vmcb_get_dr7(vmcb) >> 32 )
>          PRINTF("DR7: bits [63:32] are not zero (%#"PRIx64")\n",
> -                vmcb->_dr7);
> -    }
> +                vmcb_get_dr7(vmcb));
>  
> -    if ((vmcb->_efer >> 15U) != 0) {
> +    if ( vmcb_get_efer(vmcb) >> 15U )
>          PRINTF("EFER: bits [63:15] are not zero (%#"PRIx64")\n",
> -                vmcb->_efer);
> -    }

I don't see any justification for this particular check (even before
your modification).  The manual states "Any MBZ bit of EFER is set", so
I'd recommend using hvm_efer_valid() here, which also subsumes some of
the other checks.

> -
> -    if ((vmcb->_efer & EFER_LME) != 0 && ((vmcb->_cr0 & X86_CR0_PG) != 0)) {
> -        if ((vmcb->_cr4 & X86_CR4_PAE) == 0) {
> -            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero.\n");
> -        }
> -        if ((vmcb->_cr0 & X86_CR0_PE) == 0) {
> -            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero.\n");
> -        }
> -    }
> +                vmcb_get_efer(vmcb));
>  
> -    if ((vmcb->_efer & EFER_LME) != 0
> -        && (vmcb->_cr0 & X86_CR0_PG) != 0
> -        && (vmcb->_cr4 & X86_CR4_PAE) != 0
> -        && (vmcb->cs.attr.fields.l != 0)
> -        && (vmcb->cs.attr.fields.db != 0))
> +    if ( (vmcb_get_efer(vmcb) & EFER_LME) && (vmcb_get_cr0(vmcb) & X86_CR0_PG) )
>      {
> -        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero.\n");
> +        if ( !(vmcb_get_cr4(vmcb) & X86_CR4_PAE) )
> +            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero\n");
> +        if ( !(vmcb_get_cr0(vmcb) & X86_CR0_PE) )
> +            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero\n");
>      }
>  
> -    if ((vmcb->_general2_intercepts & GENERAL2_INTERCEPT_VMRUN) == 0) {
> +    if ( (vmcb_get_efer(vmcb) & EFER_LME) &&
> +         (vmcb_get_cr0(vmcb) & X86_CR0_PG) &&
> +         (vmcb_get_cr4(vmcb) & X86_CR4_PAE) &&
> +         vmcb->cs.attr.fields.l &&
> +         vmcb->cs.attr.fields.db )
> +        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero\n");
> +
> +    if ( !(vmcb_get_general2_intercepts(vmcb) & GENERAL2_INTERCEPT_VMRUN) )
>          PRINTF("GENERAL2_INTERCEPT: VMRUN intercept bit is clear (%#"PRIx32")\n",
> -            vmcb->_general2_intercepts);
> -    }
> +            vmcb_get_general2_intercepts(vmcb));
>  
> -    if (vmcb->eventinj.fields.resvd1 != 0) {
> +    if ( vmcb->eventinj.fields.resvd1 )
>          PRINTF("eventinj: MBZ bits are set (%#"PRIx64")\n",
>                  vmcb->eventinj.bytes);
> -    }
>  
> -    if (vmcb->_np_enable && vmcb->_h_cr3 == 0) {
> +    if ( vmcb_get_np_enable(vmcb) && !vmcb_get_h_cr3(vmcb) )
>          PRINTF("nested paging enabled but host cr3 is 0\n");

I also can't see anything in the manual about this being invalid.

The only relevant restriction I can spot is nested paging is not
permitted if host paging is disabled.  A host cr3 value of 0 can
legitimately be used for paging suitable PTEs are written into mfn0.

~Andrew
Boris Ostrovsky May 31, 2017, 2:50 p.m. UTC | #2
On 05/31/2017 08:14 AM, Andrew Cooper wrote:
> On 31/05/17 08:23, Jan Beulich wrote:
>> - correct CR3 and CR4 checks
>> - add vcpu parameter (to include in log messages) and constify vmcb one
>> - use bool/true/false
>> - use accessors
>> - adjust formatting
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
>> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
>> @@ -658,13 +658,13 @@ static int nsvm_vmcb_prepare4vmrun(struc
>>      /* Cleanbits */
>>      n2vmcb->cleanbits.bytes = 0;
>>  
>> -    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
>> +    rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
>>      if (rc) {
>>          gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
>>          return NSVM_ERROR_VVMCB;
>>      }
>>  
>> -    rc = svm_vmcb_isvalid(__func__, n2vmcb, 1);
>> +    rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);
> As these are the only two callsites, I don't think the __func__ or
> verbose parameters are useful.  I'd just drop them.

I actually think keeping this is useful. We indeed have only two
invocations but someone debugging a problem may want to add a few more.

-boris
Andrew Cooper May 31, 2017, 3:32 p.m. UTC | #3
On 31/05/17 15:50, Boris Ostrovsky wrote:
> On 05/31/2017 08:14 AM, Andrew Cooper wrote:
>> On 31/05/17 08:23, Jan Beulich wrote:
>>> - correct CR3 and CR4 checks
>>> - add vcpu parameter (to include in log messages) and constify vmcb one
>>> - use bool/true/false
>>> - use accessors
>>> - adjust formatting
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
>>> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
>>> @@ -658,13 +658,13 @@ static int nsvm_vmcb_prepare4vmrun(struc
>>>      /* Cleanbits */
>>>      n2vmcb->cleanbits.bytes = 0;
>>>  
>>> -    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
>>> +    rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
>>>      if (rc) {
>>>          gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
>>>          return NSVM_ERROR_VVMCB;
>>>      }
>>>  
>>> -    rc = svm_vmcb_isvalid(__func__, n2vmcb, 1);
>>> +    rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);
>> As these are the only two callsites, I don't think the __func__ or
>> verbose parameters are useful.  I'd just drop them.
> I actually think keeping this is useful. We indeed have only two
> invocations but someone debugging a problem may want to add a few more.

Why?  Its clear where it is being called from by the following "$FOO
invalid" log message.

~Andrew
Boris Ostrovsky May 31, 2017, 3:44 p.m. UTC | #4
On 05/31/2017 11:32 AM, Andrew Cooper wrote:
> On 31/05/17 15:50, Boris Ostrovsky wrote:
>> On 05/31/2017 08:14 AM, Andrew Cooper wrote:
>>> On 31/05/17 08:23, Jan Beulich wrote:
>>>> - correct CR3 and CR4 checks
>>>> - add vcpu parameter (to include in log messages) and constify vmcb one
>>>> - use bool/true/false
>>>> - use accessors
>>>> - adjust formatting
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>
>>>> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
>>>> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
>>>> @@ -658,13 +658,13 @@ static int nsvm_vmcb_prepare4vmrun(struc
>>>>      /* Cleanbits */
>>>>      n2vmcb->cleanbits.bytes = 0;
>>>>  
>>>> -    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
>>>> +    rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
>>>>      if (rc) {
>>>>          gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
>>>>          return NSVM_ERROR_VVMCB;
>>>>      }
>>>>  
>>>> -    rc = svm_vmcb_isvalid(__func__, n2vmcb, 1);
>>>> +    rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);
>>> As these are the only two callsites, I don't think the __func__ or
>>> verbose parameters are useful.  I'd just drop them.
>> I actually think keeping this is useful. We indeed have only two
>> invocations but someone debugging a problem may want to add a few more.
> Why?  Its clear where it is being called from by the following "$FOO
> invalid" log message.

You won't have to print anything at the call site if this is kept. (I am
not suggesting to get rid of existing printks, this would only be useful
for one-off debugging).

-boris
Jan Beulich June 1, 2017, 1:06 p.m. UTC | #5
>>> On 31.05.17 at 14:14, <andrew.cooper3@citrix.com> wrote:
> On 31/05/17 08:23, Jan Beulich wrote:
>> -    if ((vmcb->_cr3 & 0x7) != 0) {
>> -        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb->_cr3);
>> -    }
>> -    if ((vmcb->_efer & EFER_LMA) && (vmcb->_cr3 & 0xfe) != 0) {
>> -        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb->_cr3);
>> -    }
>> +    if ( (vmcb_get_cr3(vmcb) & 0x7) ||
>> +         ((!(vmcb_get_cr4(vmcb) & X86_CR4_PAE) ||
>> +           (vmcb_get_efer(vmcb) & EFER_LMA)) &&
>> +          (vmcb_get_cr3(vmcb) & 0xfe0)) ||
>> +         ((vmcb_get_efer(vmcb) & EFER_LMA) &&
>> +          (vmcb_get_cr3(vmcb) >> v->domain->arch.cpuid->extd.maxphysaddr)) )
>> +        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb_get_cr3(vmcb));
> 
> Is any of this correct if CR0.PG is clear?  It was my understanding that
> outside of paged mode, all bits are software available.
> 
> This is certainly the behaviour of hvm_set_cr3() (which itself has
> further knock-on bugs resulting in vmentry failures, due to insufficient
> CR3 checks when enabling CR0.PG)

I've changed it, but I'm not entirely convinced this is a good idea
for the case when someone means to use this for adhoc
debugging, as generally it is a hint of a problem if any of these
fail.

>> -    if ((vmcb->_efer >> 15U) != 0) {
>> +    if ( vmcb_get_efer(vmcb) >> 15U )
>>          PRINTF("EFER: bits [63:15] are not zero (%#"PRIx64")\n",
>> -                vmcb->_efer);
>> -    }
> 
> I don't see any justification for this particular check (even before
> your modification).  The manual states "Any MBZ bit of EFER is set", so
> I'd recommend using hvm_efer_valid() here, which also subsumes some of
> the other checks.

I can certainly add a call to hvm_efer_valid(), but that won't replace
the existing check, as that function only checks known bits and
ignores all others. Perhaps we could (should) change that, but not
in this patch. I've tightened the existing check though to check for
all undefined bits, not just those upwards from bit 15.

>> -    if (vmcb->_np_enable && vmcb->_h_cr3 == 0) {
>> +    if ( vmcb_get_np_enable(vmcb) && !vmcb_get_h_cr3(vmcb) )
>>          PRINTF("nested paging enabled but host cr3 is 0\n");
> 
> I also can't see anything in the manual about this being invalid.
> 
> The only relevant restriction I can spot is nested paging is not
> permitted if host paging is disabled.  A host cr3 value of 0 can
> legitimately be used for paging suitable PTEs are written into mfn0.

There's no h_cr0 field, so it's not clear to me how host paging
state could be determined (or how one could even talk of it
being disabled). I also couldn't find the statement you say you
have spotted. So best I can do is simply delete this check.

Jan
diff mbox

Patch

--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -658,13 +658,13 @@  static int nsvm_vmcb_prepare4vmrun(struc
     /* Cleanbits */
     n2vmcb->cleanbits.bytes = 0;
 
-    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
+    rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
     if (rc) {
         gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
         return NSVM_ERROR_VVMCB;
     }
 
-    rc = svm_vmcb_isvalid(__func__, n2vmcb, 1);
+    rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);
     if (rc) {
         gdprintk(XENLOG_ERR, "n2vmcb invalid\n");
         return NSVM_ERROR_VMENTRY;
--- a/xen/arch/x86/hvm/svm/svmdebug.c
+++ b/xen/arch/x86/hvm/svm/svmdebug.c
@@ -16,6 +16,7 @@ 
  *
  */
 
+#include <xen/sched.h>
 #include <asm/processor.h>
 #include <asm/msr-index.h>
 #include <asm/hvm/svm/svmdebug.h>
@@ -87,93 +88,76 @@  void svm_vmcb_dump(const char *from, con
     svm_dump_sel("  TR", &vmcb->tr);
 }
 
-bool_t
-svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
-                 bool_t verbose)
+bool svm_vmcb_isvalid(const char *from, const struct vmcb_struct *vmcb,
+                      const struct vcpu *v, bool verbose)
 {
-    bool_t ret = 0; /* ok */
+    bool ret = false; /* ok */
 
-#define PRINTF(...) \
-    if (verbose) { ret = 1; printk("%s: ", from); printk(__VA_ARGS__); \
-    } else return 1;
+#define PRINTF(fmt, args...) do { \
+    if ( !verbose ) return true; \
+    ret = true; \
+    printk(XENLOG_GUEST "%pv[%s]: " fmt, v, from, ## args); \
+} while (0)
 
-    if ((vmcb->_efer & EFER_SVME) == 0) {
-        PRINTF("EFER: SVME bit not set (%#"PRIx64")\n", vmcb->_efer);
-    }
+    if ( !(vmcb_get_efer(vmcb) & EFER_SVME) )
+        PRINTF("EFER: SVME bit not set (%#"PRIx64")\n", vmcb_get_efer(vmcb));
 
-    if ((vmcb->_cr0 & X86_CR0_CD) == 0 && (vmcb->_cr0 & X86_CR0_NW) != 0) {
+    if ( !(vmcb_get_cr0(vmcb) & X86_CR0_CD) && (vmcb_get_cr0(vmcb) & X86_CR0_NW) )
         PRINTF("CR0: CD bit is zero and NW bit set (%#"PRIx64")\n",
-                vmcb->_cr0);
-    }
+                vmcb_get_cr0(vmcb));
 
-    if ((vmcb->_cr0 >> 32U) != 0) {
+    if ( vmcb_get_cr0(vmcb) >> 32 )
         PRINTF("CR0: bits [63:32] are not zero (%#"PRIx64")\n",
-                vmcb->_cr0);
-    }
+                vmcb_get_cr0(vmcb));
 
-    if ((vmcb->_cr3 & 0x7) != 0) {
-        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb->_cr3);
-    }
-    if ((vmcb->_efer & EFER_LMA) && (vmcb->_cr3 & 0xfe) != 0) {
-        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb->_cr3);
-    }
+    if ( (vmcb_get_cr3(vmcb) & 0x7) ||
+         ((!(vmcb_get_cr4(vmcb) & X86_CR4_PAE) ||
+           (vmcb_get_efer(vmcb) & EFER_LMA)) &&
+          (vmcb_get_cr3(vmcb) & 0xfe0)) ||
+         ((vmcb_get_efer(vmcb) & EFER_LMA) &&
+          (vmcb_get_cr3(vmcb) >> v->domain->arch.cpuid->extd.maxphysaddr)) )
+        PRINTF("CR3: MBZ bits are set (%#"PRIx64")\n", vmcb_get_cr3(vmcb));
 
-    if ((vmcb->_cr4 >> 19U) != 0) {
-        PRINTF("CR4: bits [63:19] are not zero (%#"PRIx64")\n",
-                vmcb->_cr4);
-    }
-
-    if (((vmcb->_cr4 >> 11U) & 0x7fU) != 0) {
-        PRINTF("CR4: bits [17:11] are not zero (%#"PRIx64")\n",
-                vmcb->_cr4);
-    }
+    if ( vmcb_get_cr4(vmcb) & ~hvm_cr4_guest_valid_bits(v, false) )
+        PRINTF("CR4: MBZ bits are set (%#"PRIx64")\n", vmcb_get_cr4(vmcb));
 
-    if ((vmcb->_dr6 >> 32U) != 0) {
+    if ( vmcb_get_dr6(vmcb) >> 32 )
         PRINTF("DR6: bits [63:32] are not zero (%#"PRIx64")\n",
-                vmcb->_dr6);
-    }
+                vmcb_get_dr6(vmcb));
 
-    if ((vmcb->_dr7 >> 32U) != 0) {
+    if ( vmcb_get_dr7(vmcb) >> 32 )
         PRINTF("DR7: bits [63:32] are not zero (%#"PRIx64")\n",
-                vmcb->_dr7);
-    }
+                vmcb_get_dr7(vmcb));
 
-    if ((vmcb->_efer >> 15U) != 0) {
+    if ( vmcb_get_efer(vmcb) >> 15U )
         PRINTF("EFER: bits [63:15] are not zero (%#"PRIx64")\n",
-                vmcb->_efer);
-    }
-
-    if ((vmcb->_efer & EFER_LME) != 0 && ((vmcb->_cr0 & X86_CR0_PG) != 0)) {
-        if ((vmcb->_cr4 & X86_CR4_PAE) == 0) {
-            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero.\n");
-        }
-        if ((vmcb->_cr0 & X86_CR0_PE) == 0) {
-            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero.\n");
-        }
-    }
+                vmcb_get_efer(vmcb));
 
-    if ((vmcb->_efer & EFER_LME) != 0
-        && (vmcb->_cr0 & X86_CR0_PG) != 0
-        && (vmcb->_cr4 & X86_CR4_PAE) != 0
-        && (vmcb->cs.attr.fields.l != 0)
-        && (vmcb->cs.attr.fields.db != 0))
+    if ( (vmcb_get_efer(vmcb) & EFER_LME) && (vmcb_get_cr0(vmcb) & X86_CR0_PG) )
     {
-        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero.\n");
+        if ( !(vmcb_get_cr4(vmcb) & X86_CR4_PAE) )
+            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero\n");
+        if ( !(vmcb_get_cr0(vmcb) & X86_CR0_PE) )
+            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero\n");
     }
 
-    if ((vmcb->_general2_intercepts & GENERAL2_INTERCEPT_VMRUN) == 0) {
+    if ( (vmcb_get_efer(vmcb) & EFER_LME) &&
+         (vmcb_get_cr0(vmcb) & X86_CR0_PG) &&
+         (vmcb_get_cr4(vmcb) & X86_CR4_PAE) &&
+         vmcb->cs.attr.fields.l &&
+         vmcb->cs.attr.fields.db )
+        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero\n");
+
+    if ( !(vmcb_get_general2_intercepts(vmcb) & GENERAL2_INTERCEPT_VMRUN) )
         PRINTF("GENERAL2_INTERCEPT: VMRUN intercept bit is clear (%#"PRIx32")\n",
-            vmcb->_general2_intercepts);
-    }
+            vmcb_get_general2_intercepts(vmcb));
 
-    if (vmcb->eventinj.fields.resvd1 != 0) {
+    if ( vmcb->eventinj.fields.resvd1 )
         PRINTF("eventinj: MBZ bits are set (%#"PRIx64")\n",
                 vmcb->eventinj.bytes);
-    }
 
-    if (vmcb->_np_enable && vmcb->_h_cr3 == 0) {
+    if ( vmcb_get_np_enable(vmcb) && !vmcb_get_h_cr3(vmcb) )
         PRINTF("nested paging enabled but host cr3 is 0\n");
-    }
 
 #undef PRINTF
     return ret;
--- a/xen/include/asm-x86/hvm/svm/svmdebug.h
+++ b/xen/include/asm-x86/hvm/svm/svmdebug.h
@@ -23,7 +23,7 @@ 
 #include <asm/hvm/svm/vmcb.h>
 
 void svm_vmcb_dump(const char *from, const struct vmcb_struct *vmcb);
-bool_t svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
-                        bool_t verbose);
+bool svm_vmcb_isvalid(const char *from, const struct vmcb_struct *vmcb,
+                      const struct vcpu *v, bool verbose);
 
 #endif /* __ASM_X86_HVM_SVM_SVMDEBUG_H__ */