diff mbox series

[2/2] KVM: nVMX: trace nested VM-Enter failures detected by H/W

Message ID 20190308231645.25402-3-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: nVMX: add tracepoints for nested VM-Enter failures | expand

Commit Message

Sean Christopherson March 8, 2019, 11:16 p.m. UTC
Use the recently added tracepoint for logging nested VM-Enter failures
instead of spamming the kernel log when hardware detects a consistency
check failure.  Take the opportunity to print the name of the error code
instead of dumping the raw hex number, but limit the symbol table to
error codes that can reasonably be encountered by KVM.

Add an equivalent tracepoint in nested_vmx_check_vmentry_hw() so that
"invalid control field" errors aren't suppressed when nested early
checks are enabled.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/include/asm/vmx.h | 14 ++++++++++++++
 arch/x86/kvm/trace.h       |  9 ++++++---
 arch/x86/kvm/vmx/nested.c  | 14 +++++++++-----
 3 files changed, 29 insertions(+), 8 deletions(-)

Comments

Jim Mattson March 11, 2019, 9:42 p.m. UTC | #1
On Fri, Mar 8, 2019 at 3:16 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> Use the recently added tracepoint for logging nested VM-Enter failures
> instead of spamming the kernel log when hardware detects a consistency
> check failure.  Take the opportunity to print the name of the error code
> instead of dumping the raw hex number, but limit the symbol table to
> error codes that can reasonably be encountered by KVM.
>
> Add an equivalent tracepoint in nested_vmx_check_vmentry_hw() so that
> "invalid control field" errors aren't suppressed when nested early
> checks are enabled.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>  arch/x86/include/asm/vmx.h | 14 ++++++++++++++
>  arch/x86/kvm/trace.h       |  9 ++++++---
>  arch/x86/kvm/vmx/nested.c  | 14 +++++++++-----
>  3 files changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 4e4133e86484..038ab715307f 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -575,6 +575,20 @@ enum vm_instruction_error_number {
>         VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28,
>  };
>
> +/*
> + * VM-instruction errors that can be encountered on VM-Enter, used to trace
> + * nested VM-Enter failures reported by hardware.  Errors unique to VM-Enter
> + * from a SMI Transfer Monitor are not included as things have gone seriously
> + * sideways if we get one of those...
> + */
> +#define VMX_VMENTER_INSTRUCTION_ERRORS \
> +       { VMXERR_VMLAUNCH_NONCLEAR_VMCS,                "VMLAUNCH_NONCLEAR_VMCS" }, \
> +       { VMXERR_VMRESUME_NONLAUNCHED_VMCS,             "VMRESUME_NONLAUNCHED_VMCS" }, \
> +       { VMXERR_VMRESUME_AFTER_VMXOFF,                 "VMRESUME_AFTER_VMXOFF" }, \
> +       { VMXERR_ENTRY_INVALID_CONTROL_FIELD,           "VMENTRY_INVALID_CONTROL_FIELD" }, \
> +       { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD,        "VMENTRY_INVALID_HOST_STATE_FIELD" }, \
> +       { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS,        "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" }
> +
>  enum vmx_l1d_flush_state {
>         VMENTER_L1D_FLUSH_AUTO,
>         VMENTER_L1D_FLUSH_NEVER,
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 166bea7e2e92..06eff8978810 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -1467,18 +1467,21 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
>   * Tracepoint for failed nested VMX VM-Enter.
>   */
>  TRACE_EVENT(kvm_nested_vmenter_failed,
> -       TP_PROTO(const char *msg),
> -       TP_ARGS(msg),
> +       TP_PROTO(const char *msg, u32 err),
> +       TP_ARGS(msg, err),
>
>         TP_STRUCT__entry(
>                 __field(const char *, msg)
> +               __field(u32, err)
>         ),
>
>         TP_fast_assign(
>                 __entry->msg = msg;
> +               __entry->err = err;
>         ),
>
> -       TP_printk("%s", __entry->msg)
> +       TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
> +               __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
>  );
>
>  #endif /* _TRACE_KVM_H */
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 34cdbe86e99b..1a0ae908ac1a 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -23,7 +23,7 @@ module_param(nested_early_check, bool, S_IRUGO);
>  ({                                                                     \
>         bool failed = (consistency_check);                              \
>         if (failed)                                                     \
> -               trace_kvm_nested_vmenter_failed(#consistency_check);    \
> +               trace_kvm_nested_vmenter_failed(#consistency_check, 0); \
>         failed;                                                         \
>  })
>
> @@ -2719,6 +2719,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>         unsigned long cr3, cr4;
>         bool vm_fail;
> +       u32 vmxerr;
>
>         if (!nested_early_check)
>                 return 0;
> @@ -2790,8 +2791,10 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
>                 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
>
>         if (vm_fail) {
> -               WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
> -                            VMXERR_ENTRY_INVALID_CONTROL_FIELD);
> +               vmxerr = vmcs_read32(VM_INSTRUCTION_ERROR);
> +               trace_kvm_nested_vmenter_failed(
> +                       "early hardware check VM-instruction error: ", vmxerr);
> +               WARN_ON_ONCE(vmxerr != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>                 return 1;
>         }
>
> @@ -5061,8 +5064,9 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
>                 return false;
>
>         if (unlikely(vmx->fail)) {
> -               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
> -                                   vmcs_read32(VM_INSTRUCTION_ERROR));

I *love* the tracing, but I don't think we want to turn it on for
production. Can we keep the pr_info_ratelimited for when tracing is
disabled?

> +               trace_kvm_nested_vmenter_failed(
> +                       "hardware VM-instruction error: ",
> +                       vmcs_read32(VM_INSTRUCTION_ERROR));
>                 return true;
>         }
>
> --
> 2.21.0
>
Sean Christopherson March 14, 2019, 6:45 p.m. UTC | #2
On Mon, Mar 11, 2019 at 02:42:06PM -0700, Jim Mattson wrote:
> On Fri, Mar 8, 2019 at 3:16 PM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> >
> > Use the recently added tracepoint for logging nested VM-Enter failures
> > instead of spamming the kernel log when hardware detects a consistency
> > check failure.  Take the opportunity to print the name of the error code
> > instead of dumping the raw hex number, but limit the symbol table to
> > error codes that can reasonably be encountered by KVM.
> >
> > Add an equivalent tracepoint in nested_vmx_check_vmentry_hw() so that
> > "invalid control field" errors aren't suppressed when nested early
> > checks are enabled.
> >
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

...

> > @@ -5061,8 +5064,9 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
> >                 return false;
> >
> >         if (unlikely(vmx->fail)) {
> > -               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
> > -                                   vmcs_read32(VM_INSTRUCTION_ERROR));
> 
> I *love* the tracing, but I don't think we want to turn it on for
> production. Can we keep the pr_info_ratelimited for when tracing is
> disabled?

Could we drop it to pr_debug_ratelimited()?  Say "no" if it's at all
inconvenient to use debug instead of info.  The printing is nothing
more than a minor annoyance when I'm running unit tests, i.e. any kind
of actual use case trumps my partiality for a clean kernel log.

> 
> > +               trace_kvm_nested_vmenter_failed(
> > +                       "hardware VM-instruction error: ",
> > +                       vmcs_read32(VM_INSTRUCTION_ERROR));
> >                 return true;
> >         }
> >
> > --
> > 2.21.0
> >
Paolo Bonzini March 15, 2019, 6:15 p.m. UTC | #3
On 14/03/19 19:45, Sean Christopherson wrote:
>>>         if (unlikely(vmx->fail)) {
>>> -               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
>>> -                                   vmcs_read32(VM_INSTRUCTION_ERROR));
>> I *love* the tracing, but I don't think we want to turn it on for
>> production. Can we keep the pr_info_ratelimited for when tracing is
>> disabled?
> Could we drop it to pr_debug_ratelimited()?  Say "no" if it's at all
> inconvenient to use debug instead of info.  The printing is nothing
> more than a minor annoyance when I'm running unit tests, i.e. any kind
> of actual use case trumps my partiality for a clean kernel log.
> 

I agree, it should be pr_debug_ratelimited (if anything).

Paolo
Jim Mattson March 15, 2019, 7:53 p.m. UTC | #4
On Fri, Mar 15, 2019 at 11:15 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 14/03/19 19:45, Sean Christopherson wrote:
> >>>         if (unlikely(vmx->fail)) {
> >>> -               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
> >>> -                                   vmcs_read32(VM_INSTRUCTION_ERROR));
> >> I *love* the tracing, but I don't think we want to turn it on for
> >> production. Can we keep the pr_info_ratelimited for when tracing is
> >> disabled?
> > Could we drop it to pr_debug_ratelimited()?  Say "no" if it's at all
> > inconvenient to use debug instead of info.  The printing is nothing
> > more than a minor annoyance when I'm running unit tests, i.e. any kind
> > of actual use case trumps my partiality for a clean kernel log.
> >
>
> I agree, it should be pr_debug_ratelimited (if anything).

Let's go ahead and drop it, then.
Paolo Bonzini March 18, 2019, 9:47 a.m. UTC | #5
On 15/03/19 20:53, Jim Mattson wrote:
> On Fri, Mar 15, 2019 at 11:15 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>> On 14/03/19 19:45, Sean Christopherson wrote:
>>>>>         if (unlikely(vmx->fail)) {
>>>>> -               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
>>>>> -                                   vmcs_read32(VM_INSTRUCTION_ERROR));
>>>> I *love* the tracing, but I don't think we want to turn it on for
>>>> production. Can we keep the pr_info_ratelimited for when tracing is
>>>> disabled?
>>> Could we drop it to pr_debug_ratelimited()?  Say "no" if it's at all
>>> inconvenient to use debug instead of info.  The printing is nothing
>>> more than a minor annoyance when I'm running unit tests, i.e. any kind
>>> of actual use case trumps my partiality for a clean kernel log.
>>>
>>
>> I agree, it should be pr_debug_ratelimited (if anything).
> 
> Let's go ahead and drop it, then.

I'll add at least a vcpu_stat.

Paolo
Sean Christopherson June 19, 2019, 7:47 p.m. UTC | #6
On Mon, Mar 18, 2019 at 10:47:03AM +0100, Paolo Bonzini wrote:
> On 15/03/19 20:53, Jim Mattson wrote:
> > On Fri, Mar 15, 2019 at 11:15 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
> >>
> >> On 14/03/19 19:45, Sean Christopherson wrote:
> >>>>>         if (unlikely(vmx->fail)) {
> >>>>> -               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
> >>>>> -                                   vmcs_read32(VM_INSTRUCTION_ERROR));
> >>>> I *love* the tracing, but I don't think we want to turn it on for
> >>>> production. Can we keep the pr_info_ratelimited for when tracing is
> >>>> disabled?
> >>> Could we drop it to pr_debug_ratelimited()?  Say "no" if it's at all
> >>> inconvenient to use debug instead of info.  The printing is nothing
> >>> more than a minor annoyance when I'm running unit tests, i.e. any kind
> >>> of actual use case trumps my partiality for a clean kernel log.
> >>>
> >>
> >> I agree, it should be pr_debug_ratelimited (if anything).
> > 
> > Let's go ahead and drop it, then.
> 
> I'll add at least a vcpu_stat.

Paolo, is this one of those series that you need to Google, or are you
waiting on me for something? :-D
Paolo Bonzini July 2, 2019, 4:35 p.m. UTC | #7
On 19/06/19 21:47, Sean Christopherson wrote:
> are you waiting on me for something? :-D

Can you please rebase the patches?

Thanks,

Paolo
diff mbox series

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 4e4133e86484..038ab715307f 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -575,6 +575,20 @@  enum vm_instruction_error_number {
 	VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28,
 };
 
+/*
+ * VM-instruction errors that can be encountered on VM-Enter, used to trace
+ * nested VM-Enter failures reported by hardware.  Errors unique to VM-Enter
+ * from a SMI Transfer Monitor are not included as things have gone seriously
+ * sideways if we get one of those...
+ */
+#define VMX_VMENTER_INSTRUCTION_ERRORS \
+	{ VMXERR_VMLAUNCH_NONCLEAR_VMCS,		"VMLAUNCH_NONCLEAR_VMCS" }, \
+	{ VMXERR_VMRESUME_NONLAUNCHED_VMCS,		"VMRESUME_NONLAUNCHED_VMCS" }, \
+	{ VMXERR_VMRESUME_AFTER_VMXOFF,			"VMRESUME_AFTER_VMXOFF" }, \
+	{ VMXERR_ENTRY_INVALID_CONTROL_FIELD,		"VMENTRY_INVALID_CONTROL_FIELD" }, \
+	{ VMXERR_ENTRY_INVALID_HOST_STATE_FIELD,	"VMENTRY_INVALID_HOST_STATE_FIELD" }, \
+	{ VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS,	"VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" }
+
 enum vmx_l1d_flush_state {
 	VMENTER_L1D_FLUSH_AUTO,
 	VMENTER_L1D_FLUSH_NEVER,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 166bea7e2e92..06eff8978810 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1467,18 +1467,21 @@  TRACE_EVENT(kvm_hv_send_ipi_ex,
  * Tracepoint for failed nested VMX VM-Enter.
  */
 TRACE_EVENT(kvm_nested_vmenter_failed,
-	TP_PROTO(const char *msg),
-	TP_ARGS(msg),
+	TP_PROTO(const char *msg, u32 err),
+	TP_ARGS(msg, err),
 
 	TP_STRUCT__entry(
 		__field(const char *, msg)
+		__field(u32, err)
 	),
 
 	TP_fast_assign(
 		__entry->msg = msg;
+		__entry->err = err;
 	),
 
-	TP_printk("%s", __entry->msg)
+	TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
+		__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
 );
 
 #endif /* _TRACE_KVM_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 34cdbe86e99b..1a0ae908ac1a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -23,7 +23,7 @@  module_param(nested_early_check, bool, S_IRUGO);
 ({									\
 	bool failed = (consistency_check);				\
 	if (failed)							\
-		trace_kvm_nested_vmenter_failed(#consistency_check);	\
+		trace_kvm_nested_vmenter_failed(#consistency_check, 0);	\
 	failed;								\
 })
 
@@ -2719,6 +2719,7 @@  static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4;
 	bool vm_fail;
+	u32 vmxerr;
 
 	if (!nested_early_check)
 		return 0;
@@ -2790,8 +2791,10 @@  static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 
 	if (vm_fail) {
-		WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
-			     VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		vmxerr = vmcs_read32(VM_INSTRUCTION_ERROR);
+		trace_kvm_nested_vmenter_failed(
+			"early hardware check VM-instruction error: ", vmxerr);
+		WARN_ON_ONCE(vmxerr != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
@@ -5061,8 +5064,9 @@  bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 		return false;
 
 	if (unlikely(vmx->fail)) {
-		pr_info_ratelimited("%s failed vm entry %x\n", __func__,
-				    vmcs_read32(VM_INSTRUCTION_ERROR));
+		trace_kvm_nested_vmenter_failed(
+			"hardware VM-instruction error: ",
+			vmcs_read32(VM_INSTRUCTION_ERROR));
 		return true;
 	}