diff mbox

[Qemu-devel] E5-2620v2 - emulation stop error

Message ID 20150331164539.GD14262@potion.brq.redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Radim Krčmář March 31, 2015, 4:45 p.m. UTC
2015-03-31 17:56+0300, Andrey Korolyov:
> > Chasing the culprit this way could take a long time, so a new tracepoint
> > that shows if 0xef is set on entry would let us guess the bug faster ...
> >
> > Please provide a failing trace with the following patch:
>
> Thanks, please see below:
> 
> http://xdel.ru/downloads/kvm-e5v2-issue/new-tracepoint-fail-with-apicv.dat.gz

 qemu-system-x86-4022  [006]  255.915978:
  kvm_entry:            vcpu 0
  kvm_emulate_insn:     f0000:d275: ea 7a d2 00 f0
  kvm_emulate_insn:     f0000:d27a: 2e 0f 01 1e f0 6c
  kvm_emulate_insn:     f0000:d280: 31 c0
  kvm_emulate_insn:     f0000:d282: 8e e0
  kvm_emulate_insn:     f0000:d284: 8e e8
  kvm_emulate_insn:     f0000:d286: 8e c0
  kvm_emulate_insn:     f0000:d288: 8e d8
  kvm_emulate_insn:     f0000:d28a: 8e d0
  kvm_entry:            vcpu 0
  kvm_0xef:             irr clear, isr clear, vmcs 0x0
  kvm_exit:             reason EPT_VIOLATION rip 0x8dd0 info 184 0
  kvm_page_fault:       address f8dd0 error_code 184
  kvm_entry:            vcpu 0
  kvm_0xef:             irr clear, isr clear, vmcs 0x0
  kvm_exit:             reason EPT_VIOLATION rip 0x76d6 info 184 0
  kvm_page_fault:       address f76d6 error_code 184
  kvm_entry:            vcpu 0
  kvm_0xef:             irr clear, isr clear, vmcs 0x0
  kvm_exit:             reason EXCEPTION_NMI rip 0xd331 info 0 80000b0d
  kvm_userspace_exit:   reason KVM_EXIT_INTERNAL_ERROR (17)

Ok, nothing obvious here either ... I've desperately added all
information I know about.  Please run it again, thanks.

(The patch has to be applied instead of the previous one.)
---
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Andrey Korolyov March 31, 2015, 5:40 p.m. UTC | #1
On Tue, Mar 31, 2015 at 7:45 PM, Radim Krčmář <rkrcmar@redhat.com> wrote:
> 2015-03-31 17:56+0300, Andrey Korolyov:
>> > Chasing the culprit this way could take a long time, so a new tracepoint
>> > that shows if 0xef is set on entry would let us guess the bug faster ...
>> >
>> > Please provide a failing trace with the following patch:
>>
>> Thanks, please see below:
>>
>> http://xdel.ru/downloads/kvm-e5v2-issue/new-tracepoint-fail-with-apicv.dat.gz
>
>  qemu-system-x86-4022  [006]  255.915978:
>   kvm_entry:            vcpu 0
>   kvm_emulate_insn:     f0000:d275: ea 7a d2 00 f0
>   kvm_emulate_insn:     f0000:d27a: 2e 0f 01 1e f0 6c
>   kvm_emulate_insn:     f0000:d280: 31 c0
>   kvm_emulate_insn:     f0000:d282: 8e e0
>   kvm_emulate_insn:     f0000:d284: 8e e8
>   kvm_emulate_insn:     f0000:d286: 8e c0
>   kvm_emulate_insn:     f0000:d288: 8e d8
>   kvm_emulate_insn:     f0000:d28a: 8e d0
>   kvm_entry:            vcpu 0
>   kvm_0xef:             irr clear, isr clear, vmcs 0x0
>   kvm_exit:             reason EPT_VIOLATION rip 0x8dd0 info 184 0
>   kvm_page_fault:       address f8dd0 error_code 184
>   kvm_entry:            vcpu 0
>   kvm_0xef:             irr clear, isr clear, vmcs 0x0
>   kvm_exit:             reason EPT_VIOLATION rip 0x76d6 info 184 0
>   kvm_page_fault:       address f76d6 error_code 184
>   kvm_entry:            vcpu 0
>   kvm_0xef:             irr clear, isr clear, vmcs 0x0
>   kvm_exit:             reason EXCEPTION_NMI rip 0xd331 info 0 80000b0d
>   kvm_userspace_exit:   reason KVM_EXIT_INTERNAL_ERROR (17)
>
> Ok, nothing obvious here either ... I've desperately added all
> information I know about.  Please run it again, thanks.
>
> (The patch has to be applied instead of the previous one.)
> ---
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 7c7bc8bef21f..f986636ad9d0 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -742,6 +742,41 @@ TRACE_EVENT(kvm_emulate_insn,
>  #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0)
>  #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1)
>
> +TRACE_EVENT(kvm_0xef,
> +       TP_PROTO(bool irr, bool isr, u32 info, bool on, bool pir, u16 status),
> +       TP_ARGS(irr, isr, info, on, pir, status),
> +
> +       TP_STRUCT__entry(
> +               __field(bool,  irr )
> +               __field(bool,  isr )
> +               __field(u32,   info)
> +               __field(bool,  on  )
> +               __field(bool,  pir )
> +               __field(u8,    rvi )
> +               __field(u8,    svi )
> +               ),
> +
> +       TP_fast_assign(
> +               __entry->irr  = irr;
> +               __entry->isr  = isr;
> +               __entry->info = info;
> +               __entry->on   = on;
> +               __entry->pir  = pir;
> +               __entry->rvi  = status & 0xff;
> +               __entry->svi  = status >> 8;
> +               ),
> +
> +       TP_printk("irr %s, isr %s, info 0x%x, on %s, pir %s, rvi 0x%x, svi 0x%x",
> +                 __entry->irr ? "set  " : "clear",
> +                 __entry->isr ? "set  " : "clear",
> +                 __entry->info,
> +                 __entry->on  ? "set  " : "clear",
> +                 __entry->pir ? "set  " : "clear",
> +                 __entry->rvi,
> +                 __entry->svi
> +                )
> +       );
> +
>  TRACE_EVENT(
>         vcpu_match_mmio,
>         TP_PROTO(gva_t gva, gpa_t gpa, bool write, bool gpa_match),
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index eee63dc33d89..b461edc93d53 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5047,6 +5047,25 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
>         return 1;
>  }
>
> +#define VEC_POS(v) ((v) & (32 - 1))
> +#define REG_POS(v) (((v) >> 5) << 4)
> +static inline int apic_test_vector(int vec, void *bitmap)
> +{
> +       return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> +}
> +
> +static inline void random_trace(struct kvm_vcpu *vcpu)
> +{
> +       struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +       trace_kvm_0xef(apic_test_vector(0xef, vcpu->arch.apic->regs + APIC_IRR),
> +                      apic_test_vector(0xef, vcpu->arch.apic->regs + APIC_ISR),
> +                      vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
> +                      test_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.control),
> +                      test_bit(0xef, (unsigned long *)vmx->pi_desc.pir),
> +                      vmcs_read16(GUEST_INTR_STATUS));
> +}
> +
>  static int handle_exception(struct kvm_vcpu *vcpu)
>  {
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -5077,6 +5096,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
>                 return 1;
>         }
>
> +       random_trace(vcpu);
> +
>         error_code = 0;
>         if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
>                 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
> @@ -8143,6 +8164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>         if (vmx->emulation_required)
>                 return;
>
> +       random_trace(vcpu);
> +
>         if (vmx->ple_window_dirty) {
>                 vmx->ple_window_dirty = false;
>                 vmcs_write32(PLE_WINDOW, vmx->ple_window);
> @@ -8312,6 +8335,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>         vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
>         trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
>
> +       random_trace(vcpu);
> +
>         /*
>          * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
>          * we did not inject a still-pending event to L1 now because of
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 32bf19ef3115..a45fa01bd354 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7881,3 +7881,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_0xef);

http://xdel.ru/downloads/kvm-e5v2-issue/another-tracepoint-fail-with-apicv.dat.gz

Something a bit more interesting, but the mess is happening just
*after* NMI firing.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bandan Das March 31, 2015, 6:01 p.m. UTC | #2
Andrey Korolyov <andrey@xdel.ru> writes:
...
> http://xdel.ru/downloads/kvm-e5v2-issue/another-tracepoint-fail-with-apicv.dat.gz
>
> Something a bit more interesting, but the mess is happening just
> *after* NMI firing.

What happens if NMI is turned off on the host ?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bandan Das March 31, 2015, 6:04 p.m. UTC | #3
Bandan Das <bsd@redhat.com> writes:

> Andrey Korolyov <andrey@xdel.ru> writes:
> ...
>> http://xdel.ru/downloads/kvm-e5v2-issue/another-tracepoint-fail-with-apicv.dat.gz
>>
>> Something a bit more interesting, but the mess is happening just
>> *after* NMI firing.
>
> What happens if NMI is turned off on the host ?

Sorry, I meant the watchdog..
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Korolyov March 31, 2015, 6:23 p.m. UTC | #4
On Tue, Mar 31, 2015 at 9:04 PM, Bandan Das <bsd@redhat.com> wrote:
> Bandan Das <bsd@redhat.com> writes:
>
>> Andrey Korolyov <andrey@xdel.ru> writes:
>> ...
>>> http://xdel.ru/downloads/kvm-e5v2-issue/another-tracepoint-fail-with-apicv.dat.gz
>>>
>>> Something a bit more interesting, but the mess is happening just
>>> *after* NMI firing.
>>
>> What happens if NMI is turned off on the host ?
>
> Sorry, I meant the watchdog..


Thanks, everything goes well (as it probably should go there):
http://xdel.ru/downloads/kvm-e5v2-issue/apicv-enabled-nmi-disabled.dat.gz
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář April 1, 2015, 11:49 a.m. UTC | #5
2015-03-31 21:23+0300, Andrey Korolyov:
> On Tue, Mar 31, 2015 at 9:04 PM, Bandan Das <bsd@redhat.com> wrote:
> > Bandan Das <bsd@redhat.com> writes:
> >> Andrey Korolyov <andrey@xdel.ru> writes:
> >> ...
> >>> http://xdel.ru/downloads/kvm-e5v2-issue/another-tracepoint-fail-with-apicv.dat.gz
> >>>
> >>> Something a bit more interesting, but the mess is happening just
> >>> *after* NMI firing.
> >>
> >> What happens if NMI is turned off on the host ?
> >
> > Sorry, I meant the watchdog..
> 
> Thanks, everything goes well (as it probably should go there):
> http://xdel.ru/downloads/kvm-e5v2-issue/apicv-enabled-nmi-disabled.dat.gz

Nice revelation!

KVM doesn't expect host's NMIs to look like this so it doesn't pass them
to the host.  What was the watchdog that casually sent NMIs?
(It worked after "nmi_watchdog=0" on the host?)

(Guest's NMI should have a different result as well.  EXCEPTION_NMI is
 an expected exit reason for guest's hard exceptions; they are then
 differentiated by intr_info, and nothing hinted that this was an NMI.)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini April 1, 2015, 12:05 p.m. UTC | #6
On 01/04/2015 13:49, Radim Krčmář wrote:
> 2015-03-31 21:23+0300, Andrey Korolyov:
>> On Tue, Mar 31, 2015 at 9:04 PM, Bandan Das <bsd@redhat.com> wrote:
>>> Bandan Das <bsd@redhat.com> writes:
>>>> Andrey Korolyov <andrey@xdel.ru> writes:
>>>> ...
>>>>> http://xdel.ru/downloads/kvm-e5v2-issue/another-tracepoint-fail-with-apicv.dat.gz
>>>>>
>>>>> Something a bit more interesting, but the mess is happening just
>>>>> *after* NMI firing.
>>>>
>>>> What happens if NMI is turned off on the host ?
>>>
>>> Sorry, I meant the watchdog..
>>
>> Thanks, everything goes well (as it probably should go there):
>> http://xdel.ru/downloads/kvm-e5v2-issue/apicv-enabled-nmi-disabled.dat.gz
> 
> Nice revelation!

Yes, pretty random but good to know.  Can you try again with the
nmi/nmi_handler tracepoint also?

Paolo

> KVM doesn't expect host's NMIs to look like this so it doesn't pass them
> to the host.  What was the watchdog that casually sent NMIs?
> (It worked after "nmi_watchdog=0" on the host?)
> 
> (Guest's NMI should have a different result as well.  NMI_EXCEPTION is
>  an expected exit reason for guest's hard exceptions, they are then
>  differentiated by intr_info and nothing hinted that this was a NMI.)
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Korolyov April 1, 2015, 12:26 p.m. UTC | #7
On Wed, Apr 1, 2015 at 2:49 PM, Radim Krčmář <rkrcmar@redhat.com> wrote:
> 2015-03-31 21:23+0300, Andrey Korolyov:
>> On Tue, Mar 31, 2015 at 9:04 PM, Bandan Das <bsd@redhat.com> wrote:
>> > Bandan Das <bsd@redhat.com> writes:
>> >> Andrey Korolyov <andrey@xdel.ru> writes:
>> >> ...
>> >>> http://xdel.ru/downloads/kvm-e5v2-issue/another-tracepoint-fail-with-apicv.dat.gz
>> >>>
>> >>> Something a bit more interesting, but the mess is happening just
>> >>> *after* NMI firing.
>> >>
>> >> What happens if NMI is turned off on the host ?
>> >
>> > Sorry, I meant the watchdog..
>>
>> Thanks, everything goes well (as it probably should go there):
>> http://xdel.ru/downloads/kvm-e5v2-issue/apicv-enabled-nmi-disabled.dat.gz
>
> Nice revelation!
>
> KVM doesn't expect host's NMIs to look like this so it doesn't pass them
> to the host.  What was the watchdog that casually sent NMIs?
> (It worked after "nmi_watchdog=0" on the host?)
>
> (Guest's NMI should have a different result as well.  NMI_EXCEPTION is
>  an expected exit reason for guest's hard exceptions, they are then
>  differentiated by intr_info and nothing hinted that this was a NMI.)

Yes, I disabled host watchdog during runtime. Indeed guest-induced NMI
would look different and they had no reasons to be fired at this stage
inside guest. I`d suspect a hypervisor hardware misbehavior there but
have a very little idea on how APICv behavior (which is completely
microcode-dependent and CPU-dependent but decoupled from peripheral
hardware) may vary at this point, I am using 1.20140913.1 ucode
version from debian if this can matter. Will send trace suggested by
Paolo in a next couple of hours. Also it would be awesome to ask
hardware folks from Intel who can prove or disprove my abovementioned
statement (as I was unable to catch the problem on 2603v2 so far, this
hypothesis has some chance to be real).
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini April 1, 2015, 1:19 p.m. UTC | #8
On 01/04/2015 14:26, Andrey Korolyov wrote:
> Yes, I disabled host watchdog during runtime. Indeed guest-induced NMI
> would look different and they had no reasons to be fired at this stage
> inside guest. I`d suspect a hypervisor hardware misbehavior there but
> have a very little idea on how APICv behavior (which is completely
> microcode-dependent and CPU-dependent but decoupled from peripheral
> hardware) may vary at this point, I am using 1.20140913.1 ucode
> version from debian if this can matter. Will send trace suggested by
> Paolo in a next couple of hours. Also it would be awesome to ask
> hardware folks from Intel who can prove or disprove my abovementioned
> statement (as I was unable to catch the problem on 2603v2 so far, this
> hypothesis has some chance to be real).

Yes, the interaction with the NMI watchdog is unexpected and makes a
processor erratum somewhat more likely.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Korolyov April 1, 2015, 3:37 p.m. UTC | #9
On Wed, Apr 1, 2015 at 4:19 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 01/04/2015 14:26, Andrey Korolyov wrote:
>> Yes, I disabled host watchdog during runtime. Indeed guest-induced NMI
>> would look different and they had no reasons to be fired at this stage
>> inside guest. I`d suspect a hypervisor hardware misbehavior there but
>> have a very little idea on how APICv behavior (which is completely
>> microcode-dependent and CPU-dependent but decoupled from peripheral
>> hardware) may vary at this point, I am using 1.20140913.1 ucode
>> version from debian if this can matter. Will send trace suggested by
>> Paolo in a next couple of hours. Also it would be awesome to ask
>> hardware folks from Intel who can prove or disprove my abovementioned
>> statement (as I was unable to catch the problem on 2603v2 so far, this
>> hypothesis has some chance to be real).
>
> Yes, the interaction with the NMI watchdog is unexpected and makes a
> processor erratum somewhat more likely.
>
> Paolo


http://xdel.ru/downloads/kvm-e5v2-issue/trace-nmi-apicv-fail-at-reboot.dat.gz

err, no NMI entries nearby failure event, though capture should be correct:
/sys/kernel/debug/tracing/events/kvm*/filter
/sys/kernel/debug/tracing/events/*/kvm*/filter
/sys/kernel/debug/tracing/events/nmi*/filter
/sys/kernel/debug/tracing/events/*/nmi*/filter
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Korolyov April 1, 2015, 4:29 p.m. UTC | #10
On Wed, Apr 1, 2015 at 6:37 PM, Andrey Korolyov <andrey@xdel.ru> wrote:
> On Wed, Apr 1, 2015 at 4:19 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>>
>> On 01/04/2015 14:26, Andrey Korolyov wrote:
>>> Yes, I disabled host watchdog during runtime. Indeed guest-induced NMI
>>> would look different and they had no reasons to be fired at this stage
>>> inside guest. I`d suspect a hypervisor hardware misbehavior there but
>>> have a very little idea on how APICv behavior (which is completely
>>> microcode-dependent and CPU-dependent but decoupled from peripheral
>>> hardware) may vary at this point, I am using 1.20140913.1 ucode
>>> version from debian if this can matter. Will send trace suggested by
>>> Paolo in a next couple of hours. Also it would be awesome to ask
>>> hardware folks from Intel who can prove or disprove my abovementioned
>>> statement (as I was unable to catch the problem on 2603v2 so far, this
>>> hypothesis has some chance to be real).
>>
>> Yes, the interaction with the NMI watchdog is unexpected and makes a
>> processor erratum somewhat more likely.
>>
>> Paolo
>
>
> http://xdel.ru/downloads/kvm-e5v2-issue/trace-nmi-apicv-fail-at-reboot.dat.gz
>
> err, no NMI entries nearby failure event, though capture should be correct:
> /sys/kernel/debug/tracing/events/kvm*/filter
> /sys/kernel/debug/tracing/events/*/kvm*/filter
> /sys/kernel/debug/tracing/events/nmi*/filter
> /sys/kernel/debug/tracing/events/*/nmi*/filter

Moved 2603v2s back and issue is still here. I used wrong pattern for
the issue on a previous series of tests on those CPUs in the middle of
month, continuously respawning VMs when the real issue is hiding in
*first* reboot events starting from the hypervisor reboot (or module
load). So either it should be reproducible anywhere or this is not a
hardware issue (or it is related to the mainboard instead of CPU
itself :) ).
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Korolyov April 1, 2015, 10:58 p.m. UTC | #11
*putting my tinfoil hat on*

After thinking a little bit more, the observable behavior is quite a
good match for a BIOS-level hypervisor (a hardware trojan in modern
terminology), as it is likely sensitive to timing[1], does not appear
more than once per VM during a boot cycle, seemingly does not depend
on whether kvm-intel was reloaded once or twice (or more), and is not
reproducible outside the domain of a single board model. If nobody has
better suggestions to try, I'll take a couple of steps in the next
few days:
- extract and compare bios to the vendor`s image with SPI programmer,
- extract and compare BMC image with public version (should be easy as well),
- try to analyze switch timings by writing sample code for a bare
hardware (there can be a hint that the L2 Linux guest can expose
larger execution time difference with L1 on host with top-level
hypervisor than on supposedly 'non-infected' one),
- try to analyze binary BIOS code itself, though it can be VERY
problematic, I am even not talking for same possibility for BMC.

Sorry for posting such a naive and stupid stuff in the public ml, but
I am really out of clues of what`s happening there and why it is not
reproducible anywhere else.

1. https://xakep.ru/2011/12/26/58104/ (russian text, but can be read
through g-translate without lack of details)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Korolyov April 5, 2015, 2:12 p.m. UTC | #12
A small update:

the behavior is caused by setting the unrestricted_guest feature to N. I
have had this feature disabled everywhere for approx. three years, since
its enablement was one of the suspects for host crashes with the
then-contemporary KVM module. Also, nVMX is likely to not work at all
and to produce the same traces as in https://lkml.org/lkml/2014/7/17/12
without unrestricted_guest=1. I think this fact actually explains
all the real-mode weirdness we've seen before, and this should probably
be resolved either by putting appropriate notes in a README or in the
module information, by making a strict dependency between
apicv/unrestricted_guest and nested/unrestricted_guest, or by fixing the
issue at its root, if that is a possible and appropriate solution. Thanks
everyone for keeping up with ideas through this thread!
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 7c7bc8bef21f..f986636ad9d0 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -742,6 +742,41 @@  TRACE_EVENT(kvm_emulate_insn,
 #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0)
 #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1)
 
+TRACE_EVENT(kvm_0xef,
+	TP_PROTO(bool irr, bool isr, u32 info, bool on, bool pir, u16 status),
+	TP_ARGS(irr, isr, info, on, pir, status),
+
+	TP_STRUCT__entry(
+		__field(bool,  irr )
+		__field(bool,  isr )
+		__field(u32,   info)
+		__field(bool,  on  )
+		__field(bool,  pir )
+		__field(u8,    rvi )
+		__field(u8,    svi )
+		),
+
+	TP_fast_assign(
+		__entry->irr  = irr;
+		__entry->isr  = isr;
+		__entry->info = info;
+		__entry->on   = on;
+		__entry->pir  = pir;
+		__entry->rvi  = status & 0xff;
+		__entry->svi  = status >> 8;
+		),
+
+	TP_printk("irr %s, isr %s, info 0x%x, on %s, pir %s, rvi 0x%x, svi 0x%x",
+	          __entry->irr ? "set  " : "clear",
+	          __entry->isr ? "set  " : "clear",
+	          __entry->info,
+	          __entry->on  ? "set  " : "clear",
+	          __entry->pir ? "set  " : "clear",
+	          __entry->rvi,
+	          __entry->svi
+	         )
+	);
+
 TRACE_EVENT(
 	vcpu_match_mmio,
 	TP_PROTO(gva_t gva, gpa_t gpa, bool write, bool gpa_match),
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index eee63dc33d89..b461edc93d53 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5047,6 +5047,25 @@  static int handle_machine_check(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+#define VEC_POS(v) ((v) & (32 - 1))
+#define REG_POS(v) (((v) >> 5) << 4)
+static inline int apic_test_vector(int vec, void *bitmap)
+{
+	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline void random_trace(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	trace_kvm_0xef(apic_test_vector(0xef, vcpu->arch.apic->regs + APIC_IRR),
+	               apic_test_vector(0xef, vcpu->arch.apic->regs + APIC_ISR),
+	               vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
+	               test_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.control),
+	               test_bit(0xef, (unsigned long *)vmx->pi_desc.pir),
+	               vmcs_read16(GUEST_INTR_STATUS));
+}
+
 static int handle_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5077,6 +5096,8 @@  static int handle_exception(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
+	random_trace(vcpu);
+
 	error_code = 0;
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
@@ -8143,6 +8164,8 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vmx->emulation_required)
 		return;
 
+	random_trace(vcpu);
+
 	if (vmx->ple_window_dirty) {
 		vmx->ple_window_dirty = false;
 		vmcs_write32(PLE_WINDOW, vmx->ple_window);
@@ -8312,6 +8335,8 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
 	trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
 
+	random_trace(vcpu);
+
 	/*
 	 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
 	 * we did not inject a still-pending event to L1 now because of
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 32bf19ef3115..a45fa01bd354 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7881,3 +7881,4 @@  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_0xef);