Message ID | 20191213231646.88015-1-jmattson@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | kvm: x86: Add logical CPU to KVM_EXIT_FAIL_ENTRY info | expand |
> On 14 Dec 2019, at 1:16, Jim Mattson <jmattson@google.com> wrote: > > More often than not, a failed VM-entry in a production environment is > the result of a defective CPU (at least, insofar as Intel x86 is > concerned). To aid in identifying the bad hardware, add the logical > CPU to the information provided to userspace on a KVM exit with reason > KVM_EXIT_FAIL_ENTRY. The presence of this additional information is > indicated by a new capability, KVM_CAP_FAILED_ENTRY_CPU. > > Signed-off-by: Jim Mattson <jmattson@google.com> > Reviewed-by: Peter Shier <pshier@google.com> > Reviewed-by: Oliver Upton <oupton@google.com> > --- > Documentation/virt/kvm/api.txt | 1 + > arch/x86/kvm/svm.c | 1 + > arch/x86/kvm/vmx/vmx.c | 2 ++ > arch/x86/kvm/x86.c | 1 + > include/uapi/linux/kvm.h | 2 ++ > 5 files changed, 7 insertions(+) > > diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt > index ebb37b34dcfc..6e5d92406b65 100644 > --- a/Documentation/virt/kvm/api.txt > +++ b/Documentation/virt/kvm/api.txt > @@ -4245,6 +4245,7 @@ hardware_exit_reason. > /* KVM_EXIT_FAIL_ENTRY */ > struct { > __u64 hardware_entry_failure_reason; > + __u32 cpu; /* if KVM_CAP_FAILED_ENTRY_CPU */ > } fail_entry; > > If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 122d4ce3b1ab..4d06b2413c63 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -4980,6 +4980,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) > kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; > kvm_run->fail_entry.hardware_entry_failure_reason > = svm->vmcb->control.exit_code; > + kvm_run->fail_entry.cpu = raw_smp_processor_id(); Why not just use vcpu->cpu? Same for vmx_handle_exit() to be consistent. 
> dump_vmcb(vcpu); > return 0; > } > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index e3394c839dea..4d540b1c08e0 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -5846,6 +5846,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) > vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; > vcpu->run->fail_entry.hardware_entry_failure_reason > = exit_reason; > + vcpu->run->fail_entry.cpu = vmx->loaded_vmcs->cpu; > return 0; > } > > @@ -5854,6 +5855,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) > vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; > vcpu->run->fail_entry.hardware_entry_failure_reason > = vmcs_read32(VM_INSTRUCTION_ERROR); > + vcpu->run->fail_entry.cpu = vmx->loaded_vmcs->cpu; > return 0; > } > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index cf917139de6b..9e89a32056d1 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -3273,6 +3273,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_GET_MSR_FEATURES: > case KVM_CAP_MSR_PLATFORM_INFO: > case KVM_CAP_EXCEPTION_PAYLOAD: > + case KVM_CAP_FAILED_ENTRY_CPU: > r = 1; > break; > case KVM_CAP_SYNC_REGS: > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index f0a16b4adbbd..09ba7174456d 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -277,6 +277,7 @@ struct kvm_run { > /* KVM_EXIT_FAIL_ENTRY */ > struct { > __u64 hardware_entry_failure_reason; > + __u32 cpu; > } fail_entry; > /* KVM_EXIT_EXCEPTION */ > struct { > @@ -1009,6 +1010,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 > #define KVM_CAP_ARM_NISV_TO_USER 177 > #define KVM_CAP_ARM_INJECT_EXT_DABT 178 > +#define KVM_CAP_FAILED_ENTRY_CPU 179 > > #ifdef KVM_CAP_IRQ_ROUTING > > -- > 2.24.1.735.g03f4e72817-goog >
On Fri, Dec 13, 2019 at 3:26 PM Liran Alon <liran.alon@oracle.com> wrote: > > > > > On 14 Dec 2019, at 1:16, Jim Mattson <jmattson@google.com> wrote: > > > > More often than not, a failed VM-entry in a production environment is > > the result of a defective CPU (at least, insofar as Intel x86 is > > concerned). To aid in identifying the bad hardware, add the logical > > CPU to the information provided to userspace on a KVM exit with reason > > KVM_EXIT_FAIL_ENTRY. The presence of this additional information is > > indicated by a new capability, KVM_CAP_FAILED_ENTRY_CPU. > > > > Signed-off-by: Jim Mattson <jmattson@google.com> > > Reviewed-by: Peter Shier <pshier@google.com> > > Reviewed-by: Oliver Upton <oupton@google.com> > > --- > > Documentation/virt/kvm/api.txt | 1 + > > arch/x86/kvm/svm.c | 1 + > > arch/x86/kvm/vmx/vmx.c | 2 ++ > > arch/x86/kvm/x86.c | 1 + > > include/uapi/linux/kvm.h | 2 ++ > > 5 files changed, 7 insertions(+) > > > > diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt > > index ebb37b34dcfc..6e5d92406b65 100644 > > --- a/Documentation/virt/kvm/api.txt > > +++ b/Documentation/virt/kvm/api.txt > > @@ -4245,6 +4245,7 @@ hardware_exit_reason. > > /* KVM_EXIT_FAIL_ENTRY */ > > struct { > > __u64 hardware_entry_failure_reason; > > + __u32 cpu; /* if KVM_CAP_FAILED_ENTRY_CPU */ > > } fail_entry; > > > > If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > > index 122d4ce3b1ab..4d06b2413c63 100644 > > --- a/arch/x86/kvm/svm.c > > +++ b/arch/x86/kvm/svm.c > > @@ -4980,6 +4980,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) > > kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; > > kvm_run->fail_entry.hardware_entry_failure_reason > > = svm->vmcb->control.exit_code; > > + kvm_run->fail_entry.cpu = raw_smp_processor_id(); > > Why not just use vcpu->cpu? > Same for vmx_handle_exit() to be consistent. Ah. Perfect. Thanks. 
> > dump_vmcb(vcpu); > > return 0; > > } > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > index e3394c839dea..4d540b1c08e0 100644 > > --- a/arch/x86/kvm/vmx/vmx.c > > +++ b/arch/x86/kvm/vmx/vmx.c > > @@ -5846,6 +5846,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) > > vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; > > vcpu->run->fail_entry.hardware_entry_failure_reason > > = exit_reason; > > + vcpu->run->fail_entry.cpu = vmx->loaded_vmcs->cpu; > > return 0; > > } > > > > @@ -5854,6 +5855,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) > > vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; > > vcpu->run->fail_entry.hardware_entry_failure_reason > > = vmcs_read32(VM_INSTRUCTION_ERROR); > > + vcpu->run->fail_entry.cpu = vmx->loaded_vmcs->cpu; > > return 0; > > } > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index cf917139de6b..9e89a32056d1 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -3273,6 +3273,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > > case KVM_CAP_GET_MSR_FEATURES: > > case KVM_CAP_MSR_PLATFORM_INFO: > > case KVM_CAP_EXCEPTION_PAYLOAD: > > + case KVM_CAP_FAILED_ENTRY_CPU: > > r = 1; > > break; > > case KVM_CAP_SYNC_REGS: > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index f0a16b4adbbd..09ba7174456d 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -277,6 +277,7 @@ struct kvm_run { > > /* KVM_EXIT_FAIL_ENTRY */ > > struct { > > __u64 hardware_entry_failure_reason; > > + __u32 cpu; > > } fail_entry; > > /* KVM_EXIT_EXCEPTION */ > > struct { > > @@ -1009,6 +1010,7 @@ struct kvm_ppc_resize_hpt { > > #define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 > > #define KVM_CAP_ARM_NISV_TO_USER 177 > > #define KVM_CAP_ARM_INJECT_EXT_DABT 178 > > +#define KVM_CAP_FAILED_ENTRY_CPU 179 > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > -- > > 2.24.1.735.g03f4e72817-goog > > >
On 14/12/19 00:16, Jim Mattson wrote: > More often than not, a failed VM-entry in a production environment is > the result of a defective CPU (at least, insofar as Intel x86 is > concerned). It's comforting that someone else got to the same conclusion as we did... Paolo > To aid in identifying the bad hardware, add the logical > CPU to the information provided to userspace on a KVM exit with reason > KVM_EXIT_FAIL_ENTRY. The presence of this additional information is > indicated by a new capability, KVM_CAP_FAILED_ENTRY_CPU. > > Signed-off-by: Jim Mattson <jmattson@google.com> > Reviewed-by: Peter Shier <pshier@google.com> > Reviewed-by: Oliver Upton <oupton@google.com>
diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt index ebb37b34dcfc..6e5d92406b65 100644 --- a/Documentation/virt/kvm/api.txt +++ b/Documentation/virt/kvm/api.txt @@ -4245,6 +4245,7 @@ hardware_exit_reason. /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; + __u32 cpu; /* if KVM_CAP_FAILED_ENTRY_CPU */ } fail_entry; If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 122d4ce3b1ab..4d06b2413c63 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4980,6 +4980,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; + kvm_run->fail_entry.cpu = raw_smp_processor_id(); dump_vmcb(vcpu); return 0; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e3394c839dea..4d540b1c08e0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5846,6 +5846,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; vcpu->run->fail_entry.hardware_entry_failure_reason = exit_reason; + vcpu->run->fail_entry.cpu = vmx->loaded_vmcs->cpu; return 0; } @@ -5854,6 +5855,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; vcpu->run->fail_entry.hardware_entry_failure_reason = vmcs_read32(VM_INSTRUCTION_ERROR); + vcpu->run->fail_entry.cpu = vmx->loaded_vmcs->cpu; return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf917139de6b..9e89a32056d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3273,6 +3273,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: + case KVM_CAP_FAILED_ENTRY_CPU: r = 1; break; case KVM_CAP_SYNC_REGS: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 
f0a16b4adbbd..09ba7174456d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -277,6 +277,7 @@ struct kvm_run { /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; + __u32 cpu; } fail_entry; /* KVM_EXIT_EXCEPTION */ struct { @@ -1009,6 +1010,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 #define KVM_CAP_ARM_NISV_TO_USER 177 #define KVM_CAP_ARM_INJECT_EXT_DABT 178 +#define KVM_CAP_FAILED_ENTRY_CPU 179 #ifdef KVM_CAP_IRQ_ROUTING