Message ID | 20180613181849.98192-1-marcorr@google.com (mailing list archive) |
---|---|
State | New, archived |
On 13/06/2018 20:18, Marc Orr wrote:
> This patch is motivated by a syzkaller bug, where a bad VM-entry
> interruption information field is generated in the VMCS02, which causes
> the nested VM launch to fail. Then, KVM fails to resume L1.
>
> While KVM should be improved to correctly resume L1 execution after a
> failed nested launch, this change is justified because the existing code
> to resume L1 is flaky/ad-hoc and the test coverage for resuming L1 is
> sparse.

Please provide a testcase for tools/testing/selftests.

Paolo
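Such a testcase would have L1 request event injection with an obviously invalid VM-entry interruption-information field and verify that the VMLAUNCH fails with a VM-instruction error instead of leaving L1 unresumable. The following is only a minimal sketch of that guest-side logic: vmcs_write(), vmcs_read(), vmlaunch_succeeds(), and report() are hypothetical harness helpers, while the VMCS field encodings and VM-instruction error number 7 are architectural values from the Intel SDM.

/*
 * Sketch only -- not the actual selftest.  vmcs_write(), vmcs_read(),
 * vmlaunch_succeeds() and report() are hypothetical harness helpers; the
 * VMCS field encodings and the VM-instruction error number below are
 * architectural (Intel SDM, volume 3).
 */
#include <stdbool.h>
#include <stdint.h>

#define VM_ENTRY_INTR_INFO_FIELD		0x00004016u
#define VM_INSTRUCTION_ERROR			0x00004400u
#define VMXERR_ENTRY_INVALID_CONTROL_FIELD	7

void vmcs_write(uint32_t field, uint64_t value);	/* hypothetical */
uint64_t vmcs_read(uint32_t field);			/* hypothetical */
bool vmlaunch_succeeds(void);				/* hypothetical */
void report(const char *msg, bool pass);		/* hypothetical */

void test_invalid_event_injection(void)
{
	/* Valid bit set, reserved interruption type 1, arbitrary vector. */
	uint32_t bad_intr_info = (1u << 31) | (1u << 8) | 14;

	vmcs_write(VM_ENTRY_INTR_INFO_FIELD, bad_intr_info);

	if (vmlaunch_succeeds()) {
		report("VM-entry with reserved interruption type", false);
		return;
	}
	/* Expect error 7: "VM entry with invalid control field(s)". */
	report("VM-entry rejected with invalid-control-field error",
	       vmcs_read(VM_INSTRUCTION_ERROR) ==
	       VMXERR_ENTRY_INVALID_CONTROL_FIELD);
}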
On 06/13/2018 11:18 AM, Marc Orr wrote:
> This patch extends the checks done prior to a nested VM entry.
> Specifically, it extends the check_vmentry_prereqs function with checks
> for fields relevant to the VM-entry event injection information, as
> described in the Intel SDM, volume 3.
>
> This patch is motivated by a syzkaller bug, where a bad VM-entry
> interruption information field is generated in the VMCS02, which causes
> the nested VM launch to fail. Then, KVM fails to resume L1.
>
> While KVM should be improved to correctly resume L1 execution after a
> failed nested launch, this change is justified because the existing code
> to resume L1 is flaky/ad-hoc and the test coverage for resuming L1 is
> sparse.
>
> Signed-off-by: Marc Orr <marcorr@google.com>
> ---
>  arch/x86/include/asm/vmx.h |  3 ++
>  arch/x86/kvm/vmx.c         | 67 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.h         | 10 ++++++
>  3 files changed, 80 insertions(+)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 425e6b8b9547..6aa8499e1f62 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -114,6 +114,7 @@
>  #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
>  #define VMX_MISC_SAVE_EFER_LMA			0x00000020
>  #define VMX_MISC_ACTIVITY_HLT			0x00000040
> +#define VMX_MISC_ZERO_LEN_INS			0x40000000
>
>  /* VMFUNC functions */
>  #define VMX_VMFUNC_EPTP_SWITCHING		0x00000001
> @@ -351,11 +352,13 @@ enum vmcs_field {
>  #define VECTORING_INFO_VALID_MASK	INTR_INFO_VALID_MASK
>
>  #define INTR_TYPE_EXT_INTR		(0 << 8) /* external interrupt */
> +#define INTR_TYPE_RESERVED		(1 << 8) /* reserved */
>  #define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
>  #define INTR_TYPE_HARD_EXCEPTION	(3 << 8) /* processor exception */
>  #define INTR_TYPE_SOFT_INTR		(4 << 8) /* software interrupt */
>  #define INTR_TYPE_PRIV_SW_EXCEPTION	(5 << 8) /* ICE breakpoint - undocumented */
>  #define INTR_TYPE_SOFT_EXCEPTION	(6 << 8) /* software exception */
> +#define INTR_TYPE_OTHER_EVENT		(7 << 8) /* other event */
>
>  /* GUEST_INTERRUPTIBILITY_INFO flags. */
>  #define GUEST_INTR_STATE_STI		0x00000001
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 48989f78be60..8b2c3909ae69 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1705,6 +1705,17 @@ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
>  		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
>  }
>
> +static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
> +{
> +	return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
> +}
> +
> +static inline bool nested_cpu_has_monitor_trap_flag(struct kvm_vcpu *vcpu)
> +{
> +	return to_vmx(vcpu)->nested.msrs.procbased_ctls_low &
> +		CPU_BASED_MONITOR_TRAP_FLAG;
> +}
> +
>  static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
>  {
>  	return vmcs12->cpu_based_vm_exec_control & bit;
> @@ -11612,6 +11623,62 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
>  		return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
>
> +	/*
> +	 * From the Intel SDM, volume 3:
> +	 * Fields relevant to VM-entry event injection must be set properly.
> +	 * These fields are the VM-entry interruption-information field, the
> +	 * VM-entry exception error code, and the VM-entry instruction length.
> +	 */
> +	if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
> +		u32 intr_info = vmcs12->vm_entry_intr_info_field;
> +		u8 nr = intr_info & INTR_INFO_VECTOR_MASK;

Nit: I would rename 'nr' to 'vector' for better readability of the code.

> +		u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
> +		bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
> +		bool should_have_error_code;
> +		bool urg = nested_cpu_has2(vmcs12,
> +					   SECONDARY_EXEC_UNRESTRICTED_GUEST);
> +		bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
> +
> +		/* VM-entry interruption-info field: interruption type */
> +		if (intr_type == INTR_TYPE_RESERVED ||
> +		    (intr_type == INTR_TYPE_OTHER_EVENT &&
> +		     !nested_cpu_has_monitor_trap_flag(vcpu)))
> +			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> +		/* VM-entry interruption-info field: vector */
> +		if ((intr_type == INTR_TYPE_NMI_INTR && nr != NMI_VECTOR) ||
> +		    (intr_type == INTR_TYPE_HARD_EXCEPTION && nr > 31) ||
> +		    (intr_type == INTR_TYPE_OTHER_EVENT && nr != 0))
> +			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> +		/* VM-entry interruption-info field: deliver error code */
> +		should_have_error_code =
> +			intr_type == INTR_TYPE_HARD_EXCEPTION &&
> +			x86_exception_has_error_code(nr, prot_mode);

It's better to leave 'prot_mode' outside of the function as follows:

	should_have_error_code =
		intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
		x86_exception_has_error_code(nr);

> +		if (has_error_code != should_have_error_code)
> +			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> +		/* VM-entry interruption-info field: reserved bits */
> +		if (intr_info & INTR_INFO_RESVD_BITS_MASK)
> +			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> +		/* VM-entry exception error code */
> +		if (has_error_code &&
> +		    vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
> +			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;

I would move this check above where 'delivery error code' is because
this check is related to that.

> +
> +		/* VM-entry instruction length */
> +		switch (intr_type) {
> +		case INTR_TYPE_SOFT_EXCEPTION:
> +		case INTR_TYPE_SOFT_INTR:
> +		case INTR_TYPE_PRIV_SW_EXCEPTION:
> +			if ((vmcs12->vm_entry_instruction_len > 15) ||
> +			    (vmcs12->vm_entry_instruction_len == 0 &&
> +			     !nested_cpu_has_zero_length_injection(vcpu)))
> +				return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +		}
> +	}
> +
>  	return 0;
>  }
>
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 331993c49dae..7a23a37c3bb1 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -110,6 +110,16 @@ static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
>  #endif
>  }
>
> +static inline bool x86_exception_has_error_code(unsigned int nr,
> +						bool protected_mode)
> +{
> +	static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
> +			BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
> +			BIT(PF_VECTOR) | BIT(AC_VECTOR);
> +
> +	return protected_mode && ((1U << nr) & exception_has_error_code);

Should it be (nr & exception_has_error_code) instead of
((1U << nr) & exception_has_error_code) ?

> +}
> +
>  static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
>  {
>  	return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
> > +		u32 intr_info = vmcs12->vm_entry_intr_info_field;
> > +		u8 nr = intr_info & INTR_INFO_VECTOR_MASK;
> Nit: I would rename 'nr' to 'vector' for better readability of the code.

SGTM, will do. I used nr to be consistent with other parts of the code.
That said, I agree that vector is more explicit and clear.

> > +		/* VM-entry interruption-info field: deliver error code */
> > +		should_have_error_code =
> > +			intr_type == INTR_TYPE_HARD_EXCEPTION &&
> > +			x86_exception_has_error_code(nr, prot_mode);
> It's better to leave 'prot_mode' outside of the function as follows:
>
> 	should_have_error_code =
> 		intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
> 		x86_exception_has_error_code(nr);

I think we should leave the helper as is because an exception can only
have an error code in protected mode.

> > +		/* VM-entry exception error code */
> > +		if (has_error_code &&
> > +		    vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
> > +			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> I would move this check above where 'delivery error code' is because
> this check is related to that.

Sure, will do.

> > +static inline bool x86_exception_has_error_code(unsigned int nr,
> > +						bool protected_mode)
> > +{
> > +	static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
> > +			BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
> > +			BIT(PF_VECTOR) | BIT(AC_VECTOR);
> > +
> > +	return protected_mode && ((1U << nr) & exception_has_error_code);
> Should it be (nr & exception_has_error_code) instead of
> ((1U << nr) & exception_has_error_code) ?

No, nr is a vector's absolute number, as defined in
arch/x86/include/uapi/asm/kvm.h. For example, '#define GP_VECTOR 13'.
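To make that last point concrete, here is a small standalone illustration of the bitmask test being discussed. The vector numbers (e.g. GP_VECTOR = 13, DF = 8, TS = 10, NP = 11, SS = 12, PF = 14, AC = 17) match arch/x86/include/uapi/asm/kvm.h, but the program itself is only a sketch, not kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n)		(1U << (n))
#define GP_VECTOR	13	/* #GP, pushes an error code */
#define UD_VECTOR	6	/* #UD, pushes no error code */

/* Exceptions that push an error code, as in the patch's x86.h helper. */
static const uint32_t exception_has_error_code_mask =
	BIT(8) | BIT(10) | BIT(11) | BIT(12) | BIT(13) | BIT(14) | BIT(17);
	/* #DF     #TS      #NP      #SS      #GP      #PF      #AC */

static bool x86_exception_has_error_code(unsigned int nr, bool protected_mode)
{
	/*
	 * 'nr' is the vector's absolute number (e.g. 13 for #GP), so it must
	 * be turned into a single bit with 1U << nr before testing the mask;
	 * testing 'nr' directly against the mask would be meaningless.
	 */
	return protected_mode && ((1U << nr) & exception_has_error_code_mask);
}

int main(void)
{
	printf("#GP in protected mode: %d\n", x86_exception_has_error_code(GP_VECTOR, true));  /* 1 */
	printf("#UD in protected mode: %d\n", x86_exception_has_error_code(UD_VECTOR, true));  /* 0 */
	printf("#GP outside protected mode: %d\n", x86_exception_has_error_code(GP_VECTOR, false)); /* 0 */
	return 0;
}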
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 425e6b8b9547..6aa8499e1f62 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -114,6 +114,7 @@
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 #define VMX_MISC_ACTIVITY_HLT			0x00000040
+#define VMX_MISC_ZERO_LEN_INS			0x40000000
 
 /* VMFUNC functions */
 #define VMX_VMFUNC_EPTP_SWITCHING		0x00000001
@@ -351,11 +352,13 @@ enum vmcs_field {
 #define VECTORING_INFO_VALID_MASK	INTR_INFO_VALID_MASK
 
 #define INTR_TYPE_EXT_INTR		(0 << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED		(1 << 8) /* reserved */
 #define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
 #define INTR_TYPE_HARD_EXCEPTION	(3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR		(4 << 8) /* software interrupt */
 #define INTR_TYPE_PRIV_SW_EXCEPTION	(5 << 8) /* ICE breakpoint - undocumented */
 #define INTR_TYPE_SOFT_EXCEPTION	(6 << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT		(7 << 8) /* other event */
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define GUEST_INTR_STATE_STI		0x00000001
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 48989f78be60..8b2c3909ae69 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1705,6 +1705,17 @@ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
 		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
 }
 
+static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
+}
+
+static inline bool nested_cpu_has_monitor_trap_flag(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.msrs.procbased_ctls_low &
+		CPU_BASED_MONITOR_TRAP_FLAG;
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
 	return vmcs12->cpu_based_vm_exec_control & bit;
@@ -11612,6 +11623,62 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
 		return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
 
+	/*
+	 * From the Intel SDM, volume 3:
+	 * Fields relevant to VM-entry event injection must be set properly.
+	 * These fields are the VM-entry interruption-information field, the
+	 * VM-entry exception error code, and the VM-entry instruction length.
+	 */
+	if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
+		u32 intr_info = vmcs12->vm_entry_intr_info_field;
+		u8 nr = intr_info & INTR_INFO_VECTOR_MASK;
+		u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
+		bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
+		bool should_have_error_code;
+		bool urg = nested_cpu_has2(vmcs12,
+					   SECONDARY_EXEC_UNRESTRICTED_GUEST);
+		bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
+
+		/* VM-entry interruption-info field: interruption type */
+		if (intr_type == INTR_TYPE_RESERVED ||
+		    (intr_type == INTR_TYPE_OTHER_EVENT &&
+		     !nested_cpu_has_monitor_trap_flag(vcpu)))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: vector */
+		if ((intr_type == INTR_TYPE_NMI_INTR && nr != NMI_VECTOR) ||
+		    (intr_type == INTR_TYPE_HARD_EXCEPTION && nr > 31) ||
+		    (intr_type == INTR_TYPE_OTHER_EVENT && nr != 0))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: deliver error code */
+		should_have_error_code =
+			intr_type == INTR_TYPE_HARD_EXCEPTION &&
+			x86_exception_has_error_code(nr, prot_mode);
+		if (has_error_code != should_have_error_code)
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: reserved bits */
+		if (intr_info & INTR_INFO_RESVD_BITS_MASK)
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry exception error code */
+		if (has_error_code &&
+		    vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry instruction length */
+		switch (intr_type) {
+		case INTR_TYPE_SOFT_EXCEPTION:
+		case INTR_TYPE_SOFT_INTR:
+		case INTR_TYPE_PRIV_SW_EXCEPTION:
+			if ((vmcs12->vm_entry_instruction_len > 15) ||
+			    (vmcs12->vm_entry_instruction_len == 0 &&
+			     !nested_cpu_has_zero_length_injection(vcpu)))
+				return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 331993c49dae..7a23a37c3bb1 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -110,6 +110,16 @@ static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static inline bool x86_exception_has_error_code(unsigned int nr,
+						bool protected_mode)
+{
+	static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+			BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
+			BIT(PF_VECTOR) | BIT(AC_VECTOR);
+
+	return protected_mode && ((1U << nr) & exception_has_error_code);
+}
+
 static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
This patch extends the checks done prior to a nested VM entry.
Specifically, it extends the check_vmentry_prereqs function with checks
for fields relevant to the VM-entry event injection information, as
described in the Intel SDM, volume 3.

This patch is motivated by a syzkaller bug, where a bad VM-entry
interruption information field is generated in the VMCS02, which causes
the nested VM launch to fail. Then, KVM fails to resume L1.

While KVM should be improved to correctly resume L1 execution after a
failed nested launch, this change is justified because the existing code
to resume L1 is flaky/ad-hoc and the test coverage for resuming L1 is
sparse.

Signed-off-by: Marc Orr <marcorr@google.com>
---
 arch/x86/include/asm/vmx.h |  3 ++
 arch/x86/kvm/vmx.c         | 67 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.h         | 10 ++++++
 3 files changed, 80 insertions(+)
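For reference, the new checks decompose the 32-bit VM-entry interruption-information field according to its architectural layout (Intel SDM, volume 3): bits 7:0 hold the vector, bits 10:8 the interruption type, bit 11 the deliver-error-code flag, bits 30:12 are reserved, and bit 31 is the valid bit. The minimal decoder below is a standalone sketch of that layout, not part of the patch; the mask values correspond to the INTR_INFO_* constants in arch/x86/include/asm/vmx.h.

#include <stdint.h>
#include <stdio.h>

/* Bit layout of the VM-entry interruption-information field (SDM vol. 3). */
#define INTR_INFO_VECTOR_MASK		0x000000ffu	/* bits 7:0   */
#define INTR_INFO_INTR_TYPE_MASK	0x00000700u	/* bits 10:8  */
#define INTR_INFO_DELIVER_CODE_MASK	0x00000800u	/* bit  11    */
#define INTR_INFO_RESVD_BITS_MASK	0x7ffff000u	/* bits 30:12 */
#define INTR_INFO_VALID_MASK		0x80000000u	/* bit  31    */

static void decode_entry_intr_info(uint32_t intr_info)
{
	if (!(intr_info & INTR_INFO_VALID_MASK)) {
		puts("no event injection requested");
		return;
	}
	printf("vector=%u type=%u deliver_error_code=%d reserved_bits_set=%d\n",
	       (unsigned)(intr_info & INTR_INFO_VECTOR_MASK),
	       (unsigned)((intr_info & INTR_INFO_INTR_TYPE_MASK) >> 8),
	       !!(intr_info & INTR_INFO_DELIVER_CODE_MASK),
	       !!(intr_info & INTR_INFO_RESVD_BITS_MASK));
}

int main(void)
{
	/* Inject #GP (vector 13), hardware exception (type 3), with error code. */
	decode_entry_intr_info(0x80000000u | (3u << 8) | (1u << 11) | 13u);
	/* A made-up invalid field: reserved interruption type 1, now rejected. */
	decode_entry_intr_info(0x80000000u | (1u << 8) | 14u);
	return 0;
}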