Message ID | 918aaa770de5d98cf81cce8b6cdb6faad32cbeb7.1614590788.git.kai.huang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM SGX virtualization support | expand |
On Mon, Mar 01, 2021, Kai Huang wrote: > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 50810d471462..df8e338267aa 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -1570,12 +1570,18 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) > > static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) > { > + if (to_vmx(vcpu)->exit_reason.enclave_mode) { > + kvm_queue_exception(vcpu, UD_VECTOR); Rereading my own code, I think it would be a good idea to add a comment here explaining that injecting #UD is technically wrong, but avoids giving guest userspace an easy way to DoS the guest. The EPT misconfig is a good example; guest userspace could have executed a simple MOV <reg>, <mem> instruction, in which case injecting a #UD is bizarre behavior. But, the alternative is exiting to userspace with KVM_INTERNAL_ERROR_EMULATION, which is all but guaranteed to kill the guest. If KVM, specifically handle_emulation_failure(), ever gains a more sophisticated mechanism for handling userspace emulation errors, this should be updated too. /* * Emulation of instructions in SGX enclaves is impossible as RIP does * not point tthe failing instruction, and even if it did, the code * stream is inaccessible. Inject #UD instead of exiting to userspace * so that guest userspace can't DoS the guest simply by triggering * emulation (enclaves are CPL3 only). */ > + return false; > + } > return true; > } ... > @@ -5384,6 +5415,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) > { > gpa_t gpa; > > + if (!vmx_can_emulate_instruction(vcpu, NULL, 0)) > + return 1; > + > /* > * A nested guest cannot optimize MMIO vmexits, because we have an > * nGPA here instead of the required GPA. > -- > 2.29.2 >
On Mon, 1 Mar 2021 08:52:13 -0800 Sean Christopherson wrote: > On Mon, Mar 01, 2021, Kai Huang wrote: > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > index 50810d471462..df8e338267aa 100644 > > --- a/arch/x86/kvm/vmx/vmx.c > > +++ b/arch/x86/kvm/vmx/vmx.c > > @@ -1570,12 +1570,18 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) > > > > static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) > > { > > + if (to_vmx(vcpu)->exit_reason.enclave_mode) { > > + kvm_queue_exception(vcpu, UD_VECTOR); > > Rereading my own code, I think it would be a good idea to add a comment here > explaining that injecting #UD is technically wrong, but avoids giving guest > userspace an easy way to DoS the guest. The EPT misconfig is a good example; > guest userspace could have executed a simple MOV <reg>, <mem> instruction, in > which case injecting a #UD is bizarre behavior. But, the alternative is exiting > to userspace with KVM_INTERNAL_ERROR_EMULATION, which is all but guaranteed to > kill the guest. > > If KVM, specifically handle_emulation_failure(), ever gains a more sophisticated > mechanism for handling userspace emulation errors, this should be updated too. > > /* > * Emulation of instructions in SGX enclaves is impossible as RIP does > * not point tthe failing instruction, and even if it did, the code > * stream is inaccessible. Inject #UD instead of exiting to userspace > * so that guest userspace can't DoS the guest simply by triggering > * emulation (enclaves are CPL3 only). > */ Agreed. Will add above comment. > > > + return false; > > + } > > return true; > > } > > ... > > > @@ -5384,6 +5415,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) > > { > > gpa_t gpa; > > > > + if (!vmx_can_emulate_instruction(vcpu, NULL, 0)) > > + return 1; > > + > > /* > > * A nested guest cannot optimize MMIO vmexits, because we have an > > * nGPA here instead of the required GPA. > > -- > > 2.29.2 > >
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 358707f60d99..0ffaa3156a4e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -373,6 +373,7 @@ enum vmcs_field { #define GUEST_INTR_STATE_MOV_SS 0x00000002 #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +#define GUEST_INTR_STATE_ENCLAVE_INTR 0x00000010 /* GUEST_ACTIVITY_STATE flags */ #define GUEST_ACTIVITY_ACTIVE 0 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index b8e650a985e3..946d761adbd3 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -27,6 +27,7 @@ #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index bcca0b80e0d0..28848e9f70e2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4105,6 +4105,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, { /* update exit information fields: */ vmcs12->vm_exit_reason = vm_exit_reason; + if (to_vmx(vcpu)->exit_reason.enclave_mode) + vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE; vmcs12->exit_qualification = exit_qualification; vmcs12->vm_exit_intr_info = exit_intr_info; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 50810d471462..df8e338267aa 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1570,12 +1570,18 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) { + if (to_vmx(vcpu)->exit_reason.enclave_mode) { + kvm_queue_exception(vcpu, UD_VECTOR); + return false; + } return true; } static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { + union vmx_exit_reason exit_reason = to_vmx(vcpu)->exit_reason; unsigned long rip, orig_rip; + u32 instr_len; /* * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on @@ -1586,9 +1592,33 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) * i.e. we end up advancing IP with some random value. */ if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || - to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) { + exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) { + instr_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + + /* + * Emulating an enclave's instructions isn't supported as KVM + * cannot access the enclave's memory or its true RIP, e.g. the + * vmcs.GUEST_RIP points at the exit point of the enclave, not + * the RIP that actually triggered the VM-Exit. But, because + * most instructions that cause VM-Exit will #UD in an enclave, + * most instruction-based VM-Exits simply do not occur. + * + * There are a few exceptions, notably the debug instructions + * INT1ICEBRK and INT3, as they are allowed in debug enclaves + * and generate #DB/#BP as expected, which KVM might intercept. + * But again, the CPU does the dirty work and saves an instr + * length of zero so VMMs don't shoot themselves in the foot. + * WARN if KVM tries to skip a non-zero length instruction on + * a VM-Exit from an enclave. + */ + if (!instr_len) + goto rip_updated; + + WARN(exit_reason.enclave_mode, + "KVM: skipping instruction after SGX enclave VM-Exit"); + orig_rip = kvm_rip_read(vcpu); - rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + rip = orig_rip + instr_len; #ifdef CONFIG_X86_64 /* * We need to mask out the high 32 bits of RIP if not in 64-bit @@ -1604,6 +1634,7 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) return 0; } +rip_updated: /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); @@ -5384,6 +5415,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { gpa_t gpa; + if (!vmx_can_emulate_instruction(vcpu, NULL, 0)) + return 1; + /* * A nested guest cannot optimize MMIO vmexits, because we have an * nGPA here instead of the required GPA.