
[RFC,v5,09/20] kvm: extend to accommodate the VM introspection subsystem

Message ID 20181220182850.4579-10-alazar@bitdefender.com (mailing list archive)
Series: VM introspection

Commit Message

Adalbert Lazăr Dec. 20, 2018, 6:28 p.m. UTC
Add EMULATION_USER_EXIT to stop the emulator and re-execute the current
instruction in the guest.
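
For context, this is how the new result code is meant to propagate; the
kvmi_*() hook below is hypothetical (presumably introduced elsewhere in
the series), only X86EMUL_RETRY_INSTR, EMULATION_USER_EXIT and
EMULATE_DONE are touched by this patch:

	/* Hypothetical emulator callback deciding to bail out: */
	if (kvmi_wants_reexecution(vcpu))	/* not part of this patch */
		return X86EMUL_RETRY_INSTR;	/* -> EMULATION_USER_EXIT */

	/* x86_emulate_instruction() (see the x86.c hunk below) then does: */
	if (r == EMULATION_USER_EXIT)
		return EMULATE_DONE;	/* no writeback; the guest re-executes the insn */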

Add kvm_set_mtf(), kvm_set_interrupt_shadow() and kvm_spt_fault()
to allow single-stepping over x86 instructions the emulator does not
implement.
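
A hedged sketch of how the introspection side might combine these (the
function name and the policy are assumptions; only the three exported
helpers come from this patch):

	static void kvmi_arm_single_step(struct kvm_vcpu *vcpu)
	{
		/* Assumed policy: only single-step faults taken on the EPT/NPT tables. */
		if (!kvm_spt_fault(vcpu))
			return;

		kvm_set_interrupt_shadow(vcpu, 0);	/* clear any interrupt shadow */
		kvm_set_mtf(vcpu, true);	/* VMX monitor trap flag; no-op on SVM in this patch */
	}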

Add kvm_arch_msr_intercept() to enable/disable MSR interception and
prevent MSR write exits from being disabled.
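
For illustration only (the wrapper and the MSR choice are assumptions;
the exported helper is from this patch), a command handler could toggle
write interception for a watched MSR like this:

	static void kvmi_control_msr_events(struct kvm_vcpu *vcpu, bool enable)
	{
		/* e.g. watch the SYSENTER/SYSCALL entry points */
		kvm_arch_msr_intercept(vcpu, MSR_IA32_SYSENTER_EIP, enable);
		kvm_arch_msr_intercept(vcpu, MSR_LSTAR, enable);
	}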

Add kvm_mmu_nested_pagefault() and kvm_mmu_fault_gla() to filter #PF
introspection events.
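
A possible consumer (names are assumptions) would use the two helpers
to decide whether an EPT/NPT fault becomes an introspection event and
which guest linear address to report:

	static bool kvmi_pf_event_allowed(struct kvm_vcpu *vcpu, u64 *gla)
	{
		/* Assumed policy: skip faults taken while walking the guest page tables. */
		if (kvm_mmu_nested_pagefault(vcpu))
			return false;

		*gla = kvm_mmu_fault_gla(vcpu);	/* ~0ull when no GLA was reported */
		return true;
	}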

Add kvm_arch_queue_bp() to reinject breakpoints.
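
When the introspection tool declines to handle a breakpoint event, the
expectation is that it gets handed back to the guest (assumed usage):

	kvm_arch_queue_bp(vcpu);	/* requeue #BP for the guest */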

Export kvm_vcpu_ioctl_x86_get_xsave().

Add kvm_arch_vcpu_get_regs(), kvm_arch_vcpu_set_regs(), kvm_arch_vcpu_get_sregs()
and kvm_arch_vcpu_set_guest_debug() to avoid vcpu_load()/vcpu_put() calls.
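
These variants are presumably for callers that already run in vcpu
context (the ioctl wrappers keep doing vcpu_load()/vcpu_put()); a
minimal, assumed usage:

	/* Hypothetical event-building code, already running with the vcpu loaded. */
	static void kvmi_get_event_regs(struct kvm_vcpu *vcpu,
					struct kvm_regs *regs,
					struct kvm_sregs *sregs)
	{
		kvm_arch_vcpu_get_regs(vcpu, regs);	/* no vcpu_load()/vcpu_put() */
		kvm_arch_vcpu_get_sregs(vcpu, sregs);
	}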

Export the availability of EPT views. This is used to validate the
KVMI_GET_PAGE_ACCESS and KVMI_SET_PAGE_ACCESS commands when the
introspection tool selects a different EPT view.
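
A hedged sketch of the intended check (the validation function and its
error convention are assumptions; only kvm_eptp_switching_supported is
exported by this patch):

	static int kvmi_validate_ept_view(u16 view)
	{
		/* Assumption: non-default views need VMFUNC/EPTP-switching support. */
		if (view && !kvm_eptp_switching_supported)
			return -EINVAL;
		return 0;
	}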

Export kill_pid_info() to ungracefully shut down the guest.

Export the #PF error code bits.

Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
Signed-off-by: Nicușor Cîțu <ncitu@bitdefender.com>
Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
Signed-off-by: Marian Rotariu <marian.c.rotariu@gmail.com>
---
 arch/x86/include/asm/kvm_emulate.h |   1 +
 arch/x86/include/asm/kvm_host.h    |  18 +++++
 arch/x86/include/asm/vmx.h         |   2 +
 arch/x86/kvm/emulate.c             |   9 ++-
 arch/x86/kvm/mmu.c                 |  10 +++
 arch/x86/kvm/svm.c                 |  68 ++++++++++++++----
 arch/x86/kvm/vmx.c                 |  91 ++++++++++++++++++-----
 arch/x86/kvm/x86.c                 | 111 +++++++++++++++++++++++++----
 include/linux/kvm_host.h           |  10 +++
 kernel/signal.c                    |   1 +
 10 files changed, 277 insertions(+), 44 deletions(-)

Patch

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 93c4bf598fb0..4ed7b702e9ce 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -444,6 +444,7 @@  bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
 #define EMULATION_INTERCEPTED 2
+#define EMULATION_USER_EXIT 3
 void init_decode_cache(struct x86_emulate_ctxt *ctxt);
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 796fc5f1d76c..db4248513900 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -757,6 +757,9 @@  struct kvm_vcpu_arch {
 	/* set at EPT violation at this point */
 	unsigned long exit_qualification;
 
+	/* #PF translated error code from EPT/NPT exit reason */
+	u64 error_code;
+
 	/* pv related host specific info */
 	struct {
 		bool pv_unhalted;
@@ -1186,6 +1189,13 @@  struct kvm_x86_ops {
 
 	int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
 				   uint16_t *vmcs_version);
+
+	void (*msr_intercept)(struct kvm_vcpu *vcpu, unsigned int msr,
+				bool enable);
+	u64 (*fault_gla)(struct kvm_vcpu *vcpu);
+	void (*set_mtf)(struct kvm_vcpu *vcpu, bool enable);
+	bool (*nested_pagefault)(struct kvm_vcpu *vcpu);
+	bool (*spt_fault)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1283,6 +1293,7 @@  extern u64  kvm_max_tsc_scaling_ratio;
 extern u64  kvm_default_tsc_scaling_ratio;
 
 extern u64 kvm_mce_cap_supported;
+extern bool kvm_eptp_switching_supported;
 
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
@@ -1581,4 +1592,11 @@  static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define put_smstate(type, buf, offset, val)                      \
 	*(type *)((buf) + (offset) - 0x7e00) = val
 
+void kvm_arch_msr_intercept(struct kvm_vcpu *vcpu, unsigned int msr,
+				bool enable);
+u64 kvm_mmu_fault_gla(struct kvm_vcpu *vcpu);
+bool kvm_mmu_nested_pagefault(struct kvm_vcpu *vcpu);
+bool kvm_spt_fault(struct kvm_vcpu *vcpu);
+void kvm_set_mtf(struct kvm_vcpu *vcpu, bool enable);
+void kvm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index ade0f153947d..9b18c1fa5068 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -526,6 +526,7 @@  struct vmx_msr_entry {
 #define EPT_VIOLATION_READABLE_BIT	3
 #define EPT_VIOLATION_WRITABLE_BIT	4
 #define EPT_VIOLATION_EXECUTABLE_BIT	5
+#define EPT_VIOLATION_GLA_VALID_BIT	7
 #define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
 #define EPT_VIOLATION_ACC_READ		(1 << EPT_VIOLATION_ACC_READ_BIT)
 #define EPT_VIOLATION_ACC_WRITE		(1 << EPT_VIOLATION_ACC_WRITE_BIT)
@@ -533,6 +534,7 @@  struct vmx_msr_entry {
 #define EPT_VIOLATION_READABLE		(1 << EPT_VIOLATION_READABLE_BIT)
 #define EPT_VIOLATION_WRITABLE		(1 << EPT_VIOLATION_WRITABLE_BIT)
 #define EPT_VIOLATION_EXECUTABLE	(1 << EPT_VIOLATION_EXECUTABLE_BIT)
+#define EPT_VIOLATION_GLA_VALID		(1 << EPT_VIOLATION_GLA_VALID_BIT)
 #define EPT_VIOLATION_GVA_TRANSLATED	(1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
 
 /*
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 78e430f4e15c..74baab83c526 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5366,7 +5366,12 @@  int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 					ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
 done:
-	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
+	if (rc == X86EMUL_RETRY_INSTR)
+		return EMULATION_USER_EXIT;
+	else if (rc == X86EMUL_CONTINUE)
+		return EMULATION_OK;
+	else
+		return EMULATION_FAILED;
 }
 
 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
@@ -5736,6 +5741,8 @@  int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 	if (rc == X86EMUL_INTERCEPTED)
 		return EMULATION_INTERCEPTED;
 
+	if (rc == X86EMUL_RETRY_INSTR)
+		return EMULATION_USER_EXIT;
 	if (rc == X86EMUL_CONTINUE)
 		writeback_registers(ctxt);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ff3252621fcf..ed4b3c593bf5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -6119,3 +6119,13 @@  void kvm_mmu_module_exit(void)
 	unregister_shrinker(&mmu_shrinker);
 	mmu_audit_disable();
 }
+
+u64 kvm_mmu_fault_gla(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->fault_gla(vcpu);
+}
+
+bool kvm_mmu_nested_pagefault(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->nested_pagefault(vcpu);
+}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cc6467b35a85..b5714b8a2f51 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1054,7 +1054,8 @@  static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
 	return !!test_bit(bit_write,  &tmp);
 }
 
-static void set_msr_interception(u32 *msrpm, unsigned msr,
+static void set_msr_interception(struct vcpu_svm *svm,
+				 u32 *msrpm, unsigned msr,
 				 int read, int write)
 {
 	u8 bit_read, bit_write;
@@ -1090,7 +1091,7 @@  static void svm_vcpu_init_msrpm(u32 *msrpm)
 		if (!direct_access_msrs[i].always)
 			continue;
 
-		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
+		set_msr_interception(NULL, msrpm, direct_access_msrs[i].index, 1, 1);
 	}
 }
 
@@ -1142,10 +1143,10 @@  static void svm_enable_lbrv(struct vcpu_svm *svm)
 	u32 *msrpm = svm->msrpm;
 
 	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
-	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
-	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
-	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
-	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+	set_msr_interception(svm, msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+	set_msr_interception(svm, msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+	set_msr_interception(svm, msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+	set_msr_interception(svm, msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
 }
 
 static void svm_disable_lbrv(struct vcpu_svm *svm)
@@ -1153,10 +1154,10 @@  static void svm_disable_lbrv(struct vcpu_svm *svm)
 	u32 *msrpm = svm->msrpm;
 
 	svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
-	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
-	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
-	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
-	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+	set_msr_interception(svm, msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
+	set_msr_interception(svm, msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
+	set_msr_interception(svm, msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
+	set_msr_interception(svm, msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
 static void disable_nmi_singlestep(struct vcpu_svm *svm)
@@ -2660,6 +2661,8 @@  static int pf_interception(struct vcpu_svm *svm)
 	u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
 	u64 error_code = svm->vmcb->control.exit_info_1;
 
+	svm->vcpu.arch.error_code = error_code;
+
 	return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
 			static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
 			svm->vmcb->control.insn_bytes : NULL,
@@ -4262,7 +4265,7 @@  static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		 * We update the L1 MSR bit as well since it will end up
 		 * touching the MSR anyway now.
 		 */
-		set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+		set_msr_interception(svm, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
 		break;
 	case MSR_IA32_PRED_CMD:
 		if (!msr->host_initiated &&
@@ -4278,7 +4281,7 @@  static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 		if (is_guest_mode(vcpu))
 			break;
-		set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
+		set_msr_interception(svm, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
 		break;
 	case MSR_AMD64_VIRT_SPEC_CTRL:
 		if (!msr->host_initiated &&
@@ -7058,6 +7061,41 @@  static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 	return -ENODEV;
 }
 
+static void svm_msr_intercept(struct kvm_vcpu *vcpu, unsigned int msr,
+				bool enable)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u32 *msrpm = svm->msrpm;
+
+	/*
+	 * The code below enables or disables MSR interception for both
+	 * read and write. Ideally it would query the current read bit
+	 * status and pass that value as the read argument.
+	 */
+	set_msr_interception(svm, msrpm, msr, enable, enable);
+}
+
+static u64 svm_fault_gla(struct kvm_vcpu *vcpu)
+{
+	return ~0ull;
+}
+
+static bool svm_nested_pagefault(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
+static bool svm_spt_fault(struct kvm_vcpu *vcpu)
+{
+	const struct vcpu_svm *svm = to_svm(vcpu);
+
+	return (svm->vmcb->control.exit_code == SVM_EXIT_NPF);
+}
+
+static void svm_set_mtf(struct kvm_vcpu *vcpu, bool enable)
+{
+}
+
 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -7189,6 +7227,12 @@  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.mem_enc_unreg_region = svm_unregister_enc_region,
 
 	.nested_enable_evmcs = nested_enable_evmcs,
+
+	.set_mtf = svm_set_mtf,
+	.msr_intercept = svm_msr_intercept,
+	.fault_gla = svm_fault_gla,
+	.nested_pagefault = svm_nested_pagefault,
+	.spt_fault = svm_spt_fault,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8d5d984541be..34e4cda9b1ba 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1308,7 +1308,8 @@  static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
 					    u16 error_code);
 static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
-static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
+							  unsigned long *msr_bitmap,
 							  u32 msr, int type);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
@@ -4255,7 +4256,7 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * in the merging. We update the vmcs01 here for L1 as well
 		 * since it will end up touching the MSR anyway now.
 		 */
-		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+		vmx_disable_intercept_for_msr(vcpu, vmx->vmcs01.msr_bitmap,
 					      MSR_IA32_SPEC_CTRL,
 					      MSR_TYPE_RW);
 		break;
@@ -4283,7 +4284,7 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * vmcs02.msr_bitmap here since it gets completely overwritten
 		 * in the merging.
 		 */
-		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
+		vmx_disable_intercept_for_msr(vcpu, vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
 					      MSR_TYPE_W);
 		break;
 	case MSR_IA32_ARCH_CAPABILITIES:
@@ -5953,7 +5954,8 @@  static void free_vpid(int vpid)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
+							  unsigned long *msr_bitmap,
 							  u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
@@ -6029,13 +6031,14 @@  static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
 	}
 }
 
-static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
+						      unsigned long *msr_bitmap,
 			     			      u32 msr, int type, bool value)
 {
 	if (value)
 		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
 	else
-		vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
+		vmx_disable_intercept_for_msr(vcpu, msr_bitmap, msr, type);
 }
 
 /*
@@ -6096,7 +6099,8 @@  static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
 
 #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
 
-static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
+static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
+					 unsigned long *msr_bitmap,
 					 u8 mode)
 {
 	int msr;
@@ -6112,11 +6116,11 @@  static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
 		 * TPR reads and writes can be virtualized even if virtual interrupt
 		 * delivery is not in use.
 		 */
-		vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
+		vmx_disable_intercept_for_msr(vcpu, msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
 		if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
 			vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
-			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
-			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
+			vmx_disable_intercept_for_msr(vcpu, msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
+			vmx_disable_intercept_for_msr(vcpu, msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
 		}
 	}
 }
@@ -6132,7 +6136,7 @@  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
 		return;
 
 	if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
-		vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
+		vmx_update_msr_bitmap_x2apic(vcpu, msr_bitmap, mode);
 
 	vmx->msr_bitmap_mode = mode;
 }
@@ -7713,10 +7717,11 @@  static int handle_ept_violation(struct kvm_vcpu *vcpu)
 			EPT_VIOLATION_EXECUTABLE))
 		      ? PFERR_PRESENT_MASK : 0;
 
-	error_code |= (exit_qualification & 0x100) != 0 ?
-	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
+	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)
+		      ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
 
 	vcpu->arch.exit_qualification = exit_qualification;
+	vcpu->arch.error_code = error_code;
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
@@ -8026,6 +8031,8 @@  static __init int hardware_setup(void)
 	if (enable_shadow_vmcs)
 		init_vmcs_shadow_fields();
 
+	kvm_eptp_switching_supported = cpu_has_vmx_vmfunc();
+
 	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
 	nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv);
 
@@ -11557,12 +11564,12 @@  static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto free_msrs;
 
 	msr_bitmap = vmx->vmcs01.msr_bitmap;
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(NULL, msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(NULL, msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(NULL, msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(NULL, msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(NULL, msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(NULL, msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
 	vmx->msr_bitmap_mode = 0;
 
 	vmx->loaded_vmcs = &vmx->vmcs01;
@@ -14994,6 +15001,46 @@  static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static void vmx_set_mtf(struct kvm_vcpu *vcpu, bool enable)
+{
+	if (enable)
+		vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
+			      CPU_BASED_MONITOR_TRAP_FLAG);
+	else
+		vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+				CPU_BASED_MONITOR_TRAP_FLAG);
+}
+
+static void vmx_msr_intercept(struct kvm_vcpu *vcpu, unsigned int msr,
+			      bool enable)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+
+	vmx_set_intercept_for_msr(vcpu, msr_bitmap, msr, MSR_TYPE_W, enable);
+}
+
+static u64 vmx_fault_gla(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.exit_qualification & EPT_VIOLATION_GLA_VALID)
+		return vmcs_readl(GUEST_LINEAR_ADDRESS);
+	return ~0ull;
+}
+
+static bool vmx_nested_pagefault(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)
+		return false;
+	return true;
+}
+
+static bool vmx_spt_fault(struct kvm_vcpu *vcpu)
+{
+	const struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	return (vmx->exit_reason == EXIT_REASON_EPT_VIOLATION);
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -15141,6 +15188,12 @@  static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.enable_smi_window = enable_smi_window,
 
 	.nested_enable_evmcs = nested_enable_evmcs,
+
+	.set_mtf = vmx_set_mtf,
+	.msr_intercept = vmx_msr_intercept,
+	.fault_gla = vmx_fault_gla,
+	.nested_pagefault = vmx_nested_pagefault,
+	.spt_fault = vmx_spt_fault,
 };
 
 static void vmx_cleanup_l1d_flush(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f332b947c1b..94dbc270bd84 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -150,6 +150,9 @@  EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
 static bool __read_mostly force_emulation_prefix = false;
 module_param(force_emulation_prefix, bool, S_IRUGO);
 
+bool __read_mostly kvm_eptp_switching_supported;
+EXPORT_SYMBOL_GPL(kvm_eptp_switching_supported);
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -3714,8 +3717,8 @@  static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 	}
 }
 
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
-					 struct kvm_xsave *guest_xsave)
+void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+				  struct kvm_xsave *guest_xsave)
 {
 	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		memset(guest_xsave, 0, sizeof(struct kvm_xsave));
@@ -6349,7 +6352,9 @@  int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 
 		trace_kvm_emulate_insn_start(vcpu);
 		++vcpu->stat.insn_emulation;
-		if (r != EMULATION_OK)  {
+		if (r == EMULATION_USER_EXIT)
+			return EMULATE_DONE;
+		if (r != EMULATION_OK) {
 			if (emulation_type & EMULTYPE_TRAP_UD)
 				return EMULATE_FAIL;
 			if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
@@ -6390,6 +6395,8 @@  int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 
 	r = x86_emulate_insn(ctxt);
 
+	if (r == EMULATION_USER_EXIT)
+		return EMULATE_DONE;
 	if (r == EMULATION_INTERCEPTED)
 		return EMULATE_DONE;
 
@@ -8169,6 +8176,11 @@  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+void kvm_arch_vcpu_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	__get_regs(vcpu, regs);
+}
+
 static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
@@ -8209,6 +8221,39 @@  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+/*
+ * Similar to __set_regs() but it does not reset the exceptions
+ */
+void kvm_arch_vcpu_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
+	vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+
+	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
+	kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
+	kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
+	kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
+	kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
+#ifdef CONFIG_X86_64
+	kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
+	kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
+	kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
+	kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
+	kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
+	kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
+	kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
+	kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
+#endif
+
+	kvm_rip_write(vcpu, regs->rip);
+	kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
+
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
+
 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 {
 	struct kvm_segment cs;
@@ -8264,6 +8309,11 @@  int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+void kvm_arch_vcpu_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	__get_sregs(vcpu, sregs);
+}
+
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
@@ -8457,16 +8507,15 @@  int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					struct kvm_guest_debug *dbg)
+int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu *vcpu,
+				  struct kvm_guest_debug *dbg)
 {
 	unsigned long rflags;
-	int i, r;
-
-	vcpu_load(vcpu);
+	int i;
+	int ret = 0;
 
 	if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
-		r = -EBUSY;
+		ret = -EBUSY;
 		if (vcpu->arch.exception.pending)
 			goto out;
 		if (dbg->control & KVM_GUESTDBG_INJECT_DB)
@@ -8507,11 +8556,19 @@  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 
 	kvm_x86_ops->update_bp_intercept(vcpu);
 
-	r = 0;
-
 out:
+	return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	int ret;
+
+	vcpu_load(vcpu);
+	ret = kvm_arch_vcpu_set_guest_debug(vcpu, dbg);
 	vcpu_put(vcpu);
-	return r;
+	return ret;
 }
 
 /*
@@ -9762,6 +9819,36 @@  bool kvm_vector_hashing_enabled(void)
 }
 EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
 
+void kvm_arch_msr_intercept(struct kvm_vcpu *vcpu, unsigned int msr,
+				bool enable)
+{
+	kvm_x86_ops->msr_intercept(vcpu, msr, enable);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_msr_intercept);
+
+void kvm_arch_queue_bp(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, BP_VECTOR);
+}
+
+void kvm_set_mtf(struct kvm_vcpu *vcpu, bool enable)
+{
+	kvm_x86_ops->set_mtf(vcpu, enable);
+}
+EXPORT_SYMBOL(kvm_set_mtf);
+
+void kvm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
+}
+EXPORT_SYMBOL(kvm_set_interrupt_shadow);
+
+bool kvm_spt_fault(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->spt_fault(vcpu);
+}
+EXPORT_SYMBOL(kvm_spt_fault);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c926698040e0..1f3fa3c50133 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -775,9 +775,13 @@  int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 				    struct kvm_translation *tr);
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+void kvm_arch_vcpu_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+void kvm_arch_vcpu_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs);
+void kvm_arch_vcpu_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs);
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs);
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -786,7 +790,11 @@  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state);
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg);
+int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu *vcpu,
+				  struct kvm_guest_debug *dbg);
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
+void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+				  struct kvm_xsave *guest_xsave);
 
 int kvm_arch_init(void *opaque);
 void kvm_arch_exit(void);
@@ -1299,4 +1307,6 @@  static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
 
+void kvm_arch_queue_bp(struct kvm_vcpu *vcpu);
+
 #endif
diff --git a/kernel/signal.c b/kernel/signal.c
index 9a32bc2088c9..11a9766b6df1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1372,6 +1372,7 @@  int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
 		 */
 	}
 }
+EXPORT_SYMBOL(kill_pid_info);
 
 static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid)
 {