Message ID | 1375366117-9014-11-git-send-email-gleb@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 08/01/2013 10:08 PM, Gleb Natapov wrote: > +/* Emulate the INVEPT instruction */ > +static int handle_invept(struct kvm_vcpu *vcpu) > +{ > + u32 vmx_instruction_info; > + bool ok; > + unsigned long type; > + gva_t gva; > + struct x86_exception e; > + struct { > + u64 eptp, gpa; > + } operand; > + > + if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || > + !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { > + kvm_queue_exception(vcpu, UD_VECTOR); > + return 1; > + } > + > + if (!nested_vmx_check_permission(vcpu)) > + return 1; > + > + if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { > + kvm_queue_exception(vcpu, UD_VECTOR); > + return 1; > + } > + > + /* According to the Intel VMX instruction reference, the memory > + * operand is read even if it isn't needed (e.g., for type==global) > + */ > + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), > + vmx_instruction_info, &gva)) > + return 1; > + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, > + sizeof(operand), &e)) { > + kvm_inject_page_fault(vcpu, &e); > + return 1; > + } > + > + type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); > + > + switch (type) { > + case VMX_EPT_EXTENT_GLOBAL: > + case VMX_EPT_EXTENT_CONTEXT: > + ok = !!(nested_vmx_ept_caps & > + (1UL << (type + VMX_EPT_EXTENT_SHIFT))); > + break; > + default: > + ok = false; > + } > + > + if (ok) { > + kvm_mmu_sync_roots(vcpu); > + kvm_mmu_flush_tlb(vcpu); > + nested_vmx_succeed(vcpu); > + } > + else > + nested_vmx_failValid(vcpu, > + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); > + > + skip_emulated_instruction(vcpu); > + return 1; Can this code be changed to: switch (type) { case VMX_EPT_EXTENT_GLOBAL: case VMX_EPT_EXTENT_CONTEXT: if (nested_vmx_ept_caps & (1UL << (type + VMX_EPT_EXTENT_SHIFT) { kvm_mmu_sync_roots(vcpu); kvm_mmu_flush_tlb(vcpu); nested_vmx_succeed(vcpu); break; } default: nested_vmx_failValid(vcpu, 
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); } ? That's clearer I think. Also, we can sync the shadow page table only if the current eptp is the required eptp, that means: if (type == GLOBAL || operand.eptp == nested_ept_get_cr3(vcpu)) { kvm_mmu_sync_roots(vcpu); ...... } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 02, 2013 at 04:06:00PM +0800, Xiao Guangrong wrote: > On 08/01/2013 10:08 PM, Gleb Natapov wrote: > > > +/* Emulate the INVEPT instruction */ > > +static int handle_invept(struct kvm_vcpu *vcpu) > > +{ > > + u32 vmx_instruction_info; > > + bool ok; > > + unsigned long type; > > + gva_t gva; > > + struct x86_exception e; > > + struct { > > + u64 eptp, gpa; > > + } operand; > > + > > + if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || > > + !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { > > + kvm_queue_exception(vcpu, UD_VECTOR); > > + return 1; > > + } > > + > > + if (!nested_vmx_check_permission(vcpu)) > > + return 1; > > + > > + if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { > > + kvm_queue_exception(vcpu, UD_VECTOR); > > + return 1; > > + } > > + > > + /* According to the Intel VMX instruction reference, the memory > > + * operand is read even if it isn't needed (e.g., for type==global) > > + */ > > + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > > + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), > > + vmx_instruction_info, &gva)) > > + return 1; > > + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, > > + sizeof(operand), &e)) { > > + kvm_inject_page_fault(vcpu, &e); > > + return 1; > > + } > > + > > + type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); > > + > > + switch (type) { > > + case VMX_EPT_EXTENT_GLOBAL: > > + case VMX_EPT_EXTENT_CONTEXT: > > + ok = !!(nested_vmx_ept_caps & > > + (1UL << (type + VMX_EPT_EXTENT_SHIFT))); > > + break; > > + default: > > + ok = false; > > + } > > + > > + if (ok) { > > + kvm_mmu_sync_roots(vcpu); > > + kvm_mmu_flush_tlb(vcpu); > > + nested_vmx_succeed(vcpu); > > + } > > + else > > + nested_vmx_failValid(vcpu, > > + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); > > + > > + skip_emulated_instruction(vcpu); > > + return 1; > > Can this code be changed to: > > switch (type) { > case VMX_EPT_EXTENT_GLOBAL: > case VMX_EPT_EXTENT_CONTEXT: > if 
(nested_vmx_ept_caps & > (1UL << (type + VMX_EPT_EXTENT_SHIFT) { > kvm_mmu_sync_roots(vcpu); > kvm_mmu_flush_tlb(vcpu); > nested_vmx_succeed(vcpu); > break; > } > default: > nested_vmx_failValid(vcpu, > VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); > } > ? > That's clearer i think. > > Also, we can sync the shadow page table only if the current eptp is the required > eptp, that means: > > if (type == GLOBAL || operand.eptp == nested_ept_get_cr3(vcpu)) { > kvm_mmu_sync_roots(vcpu); > ...... > } Good point. Will do. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f3e01a2..966502d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -387,6 +387,7 @@ enum vmcs_field { #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 #define VMX_EPT_EXTENT_CONTEXT 1 #define VMX_EPT_EXTENT_GLOBAL 2 +#define VMX_EPT_EXTENT_SHIFT 24 #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) @@ -394,6 +395,7 @@ enum vmcs_field { #define VMX_EPTP_WB_BIT (1ull << 14) #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) +#define VMX_EPT_INVEPT_BIT (1ull << 20) #define VMX_EPT_AD_BIT (1ull << 21) #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d651082..7a34e8f 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -65,6 +65,7 @@ #define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 #define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a512ecf..a1f8f7b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3182,6 +3182,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) mmu_sync_roots(vcpu); spin_unlock(&vcpu->kvm->mmu_lock); } +EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, struct x86_exception *exception) @@ -3451,6 +3452,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) ++vcpu->stat.tlb_flush; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } +EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); static void paging_new_cr3(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4e98764..a4d9385 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2156,6 +2156,7 @@ 
static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; static u32 nested_vmx_misc_low, nested_vmx_misc_high; +static u32 nested_vmx_ept_caps; static __init void nested_vmx_setup_ctls_msrs(void) { /* @@ -6270,6 +6271,70 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return 1; } +/* Emulate the INVEPT instruction */ +static int handle_invept(struct kvm_vcpu *vcpu) +{ + u32 vmx_instruction_info; + bool ok; + unsigned long type; + gva_t gva; + struct x86_exception e; + struct { + u64 eptp, gpa; + } operand; + + if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || + !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + if (!nested_vmx_check_permission(vcpu)) + return 1; + + if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + /* According to the Intel VMX instruction reference, the memory + * operand is read even if it isn't needed (e.g., for type==global) + */ + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + vmx_instruction_info, &gva)) + return 1; + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, + sizeof(operand), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } + + type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + + switch (type) { + case VMX_EPT_EXTENT_GLOBAL: + case VMX_EPT_EXTENT_CONTEXT: + ok = !!(nested_vmx_ept_caps & + (1UL << (type + VMX_EPT_EXTENT_SHIFT))); + break; + default: + ok = false; + } + + if (ok) { + kvm_mmu_sync_roots(vcpu); + kvm_mmu_flush_tlb(vcpu); + nested_vmx_succeed(vcpu); + } + else + nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + + skip_emulated_instruction(vcpu); + return 1; +} + /* * The exit handlers return 1 if the exit 
was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -6314,6 +6379,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, + [EXIT_REASON_INVEPT] = handle_invept, }; static const int kvm_vmx_max_exit_handlers = @@ -6540,6 +6606,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: + case EXIT_REASON_INVEPT: /* * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting!