From patchwork Mon Dec  3 21:53:08 2018
X-Patchwork-Submitter: Sean Christopherson
X-Patchwork-Id: 10710731
From: Sean Christopherson <sean.j.christopherson@intel.com>
To: Paolo Bonzini, Radim Krčmář
Cc: kvm@vger.kernel.org
Subject: [PATCH 18/28] KVM: VMX: Add vmx.h to hold VMX definitions and inline functions
Date: Mon, 3 Dec 2018 13:53:08 -0800
Message-Id: <20181203215318.15545-19-sean.j.christopherson@intel.com>
In-Reply-To: <20181203215318.15545-1-sean.j.christopherson@intel.com>
References: <20181203215318.15545-1-sean.j.christopherson@intel.com>
X-Mailer: git-send-email 2.19.2

As a general rule of thumb, functions are only moved to vmx.h if they were
already tagged inline.  Exceptions were made for obvious cases where
functions were already inlined by gcc.
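
To make the split easier to review: several of the helpers that move into
vmx.h, e.g. the vm_{entry,exit}_controls_* accessors, exist to shadow the
current value of a VMCS control field in vcpu_vmx so that rewriting an
unchanged value skips the VMWRITE entirely.  Below is a minimal standalone
model of that shadow-cache pattern; it is an illustrative sketch only, and
fake_vmcs_write32() plus the entry_controls_* names are stand-ins rather
than KVM's actual API:

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_vmcs;	/* stands in for the VM_ENTRY_CONTROLS field */
static int vmwrites;		/* counts the (relatively expensive) VMWRITEs */

struct vcpu {
	uint32_t entry_controls_shadow;	/* mirrors vcpu_vmx's shadow field */
};

static void fake_vmcs_write32(uint32_t val)
{
	fake_vmcs = val;
	vmwrites++;
}

static void entry_controls_set(struct vcpu *v, uint32_t val)
{
	if (v->entry_controls_shadow != val) {	/* elide redundant writes */
		fake_vmcs_write32(val);
		v->entry_controls_shadow = val;
	}
}

static void entry_controls_setbit(struct vcpu *v, uint32_t bit)
{
	entry_controls_set(v, v->entry_controls_shadow | bit);
}

int main(void)
{
	struct vcpu v = { .entry_controls_shadow = 0 };

	entry_controls_setbit(&v, 0x200);	/* first set: one VMWRITE */
	entry_controls_setbit(&v, 0x200);	/* shadow matches: no VMWRITE */
	printf("controls=0x%x after %d VMWRITE(s)\n", (unsigned)fake_vmcs,
	       vmwrites);
	return 0;
}

Run, this prints "controls=0x200 after 1 VMWRITE(s)": the second setbit is
absorbed by the shadow, which is why the shadow fields travel into vmx.h
together with the helpers.
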
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 539 +---------------------------------------
 arch/x86/kvm/vmx/vmx.h | 550 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 554 insertions(+), 535 deletions(-)
 create mode 100644 arch/x86/kvm/vmx/vmx.h

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a4885bc40064..54fe6c1b4593 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -60,6 +60,7 @@
 #include "trace.h"
 #include "vmcs.h"
 #include "vmcs12.h"
+#include "vmx.h"
 #include "x86.h"
 
 MODULE_AUTHOR("Qumranet");
@@ -117,10 +118,6 @@ static u64 __read_mostly host_xss;
 bool __read_mostly enable_pml = 1;
 module_param_named(pml, enable_pml, bool, S_IRUGO);
 
-#define MSR_TYPE_R	1
-#define MSR_TYPE_W	2
-#define MSR_TYPE_RW	3
-
 #define MSR_BITMAP_MODE_X2APIC		1
 #define MSR_BITMAP_MODE_X2APIC_APICV	2
 
@@ -342,317 +339,6 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = {
 };
 module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
 
-enum ept_pointers_status {
-	EPT_POINTERS_CHECK = 0,
-	EPT_POINTERS_MATCH = 1,
-	EPT_POINTERS_MISMATCH = 2
-};
-
-struct kvm_vmx {
-	struct kvm kvm;
-
-	unsigned int tss_addr;
-	bool ept_identity_pagetable_done;
-	gpa_t ept_identity_map_addr;
-
-	enum ept_pointers_status ept_pointers_match;
-	spinlock_t ept_pointer_lock;
-};
-
-struct shared_msr_entry {
-	unsigned index;
-	u64 data;
-	u64 mask;
-};
-
-/*
- * The nested_vmx structure is part of vcpu_vmx, and holds information we need
- * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
- */
-struct nested_vmx {
-	/* Has the level1 guest done vmxon? */
-	bool vmxon;
-	gpa_t vmxon_ptr;
-	bool pml_full;
-
-	/* The guest-physical address of the current VMCS L1 keeps for L2 */
-	gpa_t current_vmptr;
-	/*
-	 * Cache of the guest's VMCS, existing outside of guest memory.
-	 * Loaded from guest memory during VMPTRLD. Flushed to guest
-	 * memory during VMCLEAR and VMPTRLD.
-	 */
-	struct vmcs12 *cached_vmcs12;
-	/*
-	 * Cache of the guest's shadow VMCS, existing outside of guest
-	 * memory. Loaded from guest memory during VM entry. Flushed
-	 * to guest memory during VM exit.
-	 */
-	struct vmcs12 *cached_shadow_vmcs12;
-	/*
-	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
-	 * with the data held by struct vmcs12.
-	 */
-	bool need_vmcs12_sync;
-	bool dirty_vmcs12;
-
-	/*
-	 * vmcs02 has been initialized, i.e. state that is constant for
-	 * vmcs02 has been written to the backing VMCS. Initialization
-	 * is delayed until L1 actually attempts to run a nested VM.
-	 */
-	bool vmcs02_initialized;
-
-	bool change_vmcs01_virtual_apic_mode;
-
-	/*
-	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
-	 * use it. However, VMX features available to L1 will be limited based
-	 * on what the enlightened VMCS supports.
-	 */
-	bool enlightened_vmcs_enabled;
-
-	/* L2 must run next, and mustn't decide to exit to L1. */
-	bool nested_run_pending;
-
-	struct loaded_vmcs vmcs02;
-
-	/*
-	 * Guest pages referred to in the vmcs02 with host-physical
-	 * pointers, so we must keep them pinned while L2 runs.
-	 */
-	struct page *apic_access_page;
-	struct page *virtual_apic_page;
-	struct page *pi_desc_page;
-	struct pi_desc *pi_desc;
-	bool pi_pending;
-	u16 posted_intr_nv;
-
-	struct hrtimer preemption_timer;
-	bool preemption_timer_expired;
-
-	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
-	u64 vmcs01_debugctl;
-	u64 vmcs01_guest_bndcfgs;
-
-	u16 vpid02;
-	u16 last_vpid;
-
-	struct nested_vmx_msrs msrs;
-
-	/* SMM related state */
-	struct {
-		/* in VMX operation on SMM entry? */
-		bool vmxon;
-		/* in guest mode on SMM entry? */
-		bool guest_mode;
-	} smm;
-
-	gpa_t hv_evmcs_vmptr;
-	struct page *hv_evmcs_page;
-	struct hv_enlightened_vmcs *hv_evmcs;
-};
-
-#define POSTED_INTR_ON  0
-#define POSTED_INTR_SN  1
-
-/* Posted-Interrupt Descriptor */
-struct pi_desc {
-	u32 pir[8];     /* Posted interrupt requested */
-	union {
-		struct {
-			/* bit 256 - Outstanding Notification */
-			u16 on : 1,
-			/* bit 257 - Suppress Notification */
-			    sn : 1,
-			/* bit 271:258 - Reserved */
-			    rsvd_1 : 14;
-			/* bit 279:272 - Notification Vector */
-			u8 nv;
-			/* bit 287:280 - Reserved */
-			u8 rsvd_2;
-			/* bit 319:288 - Notification Destination */
-			u32 ndst;
-		};
-		u64 control;
-	};
-	u32 rsvd[6];
-} __aligned(64);
-
-static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-{
-	return test_and_set_bit(POSTED_INTR_ON,
-			(unsigned long *)&pi_desc->control);
-}
-
-static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
-{
-	return test_and_clear_bit(POSTED_INTR_ON,
-			(unsigned long *)&pi_desc->control);
-}
-
-static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
-{
-	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
-}
-
-static inline void pi_clear_sn(struct pi_desc *pi_desc)
-{
-	return clear_bit(POSTED_INTR_SN,
-			(unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_set_sn(struct pi_desc *pi_desc)
-{
-	return set_bit(POSTED_INTR_SN,
-			(unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_clear_on(struct pi_desc *pi_desc)
-{
-	clear_bit(POSTED_INTR_ON,
-		(unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_on(struct pi_desc *pi_desc)
-{
-	return test_bit(POSTED_INTR_ON,
-			(unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_sn(struct pi_desc *pi_desc)
-{
-	return test_bit(POSTED_INTR_SN,
-			(unsigned long *)&pi_desc->control);
-}
-
-#define NR_AUTOLOAD_MSRS 8
-
-struct vmx_msrs {
-	unsigned int nr;
-	struct vmx_msr_entry val[NR_AUTOLOAD_MSRS];
-};
-
-struct vcpu_vmx {
-	struct kvm_vcpu vcpu;
-	unsigned long host_rsp;
-	u8 fail;
-	u8 msr_bitmap_mode;
-	u32 exit_intr_info;
-	u32 idt_vectoring_info;
-	ulong rflags;
-	struct shared_msr_entry *guest_msrs;
-	int nmsrs;
-	int save_nmsrs;
-	bool guest_msrs_dirty;
-	unsigned long host_idt_base;
-#ifdef CONFIG_X86_64
-	u64 msr_host_kernel_gs_base;
-	u64 msr_guest_kernel_gs_base;
-#endif
-
-	u64 arch_capabilities;
-	u64 spec_ctrl;
-
-	u32 vm_entry_controls_shadow;
-	u32 vm_exit_controls_shadow;
-	u32 secondary_exec_control;
-
-	/*
-	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
-	 * non-nested (L1) guest, it always points to vmcs01. For a nested
-	 * guest (L2), it points to a different VMCS. loaded_cpu_state points
-	 * to the VMCS whose state is loaded into the CPU registers that only
-	 * need to be switched when transitioning to/from the kernel; a NULL
-	 * value indicates that host state is loaded.
-	 */
-	struct loaded_vmcs vmcs01;
-	struct loaded_vmcs *loaded_vmcs;
-	struct loaded_vmcs *loaded_cpu_state;
-	bool __launched; /* temporary, used in vmx_vcpu_run */
-	struct msr_autoload {
-		struct vmx_msrs guest;
-		struct vmx_msrs host;
-	} msr_autoload;
-
-	struct {
-		int vm86_active;
-		ulong save_rflags;
-		struct kvm_segment segs[8];
-	} rmode;
-	struct {
-		u32 bitmask; /* 4 bits per segment (1 bit per field) */
-		struct kvm_save_segment {
-			u16 selector;
-			unsigned long base;
-			u32 limit;
-			u32 ar;
-		} seg[8];
-	} segment_cache;
-	int vpid;
-	bool emulation_required;
-
-	u32 exit_reason;
-
-	/* Posted interrupt descriptor */
-	struct pi_desc pi_desc;
-
-	/* Support for a guest hypervisor (nested VMX) */
-	struct nested_vmx nested;
-
-	/* Dynamic PLE window. */
-	int ple_window;
-	bool ple_window_dirty;
-
-	bool req_immediate_exit;
-
-	/* Support for PML */
-#define PML_ENTITY_NUM 512
-	struct page *pml_pg;
-
-	/* apic deadline value in host tsc */
-	u64 hv_deadline_tsc;
-
-	u64 current_tsc_ratio;
-
-	u32 host_pkru;
-
-	unsigned long host_debugctlmsr;
-
-	/*
-	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
-	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
-	 * in msr_ia32_feature_control_valid_bits.
-	 */
-	u64 msr_ia32_feature_control;
-	u64 msr_ia32_feature_control_valid_bits;
-	u64 ept_pointer;
-};
-
-enum segment_cache_field {
-	SEG_FIELD_SEL = 0,
-	SEG_FIELD_BASE = 1,
-	SEG_FIELD_LIMIT = 2,
-	SEG_FIELD_AR = 3,
-
-	SEG_FIELD_NR = 4
-};
-
-static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
-{
-	return container_of(kvm, struct kvm_vmx, kvm);
-}
-
-static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
-{
-	return container_of(vcpu, struct vcpu_vmx, vcpu);
-}
-
-static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
-{
-	return &(to_vmx(vcpu)->pi_desc);
-}
-
 static u16 shadow_read_only_fields[] = {
 #define SHADOW_FIELD_RO(x) x,
 #include "vmcs_shadow_fields.h"
@@ -679,7 +365,6 @@ static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
 
 static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
 static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
-static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 static void vmx_get_segment(struct kvm_vcpu *vcpu,
@@ -1051,15 +736,6 @@ static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
 	return NULL;
 }
 
-static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
-{
-	vmcs_clear(loaded_vmcs->vmcs);
-	if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
-		vmcs_clear(loaded_vmcs->shadow_vmcs);
-	loaded_vmcs->cpu = -1;
-	loaded_vmcs->launched = 0;
-}
-
 #ifdef CONFIG_KEXEC_CORE
 /*
  * This bitmap is used to indicate whether the vmclear
@@ -1100,7 +776,7 @@ static inline void crash_enable_local_vmclear(int cpu) { }
 static inline void crash_disable_local_vmclear(int cpu) { }
 #endif /* CONFIG_KEXEC_CORE */
 
-static void __loaded_vmcs_clear(void *arg)
+void __loaded_vmcs_clear(void *arg)
 {
 	struct loaded_vmcs *loaded_vmcs = arg;
 	int cpu = raw_smp_processor_id();
@@ -1124,86 +800,6 @@ static void __loaded_vmcs_clear(void *arg)
 	crash_enable_local_vmclear(cpu);
 }
 
-static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
-{
-	int cpu = loaded_vmcs->cpu;
-
-	if (cpu != -1)
-		smp_call_function_single(cpu,
-			 __loaded_vmcs_clear, loaded_vmcs, 1);
-}
-
-static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
-{
-	vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
-}
-
-static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
-{
-	vmcs_write32(VM_ENTRY_CONTROLS, val);
-	vmx->vm_entry_controls_shadow = val;
-}
-
-static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
-{
-	if (vmx->vm_entry_controls_shadow != val)
-		vm_entry_controls_init(vmx, val);
-}
-
-static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
-{
-	return vmx->vm_entry_controls_shadow;
-}
-
-
-static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
-{
-	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
-}
-
-static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
-{
-	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
-}
-
-static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
-{
-	vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
-}
-
-static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
-{
-	vmcs_write32(VM_EXIT_CONTROLS, val);
-	vmx->vm_exit_controls_shadow = val;
-}
-
-static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
-{
-	if (vmx->vm_exit_controls_shadow != val)
-		vm_exit_controls_init(vmx, val);
-}
-
-static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
-{
-	return vmx->vm_exit_controls_shadow;
-}
-
-
-static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
-{
-	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
-}
-
-static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
-{
-	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
-}
-
-static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
-{
-	vmx->segment_cache.bitmask = 0;
-}
-
 static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
 				       unsigned field)
 {
@@ -1749,12 +1345,6 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 			new.control) != old.control);
 }
 
-static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
-{
-	vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
-	vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
-}
-
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -3389,88 +2979,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	return 0;
 }
 
-static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
-{
-	int node = cpu_to_node(cpu);
-	struct page *pages;
-	struct vmcs *vmcs;
-
-	pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
-	if (!pages)
-		return NULL;
-	vmcs = page_address(pages);
-	memset(vmcs, 0, vmcs_config.size);
-
-	/* KVM supports Enlightened VMCS v1 only */
-	if (static_branch_unlikely(&enable_evmcs))
-		vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
-	else
-		vmcs->hdr.revision_id = vmcs_config.revision_id;
-
-	if (shadow)
-		vmcs->hdr.shadow_vmcs = 1;
-	return vmcs;
-}
-
-static void free_vmcs(struct vmcs *vmcs)
-{
-	free_pages((unsigned long)vmcs, vmcs_config.order);
-}
-
-/*
- * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
- */
-static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
-{
-	if (!loaded_vmcs->vmcs)
-		return;
-	loaded_vmcs_clear(loaded_vmcs);
-	free_vmcs(loaded_vmcs->vmcs);
-	loaded_vmcs->vmcs = NULL;
-	if (loaded_vmcs->msr_bitmap)
-		free_page((unsigned long)loaded_vmcs->msr_bitmap);
-	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
-}
-
-static struct vmcs *alloc_vmcs(bool shadow)
-{
-	return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
-}
-
-static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
-{
-	loaded_vmcs->vmcs = alloc_vmcs(false);
-	if (!loaded_vmcs->vmcs)
-		return -ENOMEM;
-
-	loaded_vmcs->shadow_vmcs = NULL;
-	loaded_vmcs_init(loaded_vmcs);
-
-	if (cpu_has_vmx_msr_bitmap()) {
-		loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-		if (!loaded_vmcs->msr_bitmap)
-			goto out_vmcs;
-		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
-
-		if (IS_ENABLED(CONFIG_HYPERV) &&
-		    static_branch_unlikely(&enable_evmcs) &&
-		    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
-			struct hv_enlightened_vmcs *evmcs =
-				(struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
-
-			evmcs->hv_enlightenments_control.msr_bitmap = 1;
-		}
-	}
-
-	memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
-
-	return 0;
-
-out_vmcs:
-	free_loaded_vmcs(loaded_vmcs);
-	return -ENOMEM;
-}
-
 static void free_kvm_area(void)
 {
 	int cpu;
@@ -3769,24 +3277,6 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
-static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
-				bool invalidate_gpa)
-{
-	if (enable_ept && (invalidate_gpa || !enable_vpid)) {
-		if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
-			return;
-		ept_sync_context(construct_eptp(vcpu,
-				vcpu->arch.mmu->root_hpa));
-	} else {
-		vpid_sync_context(vpid);
-	}
-}
-
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
-{
-	__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
-}
-
 static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
 {
 	int vpid = to_vmx(vcpu)->vpid;
@@ -3966,7 +3456,7 @@ static int get_ept_level(struct kvm_vcpu *vcpu)
 	return 4;
 }
 
-static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
+u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
 {
 	u64 eptp = VMX_EPTP_MT_WB;
 
@@ -4726,8 +4216,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
 	return mode;
 }
 
-#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
-
 static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
 					 u8 mode)
 {
@@ -4828,11 +4316,6 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 	nested_mark_vmcs12_pages_dirty(vcpu);
 }
 
-static u8 vmx_get_rvi(void)
-{
-	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
-}
-
 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5052,21 +4535,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 	vmx_update_msr_bitmap(vcpu);
 }
 
-static u32 vmx_vmentry_ctrl(void)
-{
-	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
-	return vmcs_config.vmentry_ctrl &
-		~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
-}
-
-static u32 vmx_vmexit_ctrl(void)
-{
-	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
-	return vmcs_config.vmexit_ctrl &
-		~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
-}
-
-static u32 vmx_exec_control(struct vcpu_vmx *vmx)
+u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
 
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
new file mode 100644
index 000000000000..413448e11a8d
--- /dev/null
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -0,0 +1,550 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_H
+#define __KVM_X86_VMX_H
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm.h>
+
+#include "capabilities.h"
+#include "ops.h"
+#include "vmcs.h"
+
+#define MSR_TYPE_R	1
+#define MSR_TYPE_W	2
+#define MSR_TYPE_RW	3
+
+#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
+
+#define NR_AUTOLOAD_MSRS 8
+
+struct vmx_msrs {
+	unsigned int nr;
+	struct vmx_msr_entry val[NR_AUTOLOAD_MSRS];
+};
+
+struct shared_msr_entry {
+	unsigned index;
+	u64 data;
+	u64 mask;
+};
+
+enum segment_cache_field {
+	SEG_FIELD_SEL = 0,
+	SEG_FIELD_BASE = 1,
+	SEG_FIELD_LIMIT = 2,
+	SEG_FIELD_AR = 3,
+
+	SEG_FIELD_NR = 4
+};
+
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+	u32 pir[8];     /* Posted interrupt requested */
+	union {
+		struct {
+			/* bit 256 - Outstanding Notification */
+			u16 on : 1,
+			/* bit 257 - Suppress Notification */
+			    sn : 1,
+			/* bit 271:258 - Reserved */
+			    rsvd_1 : 14;
+			/* bit 279:272 - Notification Vector */
+			u8 nv;
+			/* bit 287:280 - Reserved */
+			u8 rsvd_2;
+			/* bit 319:288 - Notification Destination */
+			u32 ndst;
+		};
+		u64 control;
+	};
+	u32 rsvd[6];
+} __aligned(64);
+
+
+/*
+ * The nested_vmx structure is part of vcpu_vmx, and holds information we need
+ * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
+ */
+struct nested_vmx {
+	/* Has the level1 guest done vmxon? */
+	bool vmxon;
+	gpa_t vmxon_ptr;
+	bool pml_full;
+
+	/* The guest-physical address of the current VMCS L1 keeps for L2 */
+	gpa_t current_vmptr;
+	/*
+	 * Cache of the guest's VMCS, existing outside of guest memory.
+	 * Loaded from guest memory during VMPTRLD. Flushed to guest
+	 * memory during VMCLEAR and VMPTRLD.
+	 */
+	struct vmcs12 *cached_vmcs12;
+	/*
+	 * Cache of the guest's shadow VMCS, existing outside of guest
+	 * memory. Loaded from guest memory during VM entry. Flushed
+	 * to guest memory during VM exit.
+	 */
+	struct vmcs12 *cached_shadow_vmcs12;
+	/*
+	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
+	 * with the data held by struct vmcs12.
+	 */
+	bool need_vmcs12_sync;
+	bool dirty_vmcs12;
+
+	/*
+	 * vmcs02 has been initialized, i.e. state that is constant for
+	 * vmcs02 has been written to the backing VMCS. Initialization
+	 * is delayed until L1 actually attempts to run a nested VM.
+	 */
+	bool vmcs02_initialized;
+
+	bool change_vmcs01_virtual_apic_mode;
+
+	/*
+	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
+	 * use it. However, VMX features available to L1 will be limited based
+	 * on what the enlightened VMCS supports.
+	 */
+	bool enlightened_vmcs_enabled;
+
+	/* L2 must run next, and mustn't decide to exit to L1. */
+	bool nested_run_pending;
+
+	struct loaded_vmcs vmcs02;
+
+	/*
+	 * Guest pages referred to in the vmcs02 with host-physical
+	 * pointers, so we must keep them pinned while L2 runs.
+	 */
+	struct page *apic_access_page;
+	struct page *virtual_apic_page;
+	struct page *pi_desc_page;
+	struct pi_desc *pi_desc;
+	bool pi_pending;
+	u16 posted_intr_nv;
+
+	struct hrtimer preemption_timer;
+	bool preemption_timer_expired;
+
+	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
+	u64 vmcs01_debugctl;
+	u64 vmcs01_guest_bndcfgs;
+
+	u16 vpid02;
+	u16 last_vpid;
+
+	struct nested_vmx_msrs msrs;
+
+	/* SMM related state */
+	struct {
+		/* in VMX operation on SMM entry? */
+		bool vmxon;
+		/* in guest mode on SMM entry? */
+		bool guest_mode;
+	} smm;
+
+	gpa_t hv_evmcs_vmptr;
+	struct page *hv_evmcs_page;
+	struct hv_enlightened_vmcs *hv_evmcs;
+};
+
+struct vcpu_vmx {
+	struct kvm_vcpu vcpu;
+	unsigned long host_rsp;
+	u8 fail;
+	u8 msr_bitmap_mode;
+	u32 exit_intr_info;
+	u32 idt_vectoring_info;
+	ulong rflags;
+	struct shared_msr_entry *guest_msrs;
+	int nmsrs;
+	int save_nmsrs;
+	bool guest_msrs_dirty;
+	unsigned long host_idt_base;
+#ifdef CONFIG_X86_64
+	u64 msr_host_kernel_gs_base;
+	u64 msr_guest_kernel_gs_base;
+#endif
+
+	u64 arch_capabilities;
+	u64 spec_ctrl;
+
+	u32 vm_entry_controls_shadow;
+	u32 vm_exit_controls_shadow;
+	u32 secondary_exec_control;
+
+	/*
+	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
+	 * non-nested (L1) guest, it always points to vmcs01. For a nested
+	 * guest (L2), it points to a different VMCS. loaded_cpu_state points
+	 * to the VMCS whose state is loaded into the CPU registers that only
+	 * need to be switched when transitioning to/from the kernel; a NULL
+	 * value indicates that host state is loaded.
+	 */
+	struct loaded_vmcs vmcs01;
+	struct loaded_vmcs *loaded_vmcs;
+	struct loaded_vmcs *loaded_cpu_state;
+	bool __launched; /* temporary, used in vmx_vcpu_run */
+	struct msr_autoload {
+		struct vmx_msrs guest;
+		struct vmx_msrs host;
+	} msr_autoload;
+
+	struct {
+		int vm86_active;
+		ulong save_rflags;
+		struct kvm_segment segs[8];
+	} rmode;
+	struct {
+		u32 bitmask; /* 4 bits per segment (1 bit per field) */
+		struct kvm_save_segment {
+			u16 selector;
+			unsigned long base;
+			u32 limit;
+			u32 ar;
+		} seg[8];
+	} segment_cache;
+	int vpid;
+	bool emulation_required;
+
+	u32 exit_reason;
+
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
+	/* Support for a guest hypervisor (nested VMX) */
+	struct nested_vmx nested;
+
+	/* Dynamic PLE window. */
+	int ple_window;
+	bool ple_window_dirty;
+
+	bool req_immediate_exit;
+
+	/* Support for PML */
+#define PML_ENTITY_NUM 512
+	struct page *pml_pg;
+
+	/* apic deadline value in host tsc */
+	u64 hv_deadline_tsc;
+
+	u64 current_tsc_ratio;
+
+	u32 host_pkru;
+
+	unsigned long host_debugctlmsr;
+
+	/*
+	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
+	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
+	 * in msr_ia32_feature_control_valid_bits.
+	 */
+	u64 msr_ia32_feature_control;
+	u64 msr_ia32_feature_control_valid_bits;
+	u64 ept_pointer;
+};
+
+enum ept_pointers_status {
+	EPT_POINTERS_CHECK = 0,
+	EPT_POINTERS_MATCH = 1,
+	EPT_POINTERS_MISMATCH = 2
+};
+
+struct kvm_vmx {
+	struct kvm kvm;
+
+	unsigned int tss_addr;
+	bool ept_identity_pagetable_done;
+	gpa_t ept_identity_map_addr;
+
+	enum ept_pointers_status ept_pointers_match;
+	spinlock_t ept_pointer_lock;
+};
+
+#define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
+static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+	return test_and_clear_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+	return clear_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+	return set_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+	clear_bit(POSTED_INTR_ON,
+		(unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_on(struct pi_desc *pi_desc)
+{
+	return test_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_sn(struct pi_desc *pi_desc)
+{
+	return test_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline u8 vmx_get_rvi(void)
+{
+	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+}
+
+static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
+{
+	vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
+}
+
+static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
+{
+	vmcs_write32(VM_ENTRY_CONTROLS, val);
+	vmx->vm_entry_controls_shadow = val;
+}
+
+static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
+{
+	if (vmx->vm_entry_controls_shadow != val)
+		vm_entry_controls_init(vmx, val);
+}
+
+static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
+{
+	return vmx->vm_entry_controls_shadow;
+}
+
+static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+{
+	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
+}
+
+static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+{
+	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
+}
+
+static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
+{
+	vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
+}
+
+static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
+{
+	vmcs_write32(VM_EXIT_CONTROLS, val);
+	vmx->vm_exit_controls_shadow = val;
+}
+
+static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
+{
+	if (vmx->vm_exit_controls_shadow != val)
+		vm_exit_controls_init(vmx, val);
+}
+
+static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
+{
+	return vmx->vm_exit_controls_shadow;
+}
+
+static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+{
+	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
+}
+
+static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+{
+	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
+}
+
+static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+	vmx->segment_cache.bitmask = 0;
+}
+
+static u32 vmx_vmentry_ctrl(void)
+{
+	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
+	return vmcs_config.vmentry_ctrl &
+		~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
+}
+
+static u32 vmx_vmexit_ctrl(void)
+{
+	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
+	return vmcs_config.vmexit_ctrl &
+		~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
+}
+
+u32 vmx_exec_control(struct vcpu_vmx *vmx);
+
+static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+{
+	return container_of(kvm, struct kvm_vmx, kvm);
+}
+
+static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct vcpu_vmx, vcpu);
+}
+
+static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+	return &(to_vmx(vcpu)->pi_desc);
+}
+
+static inline struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
+{
+	int node = cpu_to_node(cpu);
+	struct page *pages;
+	struct vmcs *vmcs;
+
+	pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
+	if (!pages)
+		return NULL;
+	vmcs = page_address(pages);
+	memset(vmcs, 0, vmcs_config.size);
+
+	/* KVM supports Enlightened VMCS v1 only */
+	if (static_branch_unlikely(&enable_evmcs))
+		vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
+	else
+		vmcs->hdr.revision_id = vmcs_config.revision_id;
+
+	if (shadow)
+		vmcs->hdr.shadow_vmcs = 1;
+	return vmcs;
+}
+
+static inline void free_vmcs(struct vmcs *vmcs)
+{
+	free_pages((unsigned long)vmcs, vmcs_config.order);
+}
+
+static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
+{
+	vmcs_clear(loaded_vmcs->vmcs);
+	if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
+		vmcs_clear(loaded_vmcs->shadow_vmcs);
+	loaded_vmcs->cpu = -1;
+	loaded_vmcs->launched = 0;
+}
+
+
+void __loaded_vmcs_clear(void *arg);
+
+static inline void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
+{
+	int cpu = loaded_vmcs->cpu;
+
+	if (cpu != -1)
+		smp_call_function_single(cpu,
+			 __loaded_vmcs_clear, loaded_vmcs, 1);
+}
+
+/*
+ * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
+ */
+static inline void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+{
+	if (!loaded_vmcs->vmcs)
+		return;
+	loaded_vmcs_clear(loaded_vmcs);
+	free_vmcs(loaded_vmcs->vmcs);
+	loaded_vmcs->vmcs = NULL;
+	if (loaded_vmcs->msr_bitmap)
+		free_page((unsigned long)loaded_vmcs->msr_bitmap);
+	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
+}
+
+static inline struct vmcs *alloc_vmcs(bool shadow)
+{
+	return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
+}
+
+static inline int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+{
+	loaded_vmcs->vmcs = alloc_vmcs(false);
+	if (!loaded_vmcs->vmcs)
+		return -ENOMEM;
+
+	loaded_vmcs->shadow_vmcs = NULL;
+	loaded_vmcs_init(loaded_vmcs);
+
+	if (cpu_has_vmx_msr_bitmap()) {
+		loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+		if (!loaded_vmcs->msr_bitmap)
+			goto out_vmcs;
+		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+
+		if (IS_ENABLED(CONFIG_HYPERV) &&
+		    static_branch_unlikely(&enable_evmcs) &&
+		    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
+			struct hv_enlightened_vmcs *evmcs =
+				(struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
+
+			evmcs->hv_enlightenments_control.msr_bitmap = 1;
+		}
+	}
+
+	memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
+
+	return 0;
+
+out_vmcs:
+	free_loaded_vmcs(loaded_vmcs);
+	return -ENOMEM;
+}
+
+u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
+
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
+				bool invalidate_gpa)
+{
+	if (enable_ept && (invalidate_gpa || !enable_vpid)) {
+		if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
+			return;
+		ept_sync_context(construct_eptp(vcpu,
+				vcpu->arch.mmu->root_hpa));
+	} else {
+		vpid_sync_context(vpid);
+	}
+}
+
+static inline void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+{
+	__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
+}
+
+static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+	vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+	vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
+#endif /* __KVM_X86_VMX_H */
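
As a closing note on the relocated pi_desc: the comments in the struct give
bit positions within the whole 64-byte descriptor, so POSTED_INTR_ON (bit 0
of 'control') must land on descriptor bit 256, immediately after the 256-bit
PIR array.  The standalone sketch below (plain C11; __attribute__((aligned(64)))
substitutes for the kernel's __aligned(64), and it assumes x86's little-endian
bitfield layout) asserts exactly that; it is not part of the patch:

#include <stdint.h>
#include <stddef.h>

struct pi_desc {
	uint32_t pir[8];	/* bits 255:0 - posted interrupt requests */
	union {
		struct {
			/* bit 256 - Outstanding Notification */
			uint16_t on : 1,
			/* bit 257 - Suppress Notification */
				 sn : 1,
			/* bits 271:258 - Reserved */
				 rsvd_1 : 14;
			uint8_t nv;	/* bits 279:272 - Notification Vector */
			uint8_t rsvd_2;
			uint32_t ndst;	/* bits 319:288 - Notification Dest. */
		};
		uint64_t control;
	};
	uint32_t rsvd[6];
} __attribute__((aligned(64)));

/* pir[8] occupies bytes 0-31, so 'control' begins at descriptor bit 256. */
_Static_assert(offsetof(struct pi_desc, control) == 32, "control at bit 256");
_Static_assert(sizeof(struct pi_desc) == 64, "descriptor is one cache line");

int main(void)
{
	struct pi_desc pi = { .control = 0 };

	pi.on = 1;			/* POSTED_INTR_ON == bit 0 of control */
	return pi.control == 1 ? 0 : 1;	/* exits 0 iff the layout holds */
}

This is also why pi_test_on() and friends can hand &pi_desc->control straight
to the generic bitops: the notification bits are the low bits of a naturally
aligned 64-bit word.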