diff mbox

[v5] KVM: nVMX: Fully support of nested VMX preemption timer

Message ID 1379319104-10266-1-git-send-email-yzt356@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Arthur Chunqi Li Sept. 16, 2013, 8:11 a.m. UTC
This patch contains the following two changes:
1. Fix the bug in nested preemption timer support. If vmexit L2->L0
with some reasons not emulated by L1, preemption timer value should
be save in such exits.
2. Add support of "Save VMX-preemption timer value" VM-Exit controls
to nVMX.

With this patch, nested VMX preemption timer features are fully
supported.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
ChangeLog to v4:
	Format changes and remove a flag in nested_vmx.
 arch/x86/include/uapi/asm/msr-index.h |    1 +
 arch/x86/kvm/vmx.c                    |   44 +++++++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

Comments

Gleb Natapov Sept. 22, 2013, 7:47 a.m. UTC | #1
On Mon, Sep 16, 2013 at 04:11:44PM +0800, Arthur Chunqi Li wrote:
> This patch contains the following two changes:
> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
> with some reasons not emulated by L1, preemption timer value should
> be save in such exits.
> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
> to nVMX.
> 
> With this patch, nested VMX preemption timer features are fully
> supported.
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>

One more if() on vmentry path is unfortunate, but I do not see a way
around it yet.

> ---
> ChangeLog to v4:
> 	Format changes and remove a flag in nested_vmx.
>  arch/x86/include/uapi/asm/msr-index.h |    1 +
>  arch/x86/kvm/vmx.c                    |   44 +++++++++++++++++++++++++++++++--
>  2 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> index bb04650..b93e09a 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -536,6 +536,7 @@
>  
>  /* MSR_IA32_VMX_MISC bits */
>  #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
>  /* AMD-V MSRs */
>  
>  #define MSR_VM_CR                       0xc0010114
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1f1da43..e1fa13a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2204,7 +2204,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>  #ifdef CONFIG_X86_64
>  		VM_EXIT_HOST_ADDR_SPACE_SIZE |
>  #endif
> -		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
> +		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
> +		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> +	if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
> +		!(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
> +		nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> +		nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
> +	}
>  	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>  				      VM_EXIT_LOAD_IA32_EFER);
>  
> @@ -6707,6 +6713,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>  	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>  }
>  
> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
> +{
> +	u64 delta_tsc_l1;
> +	u32 preempt_val_l1, preempt_val_l2, preempt_scale;
> +
> +	if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
> +			PIN_BASED_VMX_PREEMPTION_TIMER))
> +		return;
> +	preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
> +			MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
> +	preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> +	delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
> +		- vcpu->arch.last_guest_tsc;
> +	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
> +	if (preempt_val_l2 <= preempt_val_l1)
> +		preempt_val_l2 = 0;
> +	else
> +		preempt_val_l2 -= preempt_val_l1;
> +	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
> +}
> +
>  /*
>   * The guest has exited.  See if we can fix it or if we need userspace
>   * assistance.
> @@ -7131,6 +7158,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	atomic_switch_perf_msrs(vmx);
>  	debugctlmsr = get_debugctlmsr();
>  
> +	if (is_guest_mode(vcpu) && !(vmx->nested.nested_run_pending))
> +		nested_adjust_preemption_timer(vcpu);
>  	vmx->__launched = vmx->loaded_vmcs->launched;
>  	asm(
>  		/* Store host registers */
> @@ -7518,6 +7547,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	u32 exec_control;
> +	u32 exit_control;
>  
>  	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>  	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> @@ -7691,7 +7721,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
>  	 * bits are further modified by vmx_set_efer() below.
>  	 */
> -	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> +	exit_control = vmcs_config.vmexit_ctrl;
> +	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
> +		exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> +	vmcs_write32(VM_EXIT_CONTROLS, exit_control);
>  
>  	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>  	 * emulated by vmx_set_efer(), below.
> @@ -8090,6 +8123,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs12->guest_pending_dbg_exceptions =
>  		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>  
> +	if ((vmcs12->pin_based_vm_exec_control &
> +			PIN_BASED_VMX_PREEMPTION_TIMER) &&
> +			(vmcs12->vm_exit_controls &
> +			VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> +		vmcs12->vmx_preemption_timer_value =
> +			vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> +
>  	/*
>  	 * In some cases (usually, nested EPT), L2 is allowed to change its
>  	 * own CR3 without exiting. If it has changed it, we must keep it.
> -- 
> 1.7.9.5

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Sept. 25, 2013, 1:58 p.m. UTC | #2
Il 16/09/2013 10:11, Arthur Chunqi Li ha scritto:
> This patch contains the following two changes:
> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
> with some reasons not emulated by L1, preemption timer value should
> be save in such exits.
> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
> to nVMX.
> 
> With this patch, nested VMX preemption timer features are fully
> supported.
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
> ChangeLog to v4:
> 	Format changes and remove a flag in nested_vmx.
>  arch/x86/include/uapi/asm/msr-index.h |    1 +
>  arch/x86/kvm/vmx.c                    |   44 +++++++++++++++++++++++++++++++--
>  2 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> index bb04650..b93e09a 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -536,6 +536,7 @@
>  
>  /* MSR_IA32_VMX_MISC bits */
>  #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
>  /* AMD-V MSRs */
>  
>  #define MSR_VM_CR                       0xc0010114
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1f1da43..e1fa13a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2204,7 +2204,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>  #ifdef CONFIG_X86_64
>  		VM_EXIT_HOST_ADDR_SPACE_SIZE |
>  #endif
> -		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
> +		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
> +		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> +	if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
> +		!(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
> +		nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> +		nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
> +	}
>  	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>  				      VM_EXIT_LOAD_IA32_EFER);
>  
> @@ -6707,6 +6713,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>  	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>  }
>  
> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
> +{
> +	u64 delta_tsc_l1;
> +	u32 preempt_val_l1, preempt_val_l2, preempt_scale;
> +
> +	if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
> +			PIN_BASED_VMX_PREEMPTION_TIMER))
> +		return;
> +	preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
> +			MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
> +	preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> +	delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
> +		- vcpu->arch.last_guest_tsc;
> +	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
> +	if (preempt_val_l2 <= preempt_val_l1)
> +		preempt_val_l2 = 0;
> +	else
> +		preempt_val_l2 -= preempt_val_l1;
> +	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
> +}
> +
>  /*
>   * The guest has exited.  See if we can fix it or if we need userspace
>   * assistance.
> @@ -7131,6 +7158,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	atomic_switch_perf_msrs(vmx);
>  	debugctlmsr = get_debugctlmsr();
>  
> +	if (is_guest_mode(vcpu) && !(vmx->nested.nested_run_pending))
> +		nested_adjust_preemption_timer(vcpu);
>  	vmx->__launched = vmx->loaded_vmcs->launched;
>  	asm(
>  		/* Store host registers */
> @@ -7518,6 +7547,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	u32 exec_control;
> +	u32 exit_control;
>  
>  	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>  	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> @@ -7691,7 +7721,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
>  	 * bits are further modified by vmx_set_efer() below.
>  	 */
> -	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> +	exit_control = vmcs_config.vmexit_ctrl;
> +	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
> +		exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> +	vmcs_write32(VM_EXIT_CONTROLS, exit_control);
>  
>  	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>  	 * emulated by vmx_set_efer(), below.
> @@ -8090,6 +8123,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs12->guest_pending_dbg_exceptions =
>  		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>  
> +	if ((vmcs12->pin_based_vm_exec_control &
> +			PIN_BASED_VMX_PREEMPTION_TIMER) &&
> +			(vmcs12->vm_exit_controls &
> +			VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> +		vmcs12->vmx_preemption_timer_value =
> +			vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> +
>  	/*
>  	 * In some cases (usually, nested EPT), L2 is allowed to change its
>  	 * own CR3 without exiting. If it has changed it, we must keep it.
> 

Applying to kvm/queue, thanks.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb04650..b93e09a 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -536,6 +536,7 @@ 
 
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
 /* AMD-V MSRs */
 
 #define MSR_VM_CR                       0xc0010114
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f1da43..e1fa13a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2204,7 +2204,13 @@  static __init void nested_vmx_setup_ctls_msrs(void)
 #ifdef CONFIG_X86_64
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
-		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+	if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+		!(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+		nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+		nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+	}
 	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
 				      VM_EXIT_LOAD_IA32_EFER);
 
@@ -6707,6 +6713,27 @@  static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
+static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
+{
+	u64 delta_tsc_l1;
+	u32 preempt_val_l1, preempt_val_l2, preempt_scale;
+
+	if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
+			PIN_BASED_VMX_PREEMPTION_TIMER))
+		return;
+	preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
+			MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
+	preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+	delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
+		- vcpu->arch.last_guest_tsc;
+	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
+	if (preempt_val_l2 <= preempt_val_l1)
+		preempt_val_l2 = 0;
+	else
+		preempt_val_l2 -= preempt_val_l1;
+	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
+}
+
 /*
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
@@ -7131,6 +7158,8 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	atomic_switch_perf_msrs(vmx);
 	debugctlmsr = get_debugctlmsr();
 
+	if (is_guest_mode(vcpu) && !(vmx->nested.nested_run_pending))
+		nested_adjust_preemption_timer(vcpu);
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
@@ -7518,6 +7547,7 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exec_control;
+	u32 exit_control;
 
 	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7691,7 +7721,10 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
 	 * bits are further modified by vmx_set_efer() below.
 	 */
-	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+	exit_control = vmcs_config.vmexit_ctrl;
+	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+		exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+	vmcs_write32(VM_EXIT_CONTROLS, exit_control);
 
 	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
 	 * emulated by vmx_set_efer(), below.
@@ -8090,6 +8123,13 @@  static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs12->guest_pending_dbg_exceptions =
 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+	if ((vmcs12->pin_based_vm_exec_control &
+			PIN_BASED_VMX_PREEMPTION_TIMER) &&
+			(vmcs12->vm_exit_controls &
+			VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+		vmcs12->vmx_preemption_timer_value =
+			vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+
 	/*
 	 * In some cases (usually, nested EPT), L2 is allowed to change its
 	 * own CR3 without exiting. If it has changed it, we must keep it.