diff mbox

[v2,4/5] KVM: nVMX: Allow to disable VM_{ENTRY_LOAD,EXIT_SAVE}_DEBUG_CONTROLS

Message ID 1d6af71531e07c1f765c116d88c94993b6cedbe3.1402919981.git.jan.kiszka@siemens.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Kiszka June 16, 2014, 11:59 a.m. UTC
Allow L1 to "leak" its debug controls into L2, i.e. permit cleared
VM_{ENTRY_LOAD,EXIT_SAVE}_DEBUG_CONTROLS. This requires to manually
transfer the state of DR7 and IA32_DEBUGCTLMSR from L1 into L2 as both
run on different VMCS.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/kvm/vmx.c | 44 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)

Comments

Bandan Das June 16, 2014, 5:02 p.m. UTC | #1
Jan Kiszka <jan.kiszka@siemens.com> writes:

...
>  	/* cpu-based controls */
>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
>  		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> @@ -2409,11 +2422,17 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
>  					nested_vmx_procbased_ctls_high);
>  		break;
>  	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
> +		*pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low,
> +					nested_vmx_exit_ctls_high);
> +		break;
>  	case MSR_IA32_VMX_EXIT_CTLS:
>  		*pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
>  					nested_vmx_exit_ctls_high);
>  		break;
>  	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
> +		*pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low,
> +					nested_vmx_entry_ctls_high);
> +		break;
>  	case MSR_IA32_VMX_ENTRY_CTLS:
>  		*pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
>  					nested_vmx_entry_ctls_high);
> @@ -7836,7 +7855,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
>  	vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
>  
> -	vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
> +	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
> +		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> +		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
> +	} else {
> +		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> +		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
> +	}

(I guess I don't understand DEBUGCTLS enough) vmcs01_debugctl is used by
L0 to run L1, and if L1 hasn't set VM_ENTRY_LOAD_DEBUG_CONTROLS for L2,
why do we need the GUEST_IA32_DEBUGCTL vmwrite in the "else" case ?

 
>  	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
>  		vmcs12->vm_entry_intr_info_field);
>  	vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
> @@ -7846,7 +7871,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
>  		vmcs12->guest_interruptibility_info);
>  	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
> -	kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
>  	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
>  	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
>  		vmcs12->guest_pending_dbg_exceptions);
> @@ -8143,9 +8167,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  	    !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
>  	      nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
>  	    !vmx_control_verify(vmcs12->vm_exit_controls,
> -	      nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) ||
> +				nested_vmx_true_exit_ctls_low,
> +				nested_vmx_exit_ctls_high) ||
>  	    !vmx_control_verify(vmcs12->vm_entry_controls,
> -	      nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high))
> +				nested_vmx_true_entry_ctls_low,
> +				nested_vmx_entry_ctls_high))
>  	{
>  		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>  		return 1;
> @@ -8222,6 +8248,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  
>  	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
>  
> +	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
> +		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
> +
>  	cpu = get_cpu();
>  	vmx->loaded_vmcs = vmcs02;
>  	vmx_vcpu_put(vcpu);
> @@ -8399,7 +8428,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>  	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
>  	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
>  
> -	kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
>  	vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
>  	vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
>  	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
> @@ -8478,9 +8506,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>  		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
>  		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
>  
> +	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
> +		kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
> +		vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
> +	}
> +
>  	/* TODO: These cannot have changed unless we have MSR bitmaps and
>  	 * the relevant bit asks not to trap the change */
> -	vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
>  	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
>  		vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
>  	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 17, 2014, 5:18 a.m. UTC | #2
Il 16/06/2014 19:02, Bandan Das ha scritto:
> > -	vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
> > +	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
> > +		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> > +		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
> > +	} else {
> > +		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> > +		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
> > +	}
>
> (I guess I don't understand DEBUGCTLS enough) vmcs01_debugctl is used by
> L0 to run L1, and if L1 hasn't set VM_ENTRY_LOAD_DEBUG_CONTROLS for L2,
> why do we need the GUEST_IA32_DEBUGCTL vmwrite in the "else" case ?

Beause we always set VM_ENTRY_LOAD_DEBUG_CONTROLS in the VMCS02, so we 
must always fill in GUEST_IA32_DEBUGCTL of the VMCS02.  Depending on the 
VMCS12's VM_ENTRY_LOAD_DEBUG_CONTROLS, the field comes from either 
VMCS01 or VMCS12.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 475f2dc..f20a5ee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -383,6 +383,9 @@  struct nested_vmx {
 
 	struct hrtimer preemption_timer;
 	bool preemption_timer_expired;
+
+	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
+	u64 vmcs01_debugctl;
 };
 
 #define POSTED_INTR_ON  0
@@ -2243,7 +2246,9 @@  static u32 nested_vmx_true_procbased_ctls_low;
 static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
 static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
+static u32 nested_vmx_true_exit_ctls_low;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
+static u32 nested_vmx_true_entry_ctls_low;
 static u32 nested_vmx_misc_low, nested_vmx_misc_high;
 static u32 nested_vmx_ept_caps;
 static __init void nested_vmx_setup_ctls_msrs(void)
@@ -2289,6 +2294,10 @@  static __init void nested_vmx_setup_ctls_msrs(void)
 	if (vmx_mpx_supported())
 		nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
+	/* We support free control of debug control saving. */
+	nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low &
+		~VM_EXIT_SAVE_DEBUG_CONTROLS;
+
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
 		nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
@@ -2303,6 +2312,10 @@  static __init void nested_vmx_setup_ctls_msrs(void)
 	if (vmx_mpx_supported())
 		nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
+	/* We support free control of debug control loading. */
+	nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low &
+		~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
 		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
@@ -2409,11 +2422,17 @@  static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 					nested_vmx_procbased_ctls_high);
 		break;
 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+		*pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low,
+					nested_vmx_exit_ctls_high);
+		break;
 	case MSR_IA32_VMX_EXIT_CTLS:
 		*pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
 					nested_vmx_exit_ctls_high);
 		break;
 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+		*pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low,
+					nested_vmx_entry_ctls_high);
+		break;
 	case MSR_IA32_VMX_ENTRY_CTLS:
 		*pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
 					nested_vmx_entry_ctls_high);
@@ -7836,7 +7855,13 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
 	vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
 
-	vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
+		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
+		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+	} else {
+		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
+		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
+	}
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 		vmcs12->vm_entry_intr_info_field);
 	vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
@@ -7846,7 +7871,6 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
 		vmcs12->guest_interruptibility_info);
 	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
-	kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
 	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
 	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
 		vmcs12->guest_pending_dbg_exceptions);
@@ -8143,9 +8167,11 @@  static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	    !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
 	      nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_exit_controls,
-	      nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) ||
+				nested_vmx_true_exit_ctls_low,
+				nested_vmx_exit_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_entry_controls,
-	      nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high))
+				nested_vmx_true_entry_ctls_low,
+				nested_vmx_entry_ctls_high))
 	{
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
@@ -8222,6 +8248,9 @@  static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
 
+	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
 	cpu = get_cpu();
 	vmx->loaded_vmcs = vmcs02;
 	vmx_vcpu_put(vcpu);
@@ -8399,7 +8428,6 @@  static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
 	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
 
-	kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
 	vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
 	vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
 	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
@@ -8478,9 +8506,13 @@  static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
 		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
 
+	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
+		kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
+		vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+	}
+
 	/* TODO: These cannot have changed unless we have MSR bitmaps and
 	 * the relevant bit asks not to trap the change */
-	vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
 		vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
 	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)