
[4/5] kvm: vmx: Defer setting of DR6 until #DB delivery

Message ID 20181008182945.79957-4-jmattson@google.com (mailing list archive)
State New, archived
Series [1/5] kvm: x86: Add payload to kvm_queued_exception and kvm_vcpu_events

Commit Message

Jim Mattson Oct. 8, 2018, 6:29 p.m. UTC
When exception payloads are enabled by userspace (which is not yet
possible) and a #DB is raised in L2, defer the setting of DR6 until
later. Under VMX, this allows the L1 hypervisor to intercept the fault
before DR6 is modified. Under SVM, DR6 is modified before L1 can
intercept the fault (as has always been the case with DR7).

Note that the payload associated with a #DB exception includes only
the "new DR6 bits." When the payload is delievered, DR6.B0-B3 will be
cleared and DR6.RTM will be set prior to merging in the new DR6 bits.

Also note that bit 16 in the "new DR6 bits" is set to indicate that a
debug exception (#DB) or a breakpoint exception (#BP) occurred inside
an RTM region while advanced debugging of RTM transactional regions
was enabled. Though the reverse of DR6.RTM, this makes the #DB payload
field compatible with both the pending debug exceptions field under
VMX and the exit qualification for #DB exceptions under VMX.

Reported-by: Jim Mattson <jmattson@google.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Peter Shier <pshier@google.com>
---
 arch/x86/kvm/vmx.c | 18 ++++++------------
 arch/x86/kvm/x86.c | 47 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 41 insertions(+), 24 deletions(-)
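
(As an aside, not part of the patch: a minimal standalone sketch of the DR6 merge described above, mirroring the DB_VECTOR case this series adds to kvm_deliver_exception_payload(). The constants are inlined here and match the kernel's definitions.)

/* Sketch only: fold a #DB payload ("new DR6 bits") into DR6. */
#define DR_TRAP_BITS	0xfUL		/* DR6.B0-B3 */
#define DR6_RTM		(1UL << 16)	/* DR6.RTM */

static unsigned long merge_db_payload(unsigned long dr6, unsigned long payload)
{
	dr6 &= ~DR_TRAP_BITS;		/* B0-B3 may be cleared by a #DB */
	dr6 |= DR6_RTM;			/* set by every #DB that does not clear it */
	dr6 |= payload;			/* merge in the new DR6 bits */
	dr6 ^= payload & DR6_RTM;	/* payload bit 16 set => clear DR6.RTM */
	return dr6;
}

For example, a single-step #DB carries a payload of just DR6_BS (bit 14); the merge sets DR6.BS and DR6.RTM and clears B0-B3, which is exactly what the open-coded sequence removed from kvm_vcpu_do_singlestep() used to do.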

Comments

Liran Alon Oct. 9, 2018, 12:14 p.m. UTC | #1
> On 8 Oct 2018, at 21:29, Jim Mattson <jmattson@google.com> wrote:
> 
> When exception payloads are enabled by userspace (which is not yet
> possible) and a #DB is raised in L2, defer the setting of DR6 until
> later. Under VMX, this allows the L1 hypervisor to intercept the fault
> before DR6 is modified. Under SVM, DR6 is modified before L1 can
> intercept the fault (as has always been the case with DR7).
> 
> Note that the payload associated with a #DB exception includes only
> the "new DR6 bits." When the payload is delievered, DR6.B0-B3 will be
> cleared and DR6.RTM will be set prior to merging in the new DR6 bits.
> 
> Also note that bit 16 in the "new DR6 bits" is set to indicate that a
> debug exception (#DB) or a breakpoint exception (#BP) occurred inside
> an RTM region while advanced debugging of RTM transactional regions
> was enabled. Though the reverse of DR6.RTM, this makes the #DB payload
> field compatible with both the pending debug exceptions field under
> VMX and the exit qualification for #DB exceptions under VMX.
> 
> Reported-by: Jim Mattson <jmattson@google.com>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Jim Mattson <jmattson@google.com>
> Reviewed-by: Peter Shier <pshier@google.com>
> ---
> arch/x86/kvm/vmx.c | 18 ++++++------------
> arch/x86/kvm/x86.c | 47 ++++++++++++++++++++++++++++++++++------------
> 2 files changed, 41 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index fc3f2d27b580..45a346acc40b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3288,18 +3288,12 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
> 			*exit_qual = has_payload ? payload : vcpu->arch.cr2;
> 			return 1;
> 		}
> -	} else {
> -		/*
> -		 * FIXME: we must not write DR6 when L1 intercepts an
> -		 * L2 #DB exception.
> -		 */
> -		if (vmcs12->exception_bitmap & (1u << nr)) {
> -			if (nr == DB_VECTOR)
> -				*exit_qual = vcpu->arch.dr6;
> -			else
> -				*exit_qual = 0;
> -			return 1;
> -		}
> +	} else if (vmcs12->exception_bitmap & BIT(nr)) {

I dislike the fact that you also changed (1u << nr) to BIT(nr) in this patch.
All such use cases currently in vmx.c are of the form (1u << X), and the BIT() macro is not used there.
I am not against it, but I would prefer such a change to happen in a separate patch that also modifies the other similar places.
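
(For context, the kernel's BIT() macro is defined in include/linux/bits.h essentially as shown below, so the two forms select the same bit for exception vectors 0-31; the only difference is that BIT() yields an unsigned long rather than an unsigned int.)

/* Simplified; mirrors the kernel's definition. */
#define BIT(nr)		(1UL << (nr))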

> +		if (nr == DB_VECTOR)
> +			*exit_qual = has_payload ? payload : vcpu->arch.dr6;
> +		else
> +			*exit_qual = 0;
> +		return 1;
> 	}
> 
> 	return 0;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 974f0784ac99..33e171e6d067 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -410,6 +410,28 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
> 		return;
> 
> 	switch (nr) {
> +	case DB_VECTOR:
> +		/*
> +		 * "Certain debug exceptions may clear bit 0-3.  The
> +		 * remaining contents of the DR6 register are never
> +		 * cleared by the processor".
> +		 */
> +		vcpu->arch.dr6 &= ~DR_TRAP_BITS;
> +		/*
> +		 * DR6.RTM is set by all #DB exceptions that don't clear it.
> +		 */
> +		vcpu->arch.dr6 |= DR6_RTM;
> +		vcpu->arch.dr6 |= payload;
> +		/*
> +		 * Bit 16 should be set in the payload whenever the #DB
> +		 * exception should clear DR6.RTM. This makes the payload
> +		 * compatible with the pending debug exceptions under VMX.
> +		 * Though not currently documented in the SDM, this also
> +		 * makes the payload compatible with the exit qualification
> +		 * for #DB exceptions under VMX.
> +		 */
> +		vcpu->arch.dr6 ^= payload & DR6_RTM;
> +		break;
> 	case PF_VECTOR:
> 		vcpu->arch.cr2 = payload;
> 		break;
> @@ -501,6 +523,12 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
> }
> EXPORT_SYMBOL_GPL(kvm_requeue_exception);
> 
> +static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
> +				  unsigned long payload)
> +{
> +	kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
> +}
> +
> static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
> 				    u32 error_code, unsigned long payload)
> {
> @@ -6101,14 +6129,7 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
> 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
> 		*r = EMULATE_USER_EXIT;
> 	} else {
> -		/*
> -		 * "Certain debug exceptions may clear bit 0-3.  The
> -		 * remaining contents of the DR6 register are never
> -		 * cleared by the processor".
> -		 */
> -		vcpu->arch.dr6 &= ~15;
> -		vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
> -		kvm_queue_exception(vcpu, DB_VECTOR);
> +		kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
> 	}
> }
> 
> @@ -7047,10 +7068,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> 			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> 					     X86_EFLAGS_RF);
> 
> -		if (vcpu->arch.exception.nr == DB_VECTOR &&
> -		    (vcpu->arch.dr7 & DR7_GD)) {
> -			vcpu->arch.dr7 &= ~DR7_GD;
> -			kvm_update_dr7(vcpu);
> +		if (vcpu->arch.exception.nr == DB_VECTOR) {
> +			kvm_deliver_exception_payload(vcpu);

I would add a comment here noting that once nSVM is modified to use the
check_nested_events() framework, this call to kvm_deliver_exception_payload()
should be moved to svm_check_nested_events().

> +			if (vcpu->arch.dr7 & DR7_GD) {
> +				vcpu->arch.dr7 &= ~DR7_GD;
> +				kvm_update_dr7(vcpu);
> +			}
> 		}
> 
> 		kvm_x86_ops->queue_exception(vcpu);
> -- 
> 2.19.0.605.g01d371f741-goog
> 

Looks good.
Reviewed-by: Liran Alon <liran.alon@oracle.com>
Jim Mattson Oct. 9, 2018, 2:10 p.m. UTC | #2
On Tue, Oct 9, 2018 at 5:14 AM, Liran Alon <liran.alon@oracle.com> wrote:

> I dislike the fact that you also changed (1u << nr) to BIT(nr) in this patch.
> All such use cases currently in vmx.c are of the form (1u << X), and the BIT() macro is not used there.
> I am not against it, but I would prefer such a change to happen in a separate patch that also modifies the other similar places.

Okay; I'll revert that part of the change in v2.

>> -             if (vcpu->arch.exception.nr == DB_VECTOR &&
>> -                 (vcpu->arch.dr7 & DR7_GD)) {
>> -                     vcpu->arch.dr7 &= ~DR7_GD;
>> -                     kvm_update_dr7(vcpu);
>> +             if (vcpu->arch.exception.nr == DB_VECTOR) {
>> +                     kvm_deliver_exception_payload(vcpu);
>
> I would add a comment here noting that once nSVM is modified to use the
> check_nested_events() framework, this call to kvm_deliver_exception_payload()
> should be moved to svm_check_nested_events().

Actually, if nSVM uses the check_nested_events framework, things are a
little more complicated than that. The DR7 update has to be done in
svm_check_nested_events as well. It would be nice to just move the DR7
update into kvm_deliver_exception_payload, but that would break the
old ABI. I'm not sure that this is the appropriate place for a lengthy
comment about a speculative future code change, but I'll go ahead and
write something up.
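
(As an illustration of where such a note might go, with wording that is purely hypothetical and not what v2 actually carries, the hunk could end up looking something like this:)

		if (vcpu->arch.exception.nr == DB_VECTOR) {
			/*
			 * Hypothetical comment, for illustration only: if nSVM
			 * is later converted to the check_nested_events()
			 * framework, this payload delivery (and arguably the
			 * DR7.GD handling below) will need to move to
			 * svm_check_nested_events(), so that it only happens
			 * when the #DB is actually delivered to the guest
			 * rather than intercepted by L1.
			 */
			kvm_deliver_exception_payload(vcpu);
			if (vcpu->arch.dr7 & DR7_GD) {
				vcpu->arch.dr7 &= ~DR7_GD;
				kvm_update_dr7(vcpu);
			}
		}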

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fc3f2d27b580..45a346acc40b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3288,18 +3288,12 @@  static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
 			*exit_qual = has_payload ? payload : vcpu->arch.cr2;
 			return 1;
 		}
-	} else {
-		/*
-		 * FIXME: we must not write DR6 when L1 intercepts an
-		 * L2 #DB exception.
-		 */
-		if (vmcs12->exception_bitmap & (1u << nr)) {
-			if (nr == DB_VECTOR)
-				*exit_qual = vcpu->arch.dr6;
-			else
-				*exit_qual = 0;
-			return 1;
-		}
+	} else if (vmcs12->exception_bitmap & BIT(nr)) {
+		if (nr == DB_VECTOR)
+			*exit_qual = has_payload ? payload : vcpu->arch.dr6;
+		else
+			*exit_qual = 0;
+		return 1;
 	}
 
 	return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 974f0784ac99..33e171e6d067 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -410,6 +410,28 @@  void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
 		return;
 
 	switch (nr) {
+	case DB_VECTOR:
+		/*
+		 * "Certain debug exceptions may clear bit 0-3.  The
+		 * remaining contents of the DR6 register are never
+		 * cleared by the processor".
+		 */
+		vcpu->arch.dr6 &= ~DR_TRAP_BITS;
+		/*
+		 * DR6.RTM is set by all #DB exceptions that don't clear it.
+		 */
+		vcpu->arch.dr6 |= DR6_RTM;
+		vcpu->arch.dr6 |= payload;
+		/*
+		 * Bit 16 should be set in the payload whenever the #DB
+		 * exception should clear DR6.RTM. This makes the payload
+		 * compatible with the pending debug exceptions under VMX.
+		 * Though not currently documented in the SDM, this also
+		 * makes the payload compatible with the exit qualification
+		 * for #DB exceptions under VMX.
+		 */
+		vcpu->arch.dr6 ^= payload & DR6_RTM;
+		break;
 	case PF_VECTOR:
 		vcpu->arch.cr2 = payload;
 		break;
@@ -501,6 +523,12 @@  void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 }
 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
 
+static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
+				  unsigned long payload)
+{
+	kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
+}
+
 static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
 				    u32 error_code, unsigned long payload)
 {
@@ -6101,14 +6129,7 @@  static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
 		*r = EMULATE_USER_EXIT;
 	} else {
-		/*
-		 * "Certain debug exceptions may clear bit 0-3.  The
-		 * remaining contents of the DR6 register are never
-		 * cleared by the processor".
-		 */
-		vcpu->arch.dr6 &= ~15;
-		vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-		kvm_queue_exception(vcpu, DB_VECTOR);
+		kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
 	}
 }
 
@@ -7047,10 +7068,12 @@  static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
 					     X86_EFLAGS_RF);
 
-		if (vcpu->arch.exception.nr == DB_VECTOR &&
-		    (vcpu->arch.dr7 & DR7_GD)) {
-			vcpu->arch.dr7 &= ~DR7_GD;
-			kvm_update_dr7(vcpu);
+		if (vcpu->arch.exception.nr == DB_VECTOR) {
+			kvm_deliver_exception_payload(vcpu);
+			if (vcpu->arch.dr7 & DR7_GD) {
+				vcpu->arch.dr7 &= ~DR7_GD;
+				kvm_update_dr7(vcpu);
+			}
 		}
 
 		kvm_x86_ops->queue_exception(vcpu);