diff mbox series

[5/5] x86/pv: Rewrite %dr6 handling

Message ID 20230912232113.402347-6-andrew.cooper3@citrix.com (mailing list archive)
State Superseded
Headers show
Series x86/pv: #DB vs %dr6 fixes, part 2 | expand

Commit Message

Andrew Cooper Sept. 12, 2023, 11:21 p.m. UTC
All #DB exceptions result in an update of %dr6, but this isn't handled
properly by Xen for any guest type.

To start with, add a new pending_dbg field to x86_event, sharing storage with
cr2, and using the Intel VMCS PENDING_DBG semantics.  Also introduce a
pv_inject_DB() wrapper use this field nicely.

Remove all ad-hoc dr6 handling, leaving it to pv_inject_event() in most cases
and using the new x86_merge_dr6() helper.

In do_debug(), adjust dr6 manually only when a debugger is attached.  This
maintains the old behaviour.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Jinoh Kang <jinoh.kang.kr@gmail.com>
---
 xen/arch/x86/include/asm/domain.h      | 12 ++++++++++++
 xen/arch/x86/pv/emul-priv-op.c         |  5 +----
 xen/arch/x86/pv/emulate.c              |  6 ++----
 xen/arch/x86/pv/ro-page-fault.c        |  4 ++--
 xen/arch/x86/pv/traps.c                | 17 +++++++++++++----
 xen/arch/x86/traps.c                   | 12 +++++++-----
 xen/arch/x86/x86_emulate/x86_emulate.h |  5 ++++-
 7 files changed, 41 insertions(+), 20 deletions(-)

Comments

Jan Beulich Sept. 14, 2023, 4:06 p.m. UTC | #1
On 13.09.2023 01:21, Andrew Cooper wrote:
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -729,6 +729,18 @@ static inline void pv_inject_hw_exception(unsigned int vector, int errcode)
>      pv_inject_event(&event);
>  }
>  
> +static inline void pv_inject_DB(unsigned long pending_dbg)
> +{
> +    struct x86_event event = {
> +        .vector      = X86_EXC_DB,
> +        .type        = X86_EVENTTYPE_HW_EXCEPTION,
> +        .error_code  = X86_EVENT_NO_EC,
> +        .pending_dbg = pending_dbg,

This being a sub-field of an unnamed union, the build will break (also
in pv_inject_page_fault() then, for cr2 being switched at the same time)
once again for old enough gcc.

> --- a/xen/arch/x86/pv/emulate.c
> +++ b/xen/arch/x86/pv/emulate.c
> @@ -71,11 +71,9 @@ void pv_emul_instruction_done(struct cpu_user_regs *regs, unsigned long rip)
>  {
>      regs->rip = rip;
>      regs->eflags &= ~X86_EFLAGS_RF;
> +
>      if ( regs->eflags & X86_EFLAGS_TF )
> -    {
> -        current->arch.dr6 |= DR_STEP | DR_STATUS_RESERVED_ONE;
> -        pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
> -    }
> +        pv_inject_DB(X86_DR6_BS);
>  }

This (not your change, the construct) looks bogus at the first and second
glance, because of looking at EFLAGS.TF after emulation, when the initial
state of TF matters. It is only correct (at the third, closer look) because
the function presently is used only from paths not altering the guest's
EFLAGS. Do you think it would make sense to add a comment at this occasion?

Jan
Andrew Cooper Sept. 14, 2023, 6:39 p.m. UTC | #2
On 14/09/2023 5:06 pm, Jan Beulich wrote:
> On 13.09.2023 01:21, Andrew Cooper wrote:
>> --- a/xen/arch/x86/include/asm/domain.h
>> +++ b/xen/arch/x86/include/asm/domain.h
>> @@ -729,6 +729,18 @@ static inline void pv_inject_hw_exception(unsigned int vector, int errcode)
>>      pv_inject_event(&event);
>>  }
>>  
>> +static inline void pv_inject_DB(unsigned long pending_dbg)
>> +{
>> +    struct x86_event event = {
>> +        .vector      = X86_EXC_DB,
>> +        .type        = X86_EVENTTYPE_HW_EXCEPTION,
>> +        .error_code  = X86_EVENT_NO_EC,
>> +        .pending_dbg = pending_dbg,
> This being a sub-field of an unnamed union, the build will break (also
> in pv_inject_page_fault() then, for cr2 being switched at the same time)
> once again for old enough gcc.

I'm sick and tired of utterly obsolete compiler bugs stopping us writing
good code.

It will break HVM #PF too, and I'll fix it for now as these patches need
backporting, but I've got a very strong mind to intentionally break it
next time this comes up in staging.

>> --- a/xen/arch/x86/pv/emulate.c
>> +++ b/xen/arch/x86/pv/emulate.c
>> @@ -71,11 +71,9 @@ void pv_emul_instruction_done(struct cpu_user_regs *regs, unsigned long rip)
>>  {
>>      regs->rip = rip;
>>      regs->eflags &= ~X86_EFLAGS_RF;
>> +
>>      if ( regs->eflags & X86_EFLAGS_TF )
>> -    {
>> -        current->arch.dr6 |= DR_STEP | DR_STATUS_RESERVED_ONE;
>> -        pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
>> -    }
>> +        pv_inject_DB(X86_DR6_BS);
>>  }
> This (not your change, the construct) looks bogus at the first and second
> glance, because of looking at EFLAGS.TF after emulation, when the initial
> state of TF matters. It is only correct (at the third, closer look) because
> the function presently is used only from paths not altering the guest's
> EFLAGS. Do you think it would make sense to add a comment at this occasion?

It is buggy yes, but if you notice, so is SVM's __update_guest_eip() and
VMX's update_guest_eip().

And remember that while for most instructions it's the initial state
that matters, it's the final state that matters for SYSCALL/SYSRET, and
each of LRET/IRET/ERET{U,S} have oddities that aren't currently
expressed in the emulator.

I'll leave a todo for now.  This is a problem that can only reasonably
be fixed by unifying the intercept and emulation paths into a coherent
model of the instruction cycle.

~Andrew
diff mbox series

Patch

diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index c2d9fc333be5..5bf488437ce1 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -729,6 +729,18 @@  static inline void pv_inject_hw_exception(unsigned int vector, int errcode)
     pv_inject_event(&event);
 }
 
+static inline void pv_inject_DB(unsigned long pending_dbg)
+{
+    struct x86_event event = {
+        .vector      = X86_EXC_DB,
+        .type        = X86_EVENTTYPE_HW_EXCEPTION,
+        .error_code  = X86_EVENT_NO_EC,
+        .pending_dbg = pending_dbg,
+    };
+
+    pv_inject_event(&event);
+}
+
 static inline void pv_inject_page_fault(int errcode, unsigned long cr2)
 {
     const struct x86_event event = {
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 6963db35c960..437172ee0fc3 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -1365,10 +1365,7 @@  int pv_emulate_privileged_op(struct cpu_user_regs *regs)
         ASSERT(!curr->arch.pv.trap_bounce.flags);
 
         if ( ctxt.ctxt.retire.pending_dbg )
-        {
-            curr->arch.dr6 |= ctxt.ctxt.retire.pending_dbg | DR_STATUS_RESERVED_ONE;
-            pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
-        }
+            pv_inject_DB(ctxt.ctxt.retire.pending_dbg);
 
         /* fall through */
     case X86EMUL_RETRY:
diff --git a/xen/arch/x86/pv/emulate.c b/xen/arch/x86/pv/emulate.c
index e7a1c0a2cc4f..e522e58533f1 100644
--- a/xen/arch/x86/pv/emulate.c
+++ b/xen/arch/x86/pv/emulate.c
@@ -71,11 +71,9 @@  void pv_emul_instruction_done(struct cpu_user_regs *regs, unsigned long rip)
 {
     regs->rip = rip;
     regs->eflags &= ~X86_EFLAGS_RF;
+
     if ( regs->eflags & X86_EFLAGS_TF )
-    {
-        current->arch.dr6 |= DR_STEP | DR_STATUS_RESERVED_ONE;
-        pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
-    }
+        pv_inject_DB(X86_DR6_BS);
 }
 
 uint64_t pv_get_reg(struct vcpu *v, unsigned int reg)
diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c
index cad28ef928ad..f6bb33556e72 100644
--- a/xen/arch/x86/pv/ro-page-fault.c
+++ b/xen/arch/x86/pv/ro-page-fault.c
@@ -389,8 +389,8 @@  int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs)
 
         /* Fallthrough */
     case X86EMUL_OKAY:
-        if ( ctxt.retire.singlestep )
-            pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
+        if ( ctxt.retire.pending_dbg )
+            pv_inject_DB(ctxt.retire.pending_dbg);
 
         /* Fallthrough */
     case X86EMUL_RETRY:
diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c
index 74f333da7e1c..553b04bca956 100644
--- a/xen/arch/x86/pv/traps.c
+++ b/xen/arch/x86/pv/traps.c
@@ -13,6 +13,7 @@ 
 #include <xen/softirq.h>
 
 #include <asm/pv/trace.h>
+#include <asm/debugreg.h>
 #include <asm/shared.h>
 #include <asm/traps.h>
 #include <irq_vectors.h>
@@ -50,9 +51,9 @@  void pv_inject_event(const struct x86_event *event)
     tb->cs    = ti->cs;
     tb->eip   = ti->address;
 
-    if ( event->type == X86_EVENTTYPE_HW_EXCEPTION &&
-         vector == X86_EXC_PF )
+    switch ( vector | -(event->type == X86_EVENTTYPE_SW_INTERRUPT) )
     {
+    case X86_EXC_PF:
         curr->arch.pv.ctrlreg[2] = event->cr2;
         arch_set_cr2(curr, event->cr2);
 
@@ -62,9 +63,17 @@  void pv_inject_event(const struct x86_event *event)
             error_code |= PFEC_user_mode;
 
         trace_pv_page_fault(event->cr2, error_code);
-    }
-    else
+        break;
+
+    case X86_EXC_DB:
+        curr->arch.dr6 = x86_merge_dr6(curr->domain->arch.cpu_policy,
+                                       curr->arch.dr6, event->pending_dbg);
+        /* Fallthrough */
+
+    default:
         trace_pv_trap(vector, regs->rip, use_error_code, error_code);
+        break;
+    }
 
     if ( use_error_code )
     {
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index dead728ce329..ae5d73abf557 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1887,7 +1887,7 @@  void do_device_not_available(struct cpu_user_regs *regs)
 /* SAF-1-safe */
 void do_debug(struct cpu_user_regs *regs)
 {
-    unsigned long dr6;
+    unsigned long dr6, pending_dbg;
     struct vcpu *v = current;
 
     /* Stash dr6 as early as possible. */
@@ -1997,17 +1997,19 @@  void do_debug(struct cpu_user_regs *regs)
         return;
     }
 
-    /* Save debug status register where guest OS can peek at it */
-    v->arch.dr6 |= (dr6 & ~X86_DR6_DEFAULT);
-    v->arch.dr6 &= (dr6 | ~X86_DR6_DEFAULT);
+    /* Flip dr6 to have positive polarity. */
+    pending_dbg = dr6 ^ X86_DR6_DEFAULT;
 
     if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached )
     {
+        /* Save debug status register where gdbsx can peek at it */
+        v->arch.dr6 = x86_merge_dr6(v->domain->arch.cpu_policy,
+                                    v->arch.dr6, pending_dbg);
         domain_pause_for_debugger();
         return;
     }
 
-    pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
+    pv_inject_DB(pending_dbg);
 }
 
 /* SAF-1-safe */
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index f0e74d23c378..81f99dfaa02f 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -78,7 +78,10 @@  struct x86_event {
     uint8_t       type;         /* X86_EVENTTYPE_* */
     uint8_t       insn_len;     /* Instruction length */
     int32_t       error_code;   /* X86_EVENT_NO_EC if n/a */
-    unsigned long cr2;          /* Only for X86_EXC_PF h/w exception */
+    union {
+        unsigned long cr2;         /* #PF */
+        unsigned long pending_dbg; /* #DB (new DR6 bits, positive polarity) */
+    };
 };
 
 /*