diff mbox

target-i386: Fix eflags.TF/#DB handling of syscall/sysret insns

Message ID 94eb2c0bfa1c6a9fec0543057483@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Doug Evans Dec. 6, 2016, 11:06 p.m. UTC
Hi.

While qemu's behaviour matches what one would expect from reading
the docs, it does not match what I'm seeing on h/w.
Can anyone else confirm what the correct behaviour is here?

---

The syscall and sysret instructions behave a bit differently:
TF is checked after the instruction completes.
This allows the o/s to disable #DB at a syscall by adding TF to FMASK.
And then when the sysret is executed the #DB is taken "as if" the
syscall insn just completed.

Signed-off-by: Doug Evans <dje@google.com>
---
  target-i386/bpt_helper.c | 12 ++++++++++++
  target-i386/helper.h     |  1 +
  target-i386/translate.c  | 29 ++++++++++++++++++++++++-----
  3 files changed, 37 insertions(+), 5 deletions(-)

      } else {
@@ -2525,10 +2530,17 @@ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
      s->is_jmp = DISAS_TB_JUMP;
  }

+/* End of block.
+   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
+static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
+{
+    gen_eob_worker(s, inhibit, false);
+}
+
  /* End of block, resetting the inhibit irq flag.  */
  static void gen_eob(DisasContext *s)
  {
-    gen_eob_inhibit_irq(s, false);
+    gen_eob_worker(s, false, false);
  }

  /* generate a jump to eip. No segment change must happen before as a
@@ -7108,7 +7120,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
          if (s->lma) {
              set_cc_op(s, CC_OP_EFLAGS);
          }
-        gen_eob(s);
+        /* TF handling for the syscall insn is different. The TF bit is checked
+           after the syscall insn completes. This allows #DB to not be
+           generated after one has entered CPL0 if TF is set in FMASK.  */
+        gen_eob_worker(s, false, true);
          break;
      case 0x107: /* sysret */
          if (!s->pe) {
@@ -7119,7 +7134,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
              if (s->lma) {
                  set_cc_op(s, CC_OP_EFLAGS);
              }
-            gen_eob(s);
+            /* TF handling for the sysret insn is different. The TF bit is
+               checked after the sysret insn completes. This allows #DB to be
+               generated "as if" the syscall insn in userspace has just
+               completed.  */
+            gen_eob_worker(s, false, true);
          }
          break;
  #endif

Comments

Doug Evans Dec. 12, 2016, 7:23 p.m. UTC | #1
On Tue, Dec 6, 2016 at 3:06 PM, Doug Evans <dje@google.com> wrote:
> Hi.
>
> While qemu's behaviour matches what one would expect from reading
> the docs, it does not match what I'm seeing on h/w.
> Can anyone else confirm what the correct behaviour is here?
>
> ---
>
> The syscall and sysret instructions behave a bit differently:
> TF is checked after the instruction completes.
> This allows the o/s to disable #DB at a syscall by adding TF to FMASK.
> And then when the sysret is executed the #DB is taken "as if" the
> syscall insn just completed.
>
> Signed-off-by: Doug Evans <dje@google.com>

Ping.
Especially, can anyone confirm the correct behaviour here?

I can provide a testcase with Fuchsia if one likes.
https://fuchsia.googlesource.com/fuchsia/
It's not that hard to repro - we trip over it because we don't have a #DB IST
and since SYSCALL doesn't change SP we get a double fault on qemu
trying to establish the interrupt frame for the #DB (whereas the #DB shouldn't
happen in the first place - at least when run on the h/w I'm using).
The Intel/AMD docs are *really* unclear on this AFAICT.

patchwork reference: http://patchwork.ozlabs.org/patch/703373/
Paolo Bonzini Dec. 13, 2016, 11:33 a.m. UTC | #2
On 12/12/2016 20:23, Doug Evans wrote:
> On Tue, Dec 6, 2016 at 3:06 PM, Doug Evans <dje@google.com> wrote:
>> Hi.
>>
>> While qemu's behaviour matches what one would expect from reading
>> the docs, it does not match what I'm seeing on h/w.
>> Can anyone else confirm what the correct behaviour is here?
>>
>> ---
>>
>> The syscall and sysret instructions behave a bit differently:
>> TF is checked after the instruction completes.
>> This allows the o/s to disable #DB at a syscall by adding TF to FMASK.
>> And then when the sysret is executed the #DB is taken "as if" the
>> syscall insn just completed.
>>
>> Signed-off-by: Doug Evans <dje@google.com>
> 
> Ping.
> Especially, can anyone confirm the correct behaviour here?

Hi, I haven't looked at the patch because QEMU was in freeze.  I'll get
back to it later this week, hopefully.

The best way to provide a testcase would be a patch to kvm-unit-tests
(git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git); most of the
tests can be used with both QEMU emulation mode and KVM.

Paolo

> I can provide a testcase with Fuchsia if one likes.
> https://fuchsia.googlesource.com/fuchsia/
> It's not that hard to repro - we trip over it because we don't have a #DB IST
> and since SYSCALL doesn't change SP we get a double fault on qemu
> trying to establish the interrupt frame for the #DB (whereas the #DB shouldn't
> happen in the first place - at least when run on the h/w I'm using).
> The Intel/AMD docs are *really* unclear on this AFAICT.
> 
> patchwork reference: http://patchwork.ozlabs.org/patch/703373/
>
Paolo Bonzini Dec. 16, 2016, 11:01 a.m. UTC | #3
On 07/12/2016 00:06, Doug Evans wrote:
> Hi.
> 
> While qemu's behaviour matches what one would expect from reading
> the docs, it does not match what I'm seeing on h/w.
> Can anyone else confirm what the correct behaviour is here?
> 
> ---
> 
> The syscall and sysret instructions behave a bit differently:
> TF is checked after the instruction completes.
> This allows the o/s to disable #DB at a syscall by adding TF to FMASK.
> And then when the sysret is executed the #DB is taken "as if" the
> syscall insn just completed.
> 
> Signed-off-by: Doug Evans <dje@google.com>
> ---
>  target-i386/bpt_helper.c | 12 ++++++++++++
>  target-i386/helper.h     |  1 +
>  target-i386/translate.c  | 29 ++++++++++++++++++++++++-----
>  3 files changed, 37 insertions(+), 5 deletions(-)
> 
> diff --git a/target-i386/bpt_helper.c b/target-i386/bpt_helper.c
> index 6fd7fe0..d771461 100644
> --- a/target-i386/bpt_helper.c
> +++ b/target-i386/bpt_helper.c
> @@ -244,6 +244,18 @@ void helper_single_step(CPUX86State *env)
>      raise_exception(env, EXCP01_DB);
>  }
> 
> +void helper_rechecking_single_step(CPUX86State *env)
> +{
> +    if ((env->eflags & TF_MASK) != 0)
> +    {
> +#ifndef CONFIG_USER_ONLY
> +        check_hw_breakpoints(env, true);
> +        env->dr[6] |= DR6_BS;
> +#endif
> +        raise_exception(env, EXCP01_DB);
> +    }

Please call helper_single_step here.  Also, a comment on coding
style---the brace goes on the same line as the "if".  You can use
scripts/checkpatch.pl to check the form of your patch.

Do not use your email client to send patches, as that might result in
format=flowed messages which look good in the client but cannot be
processed by the tools.  Use git send-email instead.

Thanks,

Paolo

> +}
> +
>  void helper_set_dr(CPUX86State *env, int reg, target_ulong t0)
>  {
>  #ifndef CONFIG_USER_ONLY
> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index 4e859eb..bd9b2cf 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -79,6 +79,7 @@ DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
>  DEF_HELPER_2(cmpxchg16b, void, env, tl)
>  #endif
>  DEF_HELPER_1(single_step, void, env)
> +DEF_HELPER_1(rechecking_single_step, void, env)
>  DEF_HELPER_1(cpuid, void, env)
>  DEF_HELPER_1(rdtsc, void, env)
>  DEF_HELPER_1(rdtscp, void, env)
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 9fd1a04..42d036e 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -2500,8 +2500,10 @@ static void gen_bnd_jmp(DisasContext *s)
>  }
> 
>  /* Generate an end of block. Trace exception is also generated if needed.
> -   If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
> -static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
> +   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
> +   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
> +   S->TF.  This is used by the syscall/sysret insns.  */
> +static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
>  {
>      gen_update_cc_op(s);
> 
> @@ -2517,6 +2519,9 @@ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
>      }
>      if (s->singlestep_enabled) {
>          gen_helper_debug(cpu_env);
> +    } else if (recheck_tf) {
> +        gen_helper_rechecking_single_step(cpu_env);
> +        tcg_gen_exit_tb(0);
>      } else if (s->tf) {
>          gen_helper_single_step(cpu_env);
>      } else {
> @@ -2525,10 +2530,17 @@ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
>      s->is_jmp = DISAS_TB_JUMP;
>  }
> 
> +/* End of block.
> +   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
> +static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
> +{
> +    gen_eob_worker(s, inhibit, false);
> +}
> +
>  /* End of block, resetting the inhibit irq flag.  */
>  static void gen_eob(DisasContext *s)
>  {
> -    gen_eob_inhibit_irq(s, false);
> +    gen_eob_worker(s, false, false);
>  }
> 
>  /* generate a jump to eip. No segment change must happen before as a
> @@ -7108,7 +7120,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
>          if (s->lma) {
>              set_cc_op(s, CC_OP_EFLAGS);
>          }
> -        gen_eob(s);
> +        /* TF handling for the syscall insn is different. The TF bit is checked
> +           after the syscall insn completes. This allows #DB to not be
> +           generated after one has entered CPL0 if TF is set in FMASK.  */
> +        gen_eob_worker(s, false, true);
>          break;
>      case 0x107: /* sysret */
>          if (!s->pe) {
> @@ -7119,7 +7134,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
>              if (s->lma) {
>                  set_cc_op(s, CC_OP_EFLAGS);
>              }
> -            gen_eob(s);
> +            /* TF handling for the sysret insn is different. The TF bit is
> +               checked after the sysret insn completes. This allows #DB to be
> +               generated "as if" the syscall insn in userspace has just
> +               completed.  */
> +            gen_eob_worker(s, false, true);
>          }
>          break;
>  #endif
diff mbox

Patch

diff --git a/target-i386/bpt_helper.c b/target-i386/bpt_helper.c
index 6fd7fe0..d771461 100644
--- a/target-i386/bpt_helper.c
+++ b/target-i386/bpt_helper.c
@@ -244,6 +244,18 @@  void helper_single_step(CPUX86State *env)
      raise_exception(env, EXCP01_DB);
  }

+void helper_rechecking_single_step(CPUX86State *env)
+{
+    if ((env->eflags & TF_MASK) != 0)
+    {
+#ifndef CONFIG_USER_ONLY
+        check_hw_breakpoints(env, true);
+        env->dr[6] |= DR6_BS;
+#endif
+        raise_exception(env, EXCP01_DB);
+    }
+}
+
  void helper_set_dr(CPUX86State *env, int reg, target_ulong t0)
  {
  #ifndef CONFIG_USER_ONLY
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 4e859eb..bd9b2cf 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -79,6 +79,7 @@  DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
  DEF_HELPER_2(cmpxchg16b, void, env, tl)
  #endif
  DEF_HELPER_1(single_step, void, env)
+DEF_HELPER_1(rechecking_single_step, void, env)
  DEF_HELPER_1(cpuid, void, env)
  DEF_HELPER_1(rdtsc, void, env)
  DEF_HELPER_1(rdtscp, void, env)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 9fd1a04..42d036e 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2500,8 +2500,10 @@  static void gen_bnd_jmp(DisasContext *s)
  }

  /* Generate an end of block. Trace exception is also generated if needed.
-   If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
-static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
+   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
+   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
+   S->TF.  This is used by the syscall/sysret insns.  */
+static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
  {
      gen_update_cc_op(s);

@@ -2517,6 +2519,9 @@  static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
      }
      if (s->singlestep_enabled) {
          gen_helper_debug(cpu_env);
+    } else if (recheck_tf) {
+        gen_helper_rechecking_single_step(cpu_env);
+        tcg_gen_exit_tb(0);
      } else if (s->tf) {
          gen_helper_single_step(cpu_env);