diff mbox series

[qemu,1/1] target/i386: Fix gen_shift_rm_T1, wrong eflags calculation

Message ID 167718710208.23058.11278141733696221981-1@git.sr.ht (mailing list archive)
State New, archived
Headers show
Series gen_shift_rm_T1 uses wrong tmp0 register | expand

Commit Message

~vilenka Feb. 23, 2023, 9:13 p.m. UTC
From: Vilen Kamalov <vilen.kamalov@gmail.com>

gen_shift_rm_T1 in the uses wrong tmp0 register, eflags calculation uses tmp4 at target/i386/tcg/translate.c, line 5488
`tcg_gen_mov_tl(cpu_cc_src, s->tmp4);`

QEMU fails to pass int3 in next sample, vs real cpu
-------------
push rcx
mov dword ptr [rsp], 010000000h
mov rcx, 01eh
sar dword ptr [rsp], cl
jnc pass1
int 3
pass1:
mov dword ptr [rsp], 0ffffffffh
mov rcx, 01eh
sar dword ptr [rsp], cl
jc pass2
int 3
pass2:
pop rcx
-------------

Signed-off-by: Vilen Kamalov <vilen.kamalov@gmail.com>
---
 target/i386/tcg/translate.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

Comments

Richard Henderson Feb. 23, 2023, 10:01 p.m. UTC | #1
On 2/23/23 11:13, ~vilenka wrote:
> From: Vilen Kamalov <vilen.kamalov@gmail.com>
> 
> gen_shift_rm_T1 in the uses wrong tmp0 register, eflags calculation uses tmp4 at target/i386/tcg/translate.c, line 5488
> `tcg_gen_mov_tl(cpu_cc_src, s->tmp4);`

The line you quote only applies to the bit instructions, bt/bts/btr/btc, so your 
explanation is clearly incorrect.

> push rcx
> mov dword ptr [rsp], 010000000h
> mov rcx, 01eh
> sar dword ptr [rsp], cl
> jnc pass1
> int 3
> pass1:
> mov dword ptr [rsp], 0ffffffffh
> mov rcx, 01eh
> sar dword ptr [rsp], cl
> jc pass2
> int 3
> pass2:
> pop rcx

Thanks for the test case.

> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index 9d9392b009..9048e22868 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -1686,27 +1686,27 @@ static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
>       }
>   
>       tcg_gen_andi_tl(s->T1, s->T1, mask);
> -    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
> +    tcg_gen_subi_tl(s->tmp4, s->T1, 1);
>   
>       if (is_right) {
>           if (is_arith) {
>               gen_exts(ot, s->T0);
> -            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
> +            tcg_gen_sar_tl(s->tmp4, s->T0, s->tmp4);
>               tcg_gen_sar_tl(s->T0, s->T0, s->T1);
>           } else {
>               gen_extu(ot, s->T0);
> -            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
> +            tcg_gen_shr_tl(s->tmp4, s->T0, s->tmp4);
>               tcg_gen_shr_tl(s->T0, s->T0, s->T1);
>           }
>       } else {
> -        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
> +        tcg_gen_shl_tl(s->tmp4, s->T0, s->tmp4);
>           tcg_gen_shl_tl(s->T0, s->T0, s->T1);
>       }
>   
>       /* store */
>       gen_op_st_rm_T0_A0(s, ot, op1);
>   
> -    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
> +    gen_shift_flags(s, ot, s->T0, s->tmp4, s->T1, is_right);
>   }

The use of tmp0 vs tmp4 is completely local to this function.
Within gen_shift_flags, the 4th argument is consistently used, and neither tmp0 nor tmp4 
are referenced.

If this does fix the issue, that means there's some other explanation, and possibly some 
deeper fix is required.


r~
Vilen Kamalov Feb. 23, 2023, 10:13 p.m. UTC | #2
Yes, agree that my explanation is incorrect, just looked again, there is a
code in the default, down the line 5488

        default:
            /* Otherwise, generate EFLAGS and replace the C bit.  */
            gen_compute_eflags(s);
            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src,
* s->tmp4,*                               ctz32(CC_C), 1);
            break;

and changing tmp0 to tmp4 fixes the issue.

On Fri, Feb 24, 2023 at 1:01 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 2/23/23 11:13, ~vilenka wrote:
> > From: Vilen Kamalov <vilen.kamalov@gmail.com>
> >
> > gen_shift_rm_T1 in the uses wrong tmp0 register, eflags calculation uses
> tmp4 at target/i386/tcg/translate.c, line 5488
> > `tcg_gen_mov_tl(cpu_cc_src, s->tmp4);`
>
> The line you quote only applies to the bit instructions, bt/bts/btr/btc,
> so your
> explanation is clearly incorrect.
>
> > push rcx
> > mov dword ptr [rsp], 010000000h
> > mov rcx, 01eh
> > sar dword ptr [rsp], cl
> > jnc pass1
> > int 3
> > pass1:
> > mov dword ptr [rsp], 0ffffffffh
> > mov rcx, 01eh
> > sar dword ptr [rsp], cl
> > jc pass2
> > int 3
> > pass2:
> > pop rcx
>
> Thanks for the test case.
>
> > diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> > index 9d9392b009..9048e22868 100644
> > --- a/target/i386/tcg/translate.c
> > +++ b/target/i386/tcg/translate.c
> > @@ -1686,27 +1686,27 @@ static void gen_shift_rm_T1(DisasContext *s,
> MemOp ot, int op1,
> >       }
> >
> >       tcg_gen_andi_tl(s->T1, s->T1, mask);
> > -    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
> > +    tcg_gen_subi_tl(s->tmp4, s->T1, 1);
> >
> >       if (is_right) {
> >           if (is_arith) {
> >               gen_exts(ot, s->T0);
> > -            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
> > +            tcg_gen_sar_tl(s->tmp4, s->T0, s->tmp4);
> >               tcg_gen_sar_tl(s->T0, s->T0, s->T1);
> >           } else {
> >               gen_extu(ot, s->T0);
> > -            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
> > +            tcg_gen_shr_tl(s->tmp4, s->T0, s->tmp4);
> >               tcg_gen_shr_tl(s->T0, s->T0, s->T1);
> >           }
> >       } else {
> > -        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
> > +        tcg_gen_shl_tl(s->tmp4, s->T0, s->tmp4);
> >           tcg_gen_shl_tl(s->T0, s->T0, s->T1);
> >       }
> >
> >       /* store */
> >       gen_op_st_rm_T0_A0(s, ot, op1);
> >
> > -    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
> > +    gen_shift_flags(s, ot, s->T0, s->tmp4, s->T1, is_right);
> >   }
>
> The use of tmp0 vs tmp4 is completely local to this function.
> Within gen_shift_flags, the 4th argument is consistently used, and neither
> tmp0 nor tmp4
> are referenced.
>
> If this does fix the issue, that means there's some other explanation, and
> possibly some
> deeper fix is required.
>
>
> r~
>
Vilen Kamalov Feb. 23, 2023, 10:19 p.m. UTC | #3
nevermind, I do not understand how it is fixing the problem. :)

On Fri, Feb 24, 2023 at 1:13 AM Vilen Kamalov <vilen.kamalov@gmail.com>
wrote:

> Yes, agree that my explanation is incorrect, just looked again, there is a
> code in the default, down the line 5488
>
>         default:
>             /* Otherwise, generate EFLAGS and replace the C bit.  */
>             gen_compute_eflags(s);
>             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src,
> * s->tmp4,*                               ctz32(CC_C), 1);
>             break;
>
> and changing tmp0 to tmp4 fixes the issue.
>
> On Fri, Feb 24, 2023 at 1:01 AM Richard Henderson <
> richard.henderson@linaro.org> wrote:
>
>> On 2/23/23 11:13, ~vilenka wrote:
>> > From: Vilen Kamalov <vilen.kamalov@gmail.com>
>> >
>> > gen_shift_rm_T1 in the uses wrong tmp0 register, eflags calculation
>> uses tmp4 at target/i386/tcg/translate.c, line 5488
>> > `tcg_gen_mov_tl(cpu_cc_src, s->tmp4);`
>>
>> The line you quote only applies to the bit instructions, bt/bts/btr/btc,
>> so your
>> explanation is clearly incorrect.
>>
>> > push rcx
>> > mov dword ptr [rsp], 010000000h
>> > mov rcx, 01eh
>> > sar dword ptr [rsp], cl
>> > jnc pass1
>> > int 3
>> > pass1:
>> > mov dword ptr [rsp], 0ffffffffh
>> > mov rcx, 01eh
>> > sar dword ptr [rsp], cl
>> > jc pass2
>> > int 3
>> > pass2:
>> > pop rcx
>>
>> Thanks for the test case.
>>
>> > diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
>> > index 9d9392b009..9048e22868 100644
>> > --- a/target/i386/tcg/translate.c
>> > +++ b/target/i386/tcg/translate.c
>> > @@ -1686,27 +1686,27 @@ static void gen_shift_rm_T1(DisasContext *s,
>> MemOp ot, int op1,
>> >       }
>> >
>> >       tcg_gen_andi_tl(s->T1, s->T1, mask);
>> > -    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
>> > +    tcg_gen_subi_tl(s->tmp4, s->T1, 1);
>> >
>> >       if (is_right) {
>> >           if (is_arith) {
>> >               gen_exts(ot, s->T0);
>> > -            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
>> > +            tcg_gen_sar_tl(s->tmp4, s->T0, s->tmp4);
>> >               tcg_gen_sar_tl(s->T0, s->T0, s->T1);
>> >           } else {
>> >               gen_extu(ot, s->T0);
>> > -            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
>> > +            tcg_gen_shr_tl(s->tmp4, s->T0, s->tmp4);
>> >               tcg_gen_shr_tl(s->T0, s->T0, s->T1);
>> >           }
>> >       } else {
>> > -        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
>> > +        tcg_gen_shl_tl(s->tmp4, s->T0, s->tmp4);
>> >           tcg_gen_shl_tl(s->T0, s->T0, s->T1);
>> >       }
>> >
>> >       /* store */
>> >       gen_op_st_rm_T0_A0(s, ot, op1);
>> >
>> > -    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
>> > +    gen_shift_flags(s, ot, s->T0, s->tmp4, s->T1, is_right);
>> >   }
>>
>> The use of tmp0 vs tmp4 is completely local to this function.
>> Within gen_shift_flags, the 4th argument is consistently used, and
>> neither tmp0 nor tmp4
>> are referenced.
>>
>> If this does fix the issue, that means there's some other explanation,
>> and possibly some
>> deeper fix is required.
>>
>>
>> r~
>>
>
Richard Henderson Feb. 23, 2023, 10:23 p.m. UTC | #4
On 2/23/23 11:13, ~vilenka wrote:
> From: Vilen Kamalov <vilen.kamalov@gmail.com>
> 
> gen_shift_rm_T1 in the uses wrong tmp0 register, eflags calculation uses tmp4 at target/i386/tcg/translate.c, line 5488
> `tcg_gen_mov_tl(cpu_cc_src, s->tmp4);`
> 
> QEMU fails to pass int3 in next sample, vs real cpu
> -------------
> push rcx
> mov dword ptr [rsp], 010000000h
> mov rcx, 01eh
> sar dword ptr [rsp], cl
> jnc pass1
> int 3
> pass1:
> mov dword ptr [rsp], 0ffffffffh
> mov rcx, 01eh
> sar dword ptr [rsp], cl
> jc pass2
> int 3
> pass2:
> pop rcx
> -------------

Rewritten as a standalone test:

int main()
{
     unsigned m = 0x10000000;
     unsigned char c = 0x1e;

     m = 0x10000000u;
     asm volatile("sarl %1, %0; jnc 1f; ud2; 1:" : "+m"(m) : "c"(0x1e));

     m = 0xffffffffu;
     asm volatile("sarl %1, %0; jc 1f; ud2; 1:" : "+m"(m) : "c"(0x1e));

     return 0;
}

This test passes for me, for both qemu-i386 and qemu-x86_64.
So, I don't see your reported failure at all.


r~
diff mbox series

Patch

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 9d9392b009..9048e22868 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1686,27 +1686,27 @@  static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
     }
 
     tcg_gen_andi_tl(s->T1, s->T1, mask);
-    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
+    tcg_gen_subi_tl(s->tmp4, s->T1, 1);
 
     if (is_right) {
         if (is_arith) {
             gen_exts(ot, s->T0);
-            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
+            tcg_gen_sar_tl(s->tmp4, s->T0, s->tmp4);
             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
         } else {
             gen_extu(ot, s->T0);
-            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
+            tcg_gen_shr_tl(s->tmp4, s->T0, s->tmp4);
             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
         }
     } else {
-        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
+        tcg_gen_shl_tl(s->tmp4, s->T0, s->tmp4);
         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
+    gen_shift_flags(s, ot, s->T0, s->tmp4, s->T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,