
[4/9] target/ppc: Fix lxvw4x, lxvh8x and lxvb16x

Message ID 20190507004811.29968-4-anton@ozlabs.org (mailing list archive)
State New, archived
Series [1/9] target/ppc: Fix xvxsigdp

Commit Message

Anton Blanchard May 7, 2019, 12:48 a.m. UTC
During the conversion these instructions were incorrectly treated as
stores. We need to use set_cpu_vsr* and not get_cpu_vsr*.

Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() helpers for VSR register access")
Signed-off-by: Anton Blanchard <anton@ozlabs.org>
---
 target/ppc/translate/vsx-impl.inc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

Comments

David Gibson May 7, 2019, 5:28 a.m. UTC | #1
On Tue, May 07, 2019 at 10:48:06AM +1000, Anton Blanchard wrote:
> During the conversion these instructions were incorrectly treated as
> stores. We need to use set_cpu_vsr* and not get_cpu_vsr*.
> 
> Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() helpers for VSR register access")
> Signed-off-by: Anton Blanchard <anton@ozlabs.org>
> ---
>  target/ppc/translate/vsx-impl.inc.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
> index 05b75105be..c13f84e745 100644
> --- a/target/ppc/translate/vsx-impl.inc.c
> +++ b/target/ppc/translate/vsx-impl.inc.c
> @@ -102,8 +102,7 @@ static void gen_lxvw4x(DisasContext *ctx)
>      }
>      xth = tcg_temp_new_i64();
>      xtl = tcg_temp_new_i64();
> -    get_cpu_vsrh(xth, xT(ctx->opcode));
> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
> +

Something seems amiss here.  Clearly we do need a set..() back to the
loaded register, but with the removal of these gets, it doesn't look
like the xth and xtl temporaries are initialized any more.

>      gen_set_access_type(ctx, ACCESS_INT);
>      EA = tcg_temp_new();
>  
> @@ -126,6 +125,8 @@ static void gen_lxvw4x(DisasContext *ctx)
>          tcg_gen_addi_tl(EA, EA, 8);
>          tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
>      }
> +    set_cpu_vsrh(xT(ctx->opcode), xth);
> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>      tcg_temp_free(EA);
>      tcg_temp_free_i64(xth);
>      tcg_temp_free_i64(xtl);
> @@ -185,8 +186,6 @@ static void gen_lxvh8x(DisasContext *ctx)
>      }
>      xth = tcg_temp_new_i64();
>      xtl = tcg_temp_new_i64();
> -    get_cpu_vsrh(xth, xT(ctx->opcode));
> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
>      gen_set_access_type(ctx, ACCESS_INT);
>  
>      EA = tcg_temp_new();
> @@ -197,6 +196,8 @@ static void gen_lxvh8x(DisasContext *ctx)
>      if (ctx->le_mode) {
>          gen_bswap16x8(xth, xtl, xth, xtl);
>      }
> +    set_cpu_vsrh(xT(ctx->opcode), xth);
> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>      tcg_temp_free(EA);
>      tcg_temp_free_i64(xth);
>      tcg_temp_free_i64(xtl);
> @@ -214,14 +215,14 @@ static void gen_lxvb16x(DisasContext *ctx)
>      }
>      xth = tcg_temp_new_i64();
>      xtl = tcg_temp_new_i64();
> -    get_cpu_vsrh(xth, xT(ctx->opcode));
> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
>      gen_set_access_type(ctx, ACCESS_INT);
>      EA = tcg_temp_new();
>      gen_addr_reg_index(ctx, EA);
>      tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
>      tcg_gen_addi_tl(EA, EA, 8);
>      tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
> +    set_cpu_vsrh(xT(ctx->opcode), xth);
> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>      tcg_temp_free(EA);
>      tcg_temp_free_i64(xth);
>      tcg_temp_free_i64(xtl);
Mark Cave-Ayland May 7, 2019, 6:04 p.m. UTC | #2
On 07/05/2019 06:28, David Gibson wrote:

> On Tue, May 07, 2019 at 10:48:06AM +1000, Anton Blanchard wrote:
>> During the conversion these instructions were incorrectly treated as
>> stores. We need to use set_cpu_vsr* and not get_cpu_vsr*.
>>
>> Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() helpers for VSR register access")
>> Signed-off-by: Anton Blanchard <anton@ozlabs.org>
>> ---
>>  target/ppc/translate/vsx-impl.inc.c | 13 +++++++------
>>  1 file changed, 7 insertions(+), 6 deletions(-)
>>
>> diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
>> index 05b75105be..c13f84e745 100644
>> --- a/target/ppc/translate/vsx-impl.inc.c
>> +++ b/target/ppc/translate/vsx-impl.inc.c
>> @@ -102,8 +102,7 @@ static void gen_lxvw4x(DisasContext *ctx)
>>      }
>>      xth = tcg_temp_new_i64();
>>      xtl = tcg_temp_new_i64();
>> -    get_cpu_vsrh(xth, xT(ctx->opcode));
>> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
>> +
> 
> Something seems amiss here.  Clearly we do need a set..() back to the
> loaded register, but with the removal of these gets, it doesn't look
> like the xth and xtl temporaries are initialized any more.
> 
>>      gen_set_access_type(ctx, ACCESS_INT);
>>      EA = tcg_temp_new();
>>  
>> @@ -126,6 +125,8 @@ static void gen_lxvw4x(DisasContext *ctx)
>>          tcg_gen_addi_tl(EA, EA, 8);
>>          tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
>>      }
>> +    set_cpu_vsrh(xT(ctx->opcode), xth);
>> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>>      tcg_temp_free(EA);
>>      tcg_temp_free_i64(xth);
>>      tcg_temp_free_i64(xtl);
>> @@ -185,8 +186,6 @@ static void gen_lxvh8x(DisasContext *ctx)
>>      }
>>      xth = tcg_temp_new_i64();
>>      xtl = tcg_temp_new_i64();
>> -    get_cpu_vsrh(xth, xT(ctx->opcode));
>> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
>>      gen_set_access_type(ctx, ACCESS_INT);
>>  
>>      EA = tcg_temp_new();
>> @@ -197,6 +196,8 @@ static void gen_lxvh8x(DisasContext *ctx)
>>      if (ctx->le_mode) {
>>          gen_bswap16x8(xth, xtl, xth, xtl);
>>      }
>> +    set_cpu_vsrh(xT(ctx->opcode), xth);
>> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>>      tcg_temp_free(EA);
>>      tcg_temp_free_i64(xth);
>>      tcg_temp_free_i64(xtl);
>> @@ -214,14 +215,14 @@ static void gen_lxvb16x(DisasContext *ctx)
>>      }
>>      xth = tcg_temp_new_i64();
>>      xtl = tcg_temp_new_i64();
>> -    get_cpu_vsrh(xth, xT(ctx->opcode));
>> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
>>      gen_set_access_type(ctx, ACCESS_INT);
>>      EA = tcg_temp_new();
>>      gen_addr_reg_index(ctx, EA);
>>      tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
>>      tcg_gen_addi_tl(EA, EA, 8);
>>      tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
>> +    set_cpu_vsrh(xT(ctx->opcode), xth);
>> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>>      tcg_temp_free(EA);
>>      tcg_temp_free_i64(xth);
>>      tcg_temp_free_i64(xtl);

AFAICT this is correct, since the patterns should be as follows:

Load instructions:
    tcg_gen_qemu_ld_i64(xth, ...);
    set_cpu_vsrh(n, xth);

Store instructions:
    get_cpu_vsrh(xth, n);
    tcg_gen_qemu_st_i64(xth, ...);
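
Putting the two halves together, the corrected load path after this patch
(reconstructed from the gen_lxvb16x() hunk below; the declarations and the
VSX-availability check above the hunk are assumed, since they are not part
of the diff) looks like this:

static void gen_lxvb16x(DisasContext *ctx)
{
    TCGv EA;
    TCGv_i64 xth, xtl;

    /* Assumed preamble, outside the quoted hunk: facility check. */
    if (unlikely(!ctx->vsx_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VSXU);
        return;
    }
    xth = tcg_temp_new_i64();
    xtl = tcg_temp_new_i64();
    gen_set_access_type(ctx, ACCESS_INT);
    EA = tcg_temp_new();
    gen_addr_reg_index(ctx, EA);
    /* Load from memory first ... */
    tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
    tcg_gen_addi_tl(EA, EA, 8);
    tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
    /* ... then write the result back into the target VSR. */
    set_cpu_vsrh(xT(ctx->opcode), xth);
    set_cpu_vsrl(xT(ctx->opcode), xtl);
    tcg_temp_free(EA);
    tcg_temp_free_i64(xth);
    tcg_temp_free_i64(xtl);
}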

I remember that when I first started experimenting with the very first version of
this patchset last year, someone on IRC (maybe Richard?) pointed out that I had
inverted the load and store operations and so I went and reworked them all from
scratch. Unfortunately, with this and Greg's patch for stxsdx, I have a feeling that
something went wrong during a cherry-pick or rebase of the patchset :(

Following on from this I've just gone through the load/store operations once again
and spotted two things:


1) VSX_LOAD_SCALAR_DS has an extra get_cpu_vsrh() which can be removed

diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
index 11d9b75d01..004ea56c4f 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -329,7 +329,6 @@ static void gen_##name(DisasContext *ctx)                         \
         return;                                                   \
     }                                                             \
     xth = tcg_temp_new_i64();                                     \
-    get_cpu_vsrh(xth, rD(ctx->opcode) + 32);                      \
     gen_set_access_type(ctx, ACCESS_INT);                         \
     EA = tcg_temp_new();                                          \
     gen_addr_imm_index(ctx, EA, 0x03);                            \


2) VSX_VECTOR_LOAD_STORE is confusing and should be split into separate
VSX_VECTOR_LOAD and VSX_VECTOR_STORE macros
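
As a rough illustration, the load half of such a split might look something
like the sketch below, modelled on gen_lxvb16x() from this patch; the macro
names and bodies here are assumptions, not code taken from the tree:

#define VSX_VECTOR_LOAD(name, op)                                 \
static void gen_##name(DisasContext *ctx)                         \
{                                                                 \
    TCGv EA;                                                      \
    TCGv_i64 xth, xtl;                                            \
                                                                  \
    if (unlikely(!ctx->vsx_enabled)) {                            \
        gen_exception(ctx, POWERPC_EXCP_VSXU);                    \
        return;                                                   \
    }                                                             \
    xth = tcg_temp_new_i64();                                     \
    xtl = tcg_temp_new_i64();                                     \
    gen_set_access_type(ctx, ACCESS_INT);                         \
    EA = tcg_temp_new();                                          \
    gen_addr_reg_index(ctx, EA);                                  \
    /* Loads: read memory first, then update the target VSR. */   \
    tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ);              \
    set_cpu_vsrh(xT(ctx->opcode), xth);                           \
    tcg_gen_addi_tl(EA, EA, 8);                                   \
    tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ);              \
    set_cpu_vsrl(xT(ctx->opcode), xtl);                           \
    tcg_temp_free(EA);                                            \
    tcg_temp_free_i64(xth);                                       \
    tcg_temp_free_i64(xtl);                                       \
}

A matching VSX_VECTOR_STORE would mirror this, calling get_cpu_vsrh() and
get_cpu_vsrl() before the tcg_gen_qemu_##op stores, so that the direction
of the data flow is obvious from the macro name.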


Does that sound reasonable? I'm also thinking that we should consider adding a CC to
stable for patches 4, 5 and 9 in this series since these are genuine regressions.


ATB,

Mark.
Anton Blanchard May 9, 2019, 12:33 a.m. UTC | #3
Hi Mark,

> Following on from this I've just gone through the load/store
> operations once again and spotted two things:
> 
> 
> 1) VSX_LOAD_SCALAR_DS has an extra get_cpu_vsrh() which can be removed
> 
> diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
> index 11d9b75d01..004ea56c4f 100644
> --- a/target/ppc/translate/vsx-impl.inc.c
> +++ b/target/ppc/translate/vsx-impl.inc.c
> @@ -329,7 +329,6 @@ static void gen_##name(DisasContext *ctx)                         \
>          return;                                                   \
>      }                                                             \
>      xth = tcg_temp_new_i64();                                     \
> -    get_cpu_vsrh(xth, rD(ctx->opcode) + 32);                      \
>      gen_set_access_type(ctx, ACCESS_INT);                         \
>      EA = tcg_temp_new();                                          \
>      gen_addr_imm_index(ctx, EA, 0x03);                            \

Looks good. I also noticed we had two stores that needed to be fixed:

VSX_LOAD_SCALAR_DS(stxsd, st64_i64)
VSX_LOAD_SCALAR_DS(stxssp, st32fs)
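
For completeness, a store-only counterpart might look something like the
sketch below; the VSX_STORE_SCALAR_DS name is hypothetical, and the
gen_qemu_##operation() call is inferred from the macro's second argument
rather than quoted from the source:

#define VSX_STORE_SCALAR_DS(name, operation)                      \
static void gen_##name(DisasContext *ctx)                         \
{                                                                 \
    TCGv EA;                                                      \
    TCGv_i64 xth;                                                 \
                                                                  \
    /* facility check as in VSX_LOAD_SCALAR_DS, elided here */    \
    xth = tcg_temp_new_i64();                                     \
    /* Stores must read the source VSR before writing memory. */  \
    get_cpu_vsrh(xth, rD(ctx->opcode) + 32);                      \
    gen_set_access_type(ctx, ACCESS_INT);                         \
    EA = tcg_temp_new();                                          \
    gen_addr_imm_index(ctx, EA, 0x03);                            \
    gen_qemu_##operation(ctx, xth, EA);                           \
    tcg_temp_free(EA);                                            \
    tcg_temp_free_i64(xth);                                       \
}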

> 2) VSX_VECTOR_LOAD_STORE is confusing and should be split into
> separate VSX_VECTOR_LOAD and VSX_VECTOR_STORE macros

Good idea. I also removed what I assume are redundant set_cpu_vsr*
and get_cpu_vsr* calls.

> Does that sound reasonable? I'm also thinking that we should consider
> adding a CC to stable for patches 4, 5 and 9 in this series since
> these are genuine regressions.

Fine with me. If David agrees, I'm not sure whether he can rebase them or
whether I can send them manually if they have already been committed.

Thanks,
Anton
David Gibson May 9, 2019, 5:35 a.m. UTC | #4
On Thu, May 09, 2019 at 10:33:24AM +1000, Anton Blanchard wrote:
> Hi Mark,
> 
> > Following on from this I've just gone through the load/store
> > operations once again and spotted two things:
> > 
> > 
> > 1) VSX_LOAD_SCALAR_DS has an extra get_cpu_vsrh() which can be removed
> > 
> > diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
> > index 11d9b75d01..004ea56c4f 100644
> > --- a/target/ppc/translate/vsx-impl.inc.c
> > +++ b/target/ppc/translate/vsx-impl.inc.c
> > @@ -329,7 +329,6 @@ static void gen_##name(DisasContext *ctx)                         \
> >          return;                                                   \
> >      }                                                             \
> >      xth = tcg_temp_new_i64();                                     \
> > -    get_cpu_vsrh(xth, rD(ctx->opcode) + 32);                      \
> >      gen_set_access_type(ctx, ACCESS_INT);                         \
> >      EA = tcg_temp_new();                                          \
> >      gen_addr_imm_index(ctx, EA, 0x03);                            \
> 
> Looks good. I also noticed we had two stores that needed to be fixed:
> 
> VSX_LOAD_SCALAR_DS(stxsd, st64_i64)
> VSX_LOAD_SCALAR_DS(stxssp, st32fs)
> 
> > 2) VSX_VECTOR_LOAD_STORE is confusing and should be split into
> > separate VSX_VECTOR_LOAD and VSX_VECTOR_STORE macros
> 
> Good idea. I also removed what I assume are redundant set_cpu_vsr*
> and get_cpu_vsr* calls.
> 
> > Does that sound reasonable? I'm also thinking that we should consider
> > adding a CC to stable for patches 4, 5 and 9 in this series since
> > these are genuine regressions.
> 
> Fine with me. If David agrees, I'm not sure whether he can rebase them or
> whether I can send them manually if they have already been committed.

Usually going to stable is just a matter of pinging Mike Roth and
getting him to include it.  I can queue it somewhere if you like, but
the stable cadence is so slow it tends to get forgotten a bit.
Mark Cave-Ayland May 10, 2019, 3:11 p.m. UTC | #5
On 07/05/2019 01:48, Anton Blanchard wrote:

> During the conversion these instructions were incorrectly treated as
> stores. We need to use set_cpu_vsr* and not get_cpu_vsr*.
> 
> Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() helpers for VSR register access")
> Signed-off-by: Anton Blanchard <anton@ozlabs.org>
> ---
>  target/ppc/translate/vsx-impl.inc.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
> index 05b75105be..c13f84e745 100644
> --- a/target/ppc/translate/vsx-impl.inc.c
> +++ b/target/ppc/translate/vsx-impl.inc.c
> @@ -102,8 +102,7 @@ static void gen_lxvw4x(DisasContext *ctx)
>      }
>      xth = tcg_temp_new_i64();
>      xtl = tcg_temp_new_i64();
> -    get_cpu_vsrh(xth, xT(ctx->opcode));
> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
> +
>      gen_set_access_type(ctx, ACCESS_INT);
>      EA = tcg_temp_new();
>  
> @@ -126,6 +125,8 @@ static void gen_lxvw4x(DisasContext *ctx)
>          tcg_gen_addi_tl(EA, EA, 8);
>          tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
>      }
> +    set_cpu_vsrh(xT(ctx->opcode), xth);
> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>      tcg_temp_free(EA);
>      tcg_temp_free_i64(xth);
>      tcg_temp_free_i64(xtl);
> @@ -185,8 +186,6 @@ static void gen_lxvh8x(DisasContext *ctx)
>      }
>      xth = tcg_temp_new_i64();
>      xtl = tcg_temp_new_i64();
> -    get_cpu_vsrh(xth, xT(ctx->opcode));
> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
>      gen_set_access_type(ctx, ACCESS_INT);
>  
>      EA = tcg_temp_new();
> @@ -197,6 +196,8 @@ static void gen_lxvh8x(DisasContext *ctx)
>      if (ctx->le_mode) {
>          gen_bswap16x8(xth, xtl, xth, xtl);
>      }
> +    set_cpu_vsrh(xT(ctx->opcode), xth);
> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>      tcg_temp_free(EA);
>      tcg_temp_free_i64(xth);
>      tcg_temp_free_i64(xtl);
> @@ -214,14 +215,14 @@ static void gen_lxvb16x(DisasContext *ctx)
>      }
>      xth = tcg_temp_new_i64();
>      xtl = tcg_temp_new_i64();
> -    get_cpu_vsrh(xth, xT(ctx->opcode));
> -    get_cpu_vsrl(xtl, xT(ctx->opcode));
>      gen_set_access_type(ctx, ACCESS_INT);
>      EA = tcg_temp_new();
>      gen_addr_reg_index(ctx, EA);
>      tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
>      tcg_gen_addi_tl(EA, EA, 8);
>      tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
> +    set_cpu_vsrh(xT(ctx->opcode), xth);
> +    set_cpu_vsrl(xT(ctx->opcode), xtl);
>      tcg_temp_free(EA);
>      tcg_temp_free_i64(xth);
>      tcg_temp_free_i64(xtl);

I've now had a bit of time to look through this and I believe it is correct, so:

Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>


ATB,

Mark.
Anton Blanchard May 21, 2019, 8:11 p.m. UTC | #6
Hi,

> I've now had a bit of time to look through this and I believe it is
> correct, so:
> 
> Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>

Thanks Mark. David: any chance we could get this merged? I can't run a
recent Ubuntu image successfully without it. sshd hangs when I try to
ssh into it.

Thanks,
Anton
David Gibson May 22, 2019, 12:49 a.m. UTC | #7
On Wed, May 22, 2019 at 06:11:12AM +1000, Anton Blanchard wrote:
> Hi,
> 
> > I've now had a bit of time to look through this and I believe it is
> > correct, so:
> > 
> > Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> 
> Thanks Mark. David: any chance we could get this merged? I can't run a
> recent Ubuntu image successfully without it. sshd hangs when I try to
> ssh into it.

I had a comment that was never addressed - it didn't look like the xth
and xtl temporaries were initialized after the patch.
Mark Cave-Ayland May 22, 2019, 4:37 a.m. UTC | #8
On 22/05/2019 01:49, David Gibson wrote:

> On Wed, May 22, 2019 at 06:11:12AM +1000, Anton Blanchard wrote:
>> Hi,
>>
>>> I've now had a bit of time to look through this and I believe it is
>>> correct, so:
>>>
>>> Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
>>
>> Thanks Mark. David: any chance we could get this merged? I can't run a
>> recent Ubuntu image successfully without it. sshd hangs when I try to
>> ssh into it.
> 
> I had a comment that was never addressed - it didn't look like the xth
> and xtl temporaries were initialized after the patch.

If it helps, here was my analysis at the time (looks like you were also included on
the reply?): https://lists.gnu.org/archive/html/qemu-devel/2019-05/msg01515.html.


ATB,

Mark.
David Gibson May 22, 2019, 6:10 a.m. UTC | #9
On Wed, May 22, 2019 at 05:37:47AM +0100, Mark Cave-Ayland wrote:
> On 22/05/2019 01:49, David Gibson wrote:
> 
> > On Wed, May 22, 2019 at 06:11:12AM +1000, Anton Blanchard wrote:
> >> Hi,
> >>
> >>> I've now had a bit of time to look through this and I believe it is
> >>> correct, so:
> >>>
> >>> Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> >>
> >> Thanks Mark. David: any chance we could get this merged? I can't run a
> >> recent Ubuntu image successfully without it. sshd hangs when I try to
> >> ssh into it.
> > 
> > I had a comment that was never addressed - it didn't look like the xth
> > and xtl temporaries were initialized after the patch.
> 
> If it helps, here was my analysis at the time (looks like you were also included on
> the reply?): https://lists.gnu.org/archive/html/qemu-devel/2019-05/msg01515.html.

Sorry, I missed that.  Looks reasonable, I think I failed to spot the
generated load instructions which effectively initialize the temps.

This is all at some remove now, can you resend the patch on top of the
latest tree please and I'll apply.  It's missed the pull request I
sent today, obviously, but I know I have some other stuff I want to
get in pretty soon, so I expect to send another one relatively soon.
Greg Kurz May 22, 2019, 7:39 a.m. UTC | #10
On Wed, 22 May 2019 06:11:12 +1000
Anton Blanchard <anton@ozlabs.org> wrote:

> Hi,
> 
> > I've now had a bit of time to look through this and I believe it is
> > correct, so:
> > 
> > Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>  
> 
> Thanks Mark. David: any chance we could get this merged? I can't run a
> recent Ubuntu image successfully without it. sshd hangs when I try to
> ssh into it.
> 

Ha! I also had trouble ssh'ing into a Fedora guest, but couldn't find
time to investigate:

$ ssh 192.168.122.76
ssh_dispatch_run_fatal: Connection to 192.168.122.76 port 22: incorrect signature

It doesn't happen anymore with this patch. Maybe worth mentioning it in the
changelog, and Cc stable of course since this is a regression in QEMU 4.0.

Tested-by: Greg Kurz <groug@kaod.org>

Also, as Mark mentioned in another mail, this looks very much like the other
bug I had fixed with 3e5365b7aa6c "target/ppc: Fix QEMU crash with stxsdx".
The patch looks correct:

Reviewed-by: Greg Kurz <groug@kaod.org>

> Thanks,
> Anton
>
Mark Cave-Ayland May 24, 2019, 6:54 a.m. UTC | #11
On 22/05/2019 07:10, David Gibson wrote:

> On Wed, May 22, 2019 at 05:37:47AM +0100, Mark Cave-Ayland wrote:
>> On 22/05/2019 01:49, David Gibson wrote:
>>
>>> On Wed, May 22, 2019 at 06:11:12AM +1000, Anton Blanchard wrote:
>>>> Hi,
>>>>
>>>>> I've now had a bit of time to look through this and I believe it is
>>>>> correct, so:
>>>>>
>>>>> Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
>>>>
>>>> Thanks Mark. David: any chance we could get this merged? I can't run a
>>>> recent Ubuntu image successfully without it. sshd hangs when I try to
>>>> ssh into it.
>>>
>>> I had a comment that was never addressed - it didn't look like the xth
>>> and xtl temporaries were initialized after the patch.
>>
>> If it helps, here was my analysis at the time (looks like you were also included on
>> the reply?): https://lists.gnu.org/archive/html/qemu-devel/2019-05/msg01515.html.
> 
> Sorry, I missed that.  Looks reasonable, I think I failed to spot the
> generated load instructions which effectively initialize the temps.
> 
> This is all at some remove now, can you resend the patch on top of the
> latest tree please and I'll apply.  It's missed the pull request I
> sent today, obviously, but I know I have some other stuff I want to
> get in pretty soon, so I expect to send another one relatively soon.

All done - I've just sent a v2 rebased upon your ppc-for-4.1 branch with R-B and T-B
tags included.


ATB,

Mark.

Patch

diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
index 05b75105be..c13f84e745 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -102,8 +102,7 @@  static void gen_lxvw4x(DisasContext *ctx)
     }
     xth = tcg_temp_new_i64();
     xtl = tcg_temp_new_i64();
-    get_cpu_vsrh(xth, xT(ctx->opcode));
-    get_cpu_vsrl(xtl, xT(ctx->opcode));
+
     gen_set_access_type(ctx, ACCESS_INT);
     EA = tcg_temp_new();
 
@@ -126,6 +125,8 @@  static void gen_lxvw4x(DisasContext *ctx)
         tcg_gen_addi_tl(EA, EA, 8);
         tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
     }
+    set_cpu_vsrh(xT(ctx->opcode), xth);
+    set_cpu_vsrl(xT(ctx->opcode), xtl);
     tcg_temp_free(EA);
     tcg_temp_free_i64(xth);
     tcg_temp_free_i64(xtl);
@@ -185,8 +186,6 @@  static void gen_lxvh8x(DisasContext *ctx)
     }
     xth = tcg_temp_new_i64();
     xtl = tcg_temp_new_i64();
-    get_cpu_vsrh(xth, xT(ctx->opcode));
-    get_cpu_vsrl(xtl, xT(ctx->opcode));
     gen_set_access_type(ctx, ACCESS_INT);
 
     EA = tcg_temp_new();
@@ -197,6 +196,8 @@  static void gen_lxvh8x(DisasContext *ctx)
     if (ctx->le_mode) {
         gen_bswap16x8(xth, xtl, xth, xtl);
     }
+    set_cpu_vsrh(xT(ctx->opcode), xth);
+    set_cpu_vsrl(xT(ctx->opcode), xtl);
     tcg_temp_free(EA);
     tcg_temp_free_i64(xth);
     tcg_temp_free_i64(xtl);
@@ -214,14 +215,14 @@  static void gen_lxvb16x(DisasContext *ctx)
     }
     xth = tcg_temp_new_i64();
     xtl = tcg_temp_new_i64();
-    get_cpu_vsrh(xth, xT(ctx->opcode));
-    get_cpu_vsrl(xtl, xT(ctx->opcode));
     gen_set_access_type(ctx, ACCESS_INT);
     EA = tcg_temp_new();
     gen_addr_reg_index(ctx, EA);
     tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
     tcg_gen_addi_tl(EA, EA, 8);
     tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
+    set_cpu_vsrh(xT(ctx->opcode), xth);
+    set_cpu_vsrl(xT(ctx->opcode), xtl);
     tcg_temp_free(EA);
     tcg_temp_free_i64(xth);
     tcg_temp_free_i64(xtl);