
[v4,24/47] target/ppc: move vrl[bhwd]nm/vrl[bhwd]mi to decodetree

Message ID 20220222143646.1268606-25-matheus.ferst@eldorado.org.br (mailing list archive)
State New, archived
Series target/ppc: PowerISA Vector/VSX instruction batch

Commit Message

Matheus K. Ferst Feb. 22, 2022, 2:36 p.m. UTC
From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
v4:
 -  New in v4.
---
 target/ppc/helper.h                 |   8 +-
 target/ppc/insn32.decode            |   6 ++
 target/ppc/int_helper.c             |  50 ++++-----
 target/ppc/translate/vmx-impl.c.inc | 152 ++++++++++++++++++++++++++--
 target/ppc/translate/vmx-ops.c.inc  |   5 +-
 5 files changed, 182 insertions(+), 39 deletions(-)

Comments

Richard Henderson Feb. 22, 2022, 10:30 p.m. UTC | #1
On 2/22/22 04:36, matheus.ferst@eldorado.org.br wrote:
> +static void gen_vrlnm_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
> +                          TCGv_vec vrb)
> +{
> +    TCGv_vec mask, n = tcg_temp_new_vec_matching(vrt);
> +
> +    /* Create the mask */
> +    mask = do_vrl_mask_vec(vece, vrb);
> +
> +    /* Extract n */
> +    tcg_gen_dupi_vec(vece, n, (8 << vece) - 1);
> +    tcg_gen_and_vec(vece, n, vrb, n);
> +
> +    /* Rotate and mask */
> +    tcg_gen_rotlv_vec(vece, vrt, vra, n);

Note that rotlv does the masking itself:

/*
  * Expand D = A << (B % element bits)
  *
  * Unlike scalar shifts, where it is easy for the target front end
  * to include the modulo as part of the expansion.  If the target
  * naturally includes the modulo as part of the operation, great!
  * If the target has some other behaviour from out-of-range shifts,
  * then it could not use this function anyway, and would need to
  * do it's own expansion with custom functions.
  */
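
[Editor's note: a minimal sketch of the simplification being suggested, assuming the modulo is applied for us; mask here is the value built by do_vrl_mask_vec in the patch below.]

     /* Rotate directly by vrb (relying on the B % element bits behavior), then mask */
     tcg_gen_rotlv_vec(vece, vrt, vra, vrb);
     tcg_gen_and_vec(vece, vrt, vrt, mask);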

> +static bool do_vrlnm(DisasContext *ctx, arg_VX *a, int vece)
> +{
> +    static const TCGOpcode vecop_list[] = {
> +        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
> +        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
> +    };

Where is sari used?



r~
Matheus K. Ferst Feb. 23, 2022, 9:43 p.m. UTC | #2
On 22/02/2022 19:30, Richard Henderson wrote:
> On 2/22/22 04:36, matheus.ferst@eldorado.org.br wrote:
>> +static void gen_vrlnm_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
>> +                          TCGv_vec vrb)
>> +{
>> +    TCGv_vec mask, n = tcg_temp_new_vec_matching(vrt);
>> +
>> +    /* Create the mask */
>> +    mask = do_vrl_mask_vec(vece, vrb);
>> +
>> +    /* Extract n */
>> +    tcg_gen_dupi_vec(vece, n, (8 << vece) - 1);
>> +    tcg_gen_and_vec(vece, n, vrb, n);
>> +
>> +    /* Rotate and mask */
>> +    tcg_gen_rotlv_vec(vece, vrt, vra, n);
> 
> Note that rotlv does the masking itself:
> 
> /*
>   * Expand D = A << (B % element bits)
>   *
>   * Unlike scalar shifts, where it is easy for the target front end
>   * to include the modulo as part of the expansion.  If the target
>   * naturally includes the modulo as part of the operation, great!
>   * If the target has some other behaviour from out-of-range shifts,
>   * then it could not use this function anyway, and would need to
>   * do it's own expansion with custom functions.
>   */
> 

Using tcg_gen_rotlv_vec(vece, vrt, vra, vrb) works on PPC but fails on 
x86. It looks like a problem on the i386 backend. It's using 
VPS[RL]LV[DQ], but instead of this modulo behavior, these instructions 
write zero to the element[1]. I'm not sure how to fix that. Do we need 
an INDEX_op_shlv_vec case in i386 tcg_expand_vec_op?

>> +static bool do_vrlnm(DisasContext *ctx, arg_VX *a, int vece)
>> +{
>> +    static const TCGOpcode vecop_list[] = {
>> +        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
>> +        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
>> +    };
> 
> Where is sari used?
> 

I'll remove it in v5.

[1] Section 5.3 of 
https://www.intel.com/content/dam/develop/external/us/en/documents/36945

Thanks,
Matheus K. Ferst
Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/>
Software Analyst
Legal Notice - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Richard Henderson Feb. 23, 2022, 10:19 p.m. UTC | #3
On 2/23/22 11:43, Matheus K. Ferst wrote:
>> Note that rotlv does the masking itself:
>>
>> /*
>>   * Expand D = A << (B % element bits)
>>   *
>>   * Unlike scalar shifts, where it is easy for the target front end
>>   * to include the modulo as part of the expansion.  If the target
>>   * naturally includes the modulo as part of the operation, great!
>>   * If the target has some other behaviour from out-of-range shifts,
>>   * then it could not use this function anyway, and would need to
>>   * do it's own expansion with custom functions.
>>   */
>>
> 
> Using tcg_gen_rotlv_vec(vece, vrt, vra, vrb) works on PPC but fails on x86. It looks like 
> a problem on the i386 backend. It's using VPS[RL]LV[DQ], but instead of this modulo 
> behavior, these instructions write zero to the element[1]. I'm not sure how to fix that. 

You don't want to use tcg_gen_rotlv_vec directly, but tcg_gen_rotlv_vec.

The generic modulo is being applied here:

static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
                                   TCGv_vec a, TCGv_vec b)
{
     TCGv_vec t = tcg_temp_new_vec_matching(d);
     TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);

     tcg_gen_and_vec(vece, t, b, m);
     tcg_gen_rotlv_vec(vece, d, a, t);
     tcg_temp_free_vec(t);
}


r~
Matheus K. Ferst Feb. 24, 2022, 8:23 p.m. UTC | #4
On 23/02/2022 19:19, Richard Henderson wrote:
> On 2/23/22 11:43, Matheus K. Ferst wrote:
>>> Note that rotlv does the masking itself:
>>>
>>> /*
>>>   * Expand D = A << (B % element bits)
>>>   *
>>>   * Unlike scalar shifts, where it is easy for the target front end
>>>   * to include the modulo as part of the expansion.  If the target
>>>   * naturally includes the modulo as part of the operation, great!
>>>   * If the target has some other behaviour from out-of-range shifts,
>>>   * then it could not use this function anyway, and would need to
>>>   * do it's own expansion with custom functions.
>>>   */
>>>
>>
>> Using tcg_gen_rotlv_vec(vece, vrt, vra, vrb) works on PPC but fails on 
>> x86. It looks like
>> a problem on the i386 backend. It's using VPS[RL]LV[DQ], but instead 
>> of this modulo
>> behavior, these instructions write zero to the element[1]. I'm not 
>> sure how to fix that.
> 
> You don't want to use tcg_gen_rotlv_vec directly, but tcg_gen_rotlv_vec.
> 

I guess there is a typo here. Did you mean tcg_gen_gvec_rotlv? Or 
tcg_gen_rotlv_mod_vec?

> The generic modulo is being applied here:
> 
> static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
>                                    TCGv_vec a, TCGv_vec b)
> {
>      TCGv_vec t = tcg_temp_new_vec_matching(d);
>      TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
> 
>      tcg_gen_and_vec(vece, t, b, m);
>      tcg_gen_rotlv_vec(vece, d, a, t);
>      tcg_temp_free_vec(t);
> }

I can see that this method is called when we use tcg_gen_gvec_rotlv to 
implement vrl[bhwd], and they are working as expected. For vrl[wd]nm and 
vrl[wd]mi, however, we can't call tcg_gen_rotlv_mod_vec directly in the 
.fniv implementation because it is not exposed in tcg-op.h. Is there any 
other way to use this method? Should we add it to the header file?
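
[Editor's note: for reference, a sketch of the open-coded equivalent that the patch's .fniv expansions keep in the meantime, i.e. the same modulo applied by hand before tcg_gen_rotlv_vec:]

     TCGv_vec n = tcg_temp_new_vec_matching(vrt);

     tcg_gen_dupi_vec(vece, n, (8 << vece) - 1);  /* element bits - 1 */
     tcg_gen_and_vec(vece, n, vrb, n);            /* n = vrb % element bits */
     tcg_gen_rotlv_vec(vece, vrt, vra, n);        /* rotate count now in range */
     tcg_temp_free_vec(n);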

Thanks,
Matheus K. Ferst
Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/>
Software Analyst
Legal Notice - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Richard Henderson Feb. 24, 2022, 9:26 p.m. UTC | #5
On 2/24/22 10:23, Matheus K. Ferst wrote:
>> You don't want to use tcg_gen_rotlv_vec directly, but tcg_gen_rotlv_vec.
>>
> 
> I guess there is a typo here. Did you mean tcg_gen_gvec_rotlv? Or tcg_gen_rotlv_mod_vec?

Dangit.  Paste-paste error.  The first: tcg_gen_gvec_rotlv.


r~
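
[Editor's note: a minimal sketch of the gvec-level call referred to above, as already used for the plain VRL[BHWD] forms in this series; on this path the tcg_gen_rotlv_mod_vec wrapper quoted earlier is applied automatically by the expander.]

     tcg_gen_gvec_rotlv(vece, avr_full_offset(a->vrt), avr_full_offset(a->vra),
                        avr_full_offset(a->vrb), 16, 16);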

Patch

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 269150b197..a2a0d461dd 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -275,10 +275,10 @@  DEF_HELPER_4(vmaxfp, void, env, avr, avr, avr)
 DEF_HELPER_4(vminfp, void, env, avr, avr, avr)
 DEF_HELPER_3(vrefp, void, env, avr, avr)
 DEF_HELPER_3(vrsqrtefp, void, env, avr, avr)
-DEF_HELPER_3(vrlwmi, void, avr, avr, avr)
-DEF_HELPER_3(vrldmi, void, avr, avr, avr)
-DEF_HELPER_3(vrldnm, void, avr, avr, avr)
-DEF_HELPER_3(vrlwnm, void, avr, avr, avr)
+DEF_HELPER_4(VRLWMI, void, avr, avr, avr, i32)
+DEF_HELPER_4(VRLDMI, void, avr, avr, avr, i32)
+DEF_HELPER_4(VRLDNM, void, avr, avr, avr, i32)
+DEF_HELPER_4(VRLWNM, void, avr, avr, avr, i32)
 DEF_HELPER_5(vmaddfp, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vnmsubfp, void, env, avr, avr, avr, avr)
 DEF_HELPER_3(vexptefp, void, env, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index d918e2d0f2..e788dc5152 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -492,6 +492,12 @@  VRLH            000100 ..... ..... ..... 00001000100    @VX
 VRLW            000100 ..... ..... ..... 00010000100    @VX
 VRLD            000100 ..... ..... ..... 00011000100    @VX
 
+VRLWMI          000100 ..... ..... ..... 00010000101    @VX
+VRLDMI          000100 ..... ..... ..... 00011000101    @VX
+
+VRLWNM          000100 ..... ..... ..... 00110000101    @VX
+VRLDNM          000100 ..... ..... ..... 00111000101    @VX
+
 ## Vector Integer Arithmetic Instructions
 
 VEXTSB2W        000100 ..... 10000 ..... 11000000010    @VX_tb
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 0a094b535a..58e57b2563 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1291,33 +1291,33 @@  void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
     }
 }
 
-#define VRLMI(name, size, element, insert)                            \
-void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
-{                                                                     \
-    int i;                                                            \
-    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
-        uint##size##_t src1 = a->element[i];                          \
-        uint##size##_t src2 = b->element[i];                          \
-        uint##size##_t src3 = r->element[i];                          \
-        uint##size##_t begin, end, shift, mask, rot_val;              \
-                                                                      \
-        shift = extract##size(src2, 0, 6);                            \
-        end   = extract##size(src2, 8, 6);                            \
-        begin = extract##size(src2, 16, 6);                           \
-        rot_val = rol##size(src1, shift);                             \
-        mask = mask_u##size(begin, end);                              \
-        if (insert) {                                                 \
-            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
-        } else {                                                      \
-            r->element[i] = (rot_val & mask);                         \
-        }                                                             \
-    }                                                                 \
+#define VRLMI(name, size, element, insert)                                  \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
+{                                                                           \
+    int i;                                                                  \
+    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                          \
+        uint##size##_t src1 = a->element[i];                                \
+        uint##size##_t src2 = b->element[i];                                \
+        uint##size##_t src3 = r->element[i];                                \
+        uint##size##_t begin, end, shift, mask, rot_val;                    \
+                                                                            \
+        shift = extract##size(src2, 0, 6);                                  \
+        end   = extract##size(src2, 8, 6);                                  \
+        begin = extract##size(src2, 16, 6);                                 \
+        rot_val = rol##size(src1, shift);                                   \
+        mask = mask_u##size(begin, end);                                    \
+        if (insert) {                                                       \
+            r->element[i] = (rot_val & mask) | (src3 & ~mask);              \
+        } else {                                                            \
+            r->element[i] = (rot_val & mask);                               \
+        }                                                                   \
+    }                                                                       \
 }
 
-VRLMI(vrldmi, 64, u64, 1);
-VRLMI(vrlwmi, 32, u32, 1);
-VRLMI(vrldnm, 64, u64, 0);
-VRLMI(vrlwnm, 32, u32, 0);
+VRLMI(VRLDMI, 64, u64, 1);
+VRLMI(VRLWMI, 32, u32, 1);
+VRLMI(VRLDNM, 64, u64, 0);
+VRLMI(VRLWNM, 32, u32, 0);
 
 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 9dcac4243f..a025404032 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -799,7 +799,6 @@  static void trans_vclzd(DisasContext *ctx)
 }
 
 GEN_VXFORM_V(vmuluwm, MO_32, tcg_gen_gvec_mul, 4, 2);
-GEN_VXFORM(vrlwnm, 2, 6);
 GEN_VXFORM(vsrv, 2, 28);
 GEN_VXFORM(vslv, 2, 29);
 GEN_VXFORM(vslo, 6, 16);
@@ -839,6 +838,152 @@  TRANS_FLAGS(ALTIVEC, VRLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_rotlv)
 TRANS_FLAGS(ALTIVEC, VRLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_rotlv)
 TRANS_FLAGS2(ALTIVEC_207, VRLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_rotlv)
 
+static TCGv_vec do_vrl_mask_vec(unsigned vece, TCGv_vec vrb)
+{
+    TCGv_vec t0 = tcg_temp_new_vec_matching(vrb),
+             t1 = tcg_temp_new_vec_matching(vrb),
+             t2 = tcg_temp_new_vec_matching(vrb),
+             ones = tcg_constant_vec_matching(vrb, vece, -1);
+
+    /* Extract b and e */
+    tcg_gen_dupi_vec(vece, t2, (8 << vece) - 1);
+
+    tcg_gen_shri_vec(vece, t0, vrb, 16);
+    tcg_gen_and_vec(vece, t0, t0, t2);
+
+    tcg_gen_shri_vec(vece, t1, vrb, 8);
+    tcg_gen_and_vec(vece, t1, t1, t2);
+
+    /* Compare b and e to negate the mask where begin > end */
+    tcg_gen_cmp_vec(TCG_COND_GT, vece, t2, t0, t1);
+
+    /* Create the mask with (~0 >> b) ^ ((~0 >> e) >> 1) */
+    tcg_gen_shrv_vec(vece, t0, ones, t0);
+    tcg_gen_shrv_vec(vece, t1, ones, t1);
+    tcg_gen_shri_vec(vece, t1, t1, 1);
+    tcg_gen_xor_vec(vece, t0, t0, t1);
+
+    /* negate the mask */
+    tcg_gen_xor_vec(vece, t0, t0, t2);
+
+    tcg_temp_free_vec(t1);
+    tcg_temp_free_vec(t2);
+
+    return t0;
+}
+
+static void gen_vrlnm_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
+                          TCGv_vec vrb)
+{
+    TCGv_vec mask, n = tcg_temp_new_vec_matching(vrt);
+
+    /* Create the mask */
+    mask = do_vrl_mask_vec(vece, vrb);
+
+    /* Extract n */
+    tcg_gen_dupi_vec(vece, n, (8 << vece) - 1);
+    tcg_gen_and_vec(vece, n, vrb, n);
+
+    /* Rotate and mask */
+    tcg_gen_rotlv_vec(vece, vrt, vra, n);
+    tcg_gen_and_vec(vece, vrt, vrt, mask);
+
+    tcg_temp_free_vec(n);
+    tcg_temp_free_vec(mask);
+}
+
+static bool do_vrlnm(DisasContext *ctx, arg_VX *a, int vece)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
+        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
+    };
+    static const GVecGen3 ops[2] = {
+        {
+            .fniv = gen_vrlnm_vec,
+            .fno = gen_helper_VRLWNM,
+            .opt_opc = vecop_list,
+            .load_dest = true,
+            .vece = MO_32
+        },
+        {
+            .fniv = gen_vrlnm_vec,
+            .fno = gen_helper_VRLDNM,
+            .opt_opc = vecop_list,
+            .load_dest = true,
+            .vece = MO_64
+        }
+    };
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    REQUIRE_VSX(ctx);
+
+    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
+                   avr_full_offset(a->vrb), 16, 16, &ops[vece - 2]);
+
+    return true;
+}
+
+TRANS(VRLWNM, do_vrlnm, MO_32)
+TRANS(VRLDNM, do_vrlnm, MO_64)
+
+static void gen_vrlmi_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
+                          TCGv_vec vrb)
+{
+    TCGv_vec mask, n = tcg_temp_new_vec_matching(vrt),
+             tmp = tcg_temp_new_vec_matching(vrt);
+
+    /* Create the mask */
+    mask = do_vrl_mask_vec(vece, vrb);
+
+    /* Extract n */
+    tcg_gen_dupi_vec(vece, n, (8 << vece) - 1);
+    tcg_gen_and_vec(vece, n, vrb, n);
+
+    /* Rotate and insert */
+    tcg_gen_rotlv_vec(vece, tmp, vra, n);
+    tcg_gen_bitsel_vec(vece, vrt, mask, tmp, vrt);
+
+    tcg_temp_free_vec(n);
+    tcg_temp_free_vec(tmp);
+    tcg_temp_free_vec(mask);
+}
+
+static bool do_vrlmi(DisasContext *ctx, arg_VX *a, int vece)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
+        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
+    };
+    static const GVecGen3 ops[2] = {
+        {
+            .fniv = gen_vrlmi_vec,
+            .fno = gen_helper_VRLWMI,
+            .opt_opc = vecop_list,
+            .load_dest = true,
+            .vece = MO_32
+        },
+        {
+            .fniv = gen_vrlmi_vec,
+            .fno = gen_helper_VRLDMI,
+            .opt_opc = vecop_list,
+            .load_dest = true,
+            .vece = MO_64
+        }
+    };
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    REQUIRE_VSX(ctx);
+
+    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
+                   avr_full_offset(a->vrb), 16, 16, &ops[vece - 2]);
+
+    return true;
+}
+
+TRANS(VRLWMI, do_vrlmi, MO_32)
+TRANS(VRLDMI, do_vrlmi, MO_64)
+
 static bool do_vector_shift_quad(DisasContext *ctx, arg_VX *a, bool right,
                                  bool alg)
 {
@@ -973,12 +1118,7 @@  GEN_VXFORM3(vsubeuqm, 31, 0);
 GEN_VXFORM3(vsubecuq, 31, 0);
 GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
             vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
-GEN_VXFORM(vrlwmi, 2, 2);
-GEN_VXFORM(vrldmi, 2, 3);
 GEN_VXFORM_TRANS(vsl, 2, 7);
-GEN_VXFORM(vrldnm, 2, 7);
-GEN_VXFORM_DUAL(vsl, PPC_ALTIVEC, PPC_NONE, \
-                vrldnm, PPC_NONE, PPC2_ISA300)
 GEN_VXFORM_TRANS(vsr, 2, 11);
 GEN_VXFORM_ENV(vpkuhum, 7, 0);
 GEN_VXFORM_ENV(vpkuwum, 7, 1);
diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc
index a7acea3ca7..3a8a9cc564 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -102,7 +102,6 @@  GEN_VXFORM_300(vextubrx, 6, 28),
 GEN_VXFORM_300(vextuhrx, 6, 29),
 GEN_VXFORM_DUAL(vmrgew, vextuwrx, 6, 30, PPC_NONE, PPC2_ALTIVEC_207),
 GEN_VXFORM_207(vmuluwm, 4, 2),
-GEN_VXFORM_300(vrlwnm, 2, 6),
 GEN_VXFORM_300(vsrv, 2, 28),
 GEN_VXFORM_300(vslv, 2, 29),
 GEN_VXFORM(vslo, 6, 16),
@@ -133,9 +132,7 @@  GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
 GEN_VXFORM_DUAL(vsubuqm, bcdtrunc, 0, 20, PPC2_ALTIVEC_207, PPC2_ISA300),
 GEN_VXFORM_DUAL(vsubcuq, bcdutrunc, 0, 21, PPC2_ALTIVEC_207, PPC2_ISA300),
 GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
-GEN_VXFORM_300(vrlwmi, 2, 2),
-GEN_VXFORM_300(vrldmi, 2, 3),
-GEN_VXFORM_DUAL(vsl, vrldnm, 2, 7, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM(vsl, 2, 7),
 GEN_VXFORM(vsr, 2, 11),
 GEN_VXFORM(vpkuhum, 7, 0),
 GEN_VXFORM(vpkuwum, 7, 1),
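
[Editor's note: a self-contained scalar model of the mask construction used by do_vrl_mask_vec above, for 32-bit elements. The function and variable names are illustrative only; it demonstrates the (~0 >> b) ^ ((~0 >> e) >> 1) trick and the begin > end inversion.]

#include <stdint.h>
#include <stdio.h>

/* Scalar model of do_vrl_mask_vec for MO_32: build the begin..end bit mask
 * (big-endian bit numbering, bit 0 = MSB), inverted when begin > end. */
static uint32_t vrl_mask32(unsigned b, unsigned e)
{
    uint32_t m = (UINT32_MAX >> b) ^ ((UINT32_MAX >> e) >> 1);
    return b > e ? ~m : m;
}

int main(void)
{
    printf("0x%08x\n", vrl_mask32(4, 11));  /* bits 4..11 set: 0x0ff00000 */
    printf("0x%08x\n", vrl_mask32(11, 4));  /* wrapped mask:   0xf81fffff */
    return 0;
}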