@@ -209,6 +209,30 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
@2reg_shift .... ... . . . ...... .... .... . q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp
+# Right shifts are encoded as N - shift, where N is the element size in bits.
+%neon_rshift_i6 16:6 !function=rsub_64
+%neon_rshift_i5 16:5 !function=rsub_32
+%neon_rshift_i4 16:4 !function=rsub_16
+%neon_rshift_i3 16:3 !function=rsub_8
+
+VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 1 . . 1 .... \
+ @2reg_shift size=3 shift=%neon_rshift_i6
+VSHR_S_2sh 1111 001 0 1 . 1 ..... .... 0000 0 . . 1 .... \
+ @2reg_shift size=2 shift=%neon_rshift_i5
+VSHR_S_2sh 1111 001 0 1 . 01 .... .... 0000 0 . . 1 .... \
+ @2reg_shift size=1 shift=%neon_rshift_i4
+VSHR_S_2sh 1111 001 0 1 . 001 ... .... 0000 0 . . 1 .... \
+ @2reg_shift size=0 shift=%neon_rshift_i3
+
+VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 1 . . 1 .... \
+ @2reg_shift size=3 shift=%neon_rshift_i6
+VSHR_U_2sh 1111 001 1 1 . 1 ..... .... 0000 0 . . 1 .... \
+ @2reg_shift size=2 shift=%neon_rshift_i5
+VSHR_U_2sh 1111 001 1 1 . 01 .... .... 0000 0 . . 1 .... \
+ @2reg_shift size=1 shift=%neon_rshift_i4
+VSHR_U_2sh 1111 001 1 1 . 001 ... .... 0000 0 . . 1 .... \
+ @2reg_shift size=0 shift=%neon_rshift_i3
+
VSHL_2sh 1111 001 0 1 . shift:6 .... 0101 1 . . 1 .... \
@2reg_shift size=3
VSHL_2sh 1111 001 0 1 . 1 shift:5 .... 0101 0 . . 1 .... \
@@ -31,6 +31,24 @@ static inline int plus1(DisasContext *s, int x)
return x + 1;
}
+static inline int rsub_64(DisasContext *s, int x)
+{
+ return 64 - x; /* field holds 64 - shift; recover the shift (s is unused) */
+}
+
+static inline int rsub_32(DisasContext *s, int x)
+{
+ return 32 - x; /* field holds 32 - shift; recover the shift (s is unused) */
+}
+static inline int rsub_16(DisasContext *s, int x)
+{
+ return 16 - x; /* field holds 16 - shift; recover the shift (s is unused) */
+}
+static inline int rsub_8(DisasContext *s, int x)
+{
+ return 8 - x; /* field holds 8 - shift; recover the shift (s is unused) */
+}
+
/* Include the generated Neon decoder */
#include "decode-neon-dp.inc.c"
#include "decode-neon-ls.inc.c"
@@ -1348,3 +1366,26 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
+
+static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+ /* Signed shift out of range gives all-sign-bits: clamp to (8 << size) - 1 */
+ a->shift = MIN(a->shift, (8 << a->size) - 1); /* note: modifies *a in place */
+ return do_vector_2sh(s, a, tcg_gen_gvec_sari);
+}
+
+static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0); /* rm_ofs, shift unused */
+}
+
+static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+ /* Unsigned shift out of range is architecturally valid and results in zero. */
+ if (a->shift >= (8 << a->size)) { /* shift amount >= 8 << size */
+ return do_vector_2sh(s, a, gen_zero_rd_2sh); /* destination gets zero */
+ } else {
+ return do_vector_2sh(s, a, tcg_gen_gvec_shri); /* in-range right shift */
+ }
+}
@@ -5296,6 +5296,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
op = (insn >> 8) & 0xf;
switch (op) {
+ case 0: /* VSHR */
case 5: /* VSHL, VSLI */
return 1; /* handled by decodetree */
default:
@@ -5330,26 +5331,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
switch (op) {
- case 0: /* VSHR */
- /* Right shift comes here negative. */
- shift = -shift;
- /* Shifts larger than the element size are architecturally
- * valid. Unsigned results in all zeros; signed results
- * in all sign bits.
- */
- if (!u) {
- tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
- MIN(shift, (8 << size) - 1),
- vec_size, vec_size);
- } else if (shift >= 8 << size) {
- tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
- vec_size, 0);
- } else {
- tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
- return 0;
-
case 1: /* VSRA */
/* Right shift comes here negative. */
shift = -shift;
Convert the VSHR 2-reg-shift insns to decodetree.

Note that unlike the legacy decoder, we present the right shift
amount to the trans_ function as a positive integer.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/neon-dp.decode       | 24 +++++++++++++++++++
 target/arm/translate-neon.inc.c | 41 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 21 +----------------
 3 files changed, 66 insertions(+), 20 deletions(-)