Message ID | 20240908185110.485573-1-richard.henderson@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | tcg/i386: Implement vector TST{EQ,NE} for avx512 | expand |
On 8/9/24 20:51, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>
> Based-on: <20240908022632.459477-1-richard.henderson@linaro.org>
> ("tcg: Improve support for cmpsel_vec")
>
> ---
>  tcg/i386/tcg-target.h | 2 +-
>  tcg/i386/tcg-target.c.inc | 31 ++++++++++++++++++++++++++++---
>  2 files changed, 29 insertions(+), 4 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index 342be30c4c..c68ac023d8 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -224,7 +224,7 @@ typedef enum {
>  #define TCG_TARGET_HAS_minmax_vec 1
>  #define TCG_TARGET_HAS_bitsel_vec have_avx512vl
>  #define TCG_TARGET_HAS_cmpsel_vec 1
> -#define TCG_TARGET_HAS_tst_vec 0
> +#define TCG_TARGET_HAS_tst_vec have_avx512bw
>
>  #define TCG_TARGET_deposit_i32_valid(ofs, len) \
>      (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
> diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
> index 8c363b7bfc..afeaab313a 100644
> --- a/tcg/i386/tcg-target.c.inc
> +++ b/tcg/i386/tcg-target.c.inc
> @@ -462,6 +462,14 @@ static bool tcg_target_const_match(int64_t val, int ct,
>  #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
>  #define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
>  #define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
> +#define OPC_VPTESTMB (0x26 | P_EXT38 | P_DATA16 | P_EVEX)
> +#define OPC_VPTESTMW (0x26 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
> +#define OPC_VPTESTMD (0x27 | P_EXT38 | P_DATA16 | P_EVEX)
> +#define OPC_VPTESTMQ (0x27 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
> +#define OPC_VPTESTNMB (0x26 | P_EXT38 | P_SIMDF3 | P_EVEX)
> +#define OPC_VPTESTNMW (0x26 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
> +#define OPC_VPTESTNMD (0x27 | P_EXT38 | P_SIMDF3 | P_EVEX)
> +#define OPC_VPTESTNMQ (0x27 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
>  #define OPC_VZEROUPPER (0x77 | P_EXT)
>  #define OPC_XCHG_ax_r32 (0x90)
>  #define OPC_XCHG_EvGv (0x87)
> @@ -3145,6 +3153,13 @@ static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
>          { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ },
>          { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ }
>      };
> +    static const int testm_insn[4] = {
> +        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
> +    };
> +    static const int testnm_insn[4] = {
> +        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ

OPC_VPTESTNMB, OPC_VPTESTNMW, OPC_VPTESTNMD, OPC_VPTESTNMQ ;)

Otherwise LGTM.

> +    };
> +
>      static const int cond_ext[16] = {
>          [TCG_COND_EQ] = 0,
>          [TCG_COND_NE] = 4,
> @@ -3160,9 +3175,19 @@ static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
>          [TCG_COND_ALWAYS] = 7,
>      };
>
> -    tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
> -                           /* k1 */ 1, v1, v2, type);
> -    tcg_out8(s, cond_ext[cond]);
> +    switch (cond) {
> +    case TCG_COND_TSTNE:
> +        tcg_out_vex_modrm_type(s, testm_insn[vece], /* k1 */ 1, v1, v2, type);
> +        break;
> +    case TCG_COND_TSTEQ:
> +        tcg_out_vex_modrm_type(s, testnm_insn[vece], /* k1 */ 1, v1, v2, type);
> +        break;
> +    default:
> +        tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
> +                               /* k1 */ 1, v1, v2, type);
> +        tcg_out8(s, cond_ext[cond]);
> +        break;
> +    }
>  }
>
>  static void tcg_out_k1_to_vec(TCGContext *s, TCGType type,
On 9/9/24 02:53, Philippe Mathieu-Daudé wrote:
>> @@ -3145,6 +3153,13 @@ static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type,
>> unsigned vece,
>>          { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ },
>>          { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ }
>>      };
>> +    static const int testm_insn[4] = {
>> +        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
>> +    };
>> +    static const int testnm_insn[4] = {
>> +        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
>
> OPC_VPTESTNMB, OPC_VPTESTNMW, OPC_VPTESTNMD, OPC_VPTESTNMQ ;)

Oops, yes. There are not so many uses of this in target/ yet, and I happened to pick one that used testm. :-/

r~
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 342be30c4c..c68ac023d8 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -224,7 +224,7 @@ typedef enum {
 #define TCG_TARGET_HAS_minmax_vec 1
 #define TCG_TARGET_HAS_bitsel_vec have_avx512vl
 #define TCG_TARGET_HAS_cmpsel_vec 1
-#define TCG_TARGET_HAS_tst_vec 0
+#define TCG_TARGET_HAS_tst_vec have_avx512bw

 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
     (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 8c363b7bfc..afeaab313a 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -462,6 +462,14 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
 #define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
 #define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTMB (0x26 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPTESTMW (0x26 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTMD (0x27 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPTESTMQ (0x27 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTNMB (0x26 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPTESTNMW (0x26 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
+#define OPC_VPTESTNMD (0x27 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPTESTNMQ (0x27 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
 #define OPC_VZEROUPPER (0x77 | P_EXT)
 #define OPC_XCHG_ax_r32 (0x90)
 #define OPC_XCHG_EvGv (0x87)
@@ -3145,6 +3153,13 @@ static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
         { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ },
         { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ }
     };
+    static const int testm_insn[4] = {
+        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
+    };
+    static const int testnm_insn[4] = {
+        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
+    };
+
     static const int cond_ext[16] = {
         [TCG_COND_EQ] = 0,
         [TCG_COND_NE] = 4,
@@ -3160,9 +3175,19 @@ static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
         [TCG_COND_ALWAYS] = 7,
     };

-    tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
-                           /* k1 */ 1, v1, v2, type);
-    tcg_out8(s, cond_ext[cond]);
+    switch (cond) {
+    case TCG_COND_TSTNE:
+        tcg_out_vex_modrm_type(s, testm_insn[vece], /* k1 */ 1, v1, v2, type);
+        break;
+    case TCG_COND_TSTEQ:
+        tcg_out_vex_modrm_type(s, testnm_insn[vece], /* k1 */ 1, v1, v2, type);
+        break;
+    default:
+        tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
+                               /* k1 */ 1, v1, v2, type);
+        tcg_out8(s, cond_ext[cond]);
+        break;
+    }
 }

 static void tcg_out_k1_to_vec(TCGContext *s, TCGType type,
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

Based-on: <20240908022632.459477-1-richard.henderson@linaro.org>
("tcg: Improve support for cmpsel_vec")

---
 tcg/i386/tcg-target.h | 2 +-
 tcg/i386/tcg-target.c.inc | 31 ++++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 4 deletions(-)