Message ID | 20220911230418.340941-12-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | target/i386: new decoder + AVX implementation | expand |
On 9/12/22 00:03, Paolo Bonzini wrote:
> Many SSE and AVX instructions are only valid with specific prefixes
> (none, 66, F3, F2). Introduce a direct way to encode this in the
> decoding table to avoid using decode groups too much.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target/i386/tcg/decode-new.c.inc | 37 ++++++++++++++++++++++++++++++++
> target/i386/tcg/decode-new.h | 1 +
> 2 files changed, 38 insertions(+)
>
> diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
> index f6c032c694..7b4fd9fb54 100644
> --- a/target/i386/tcg/decode-new.c.inc
> +++ b/target/i386/tcg/decode-new.c.inc
> @@ -108,6 +108,22 @@
>
> #define avx2_256 .vex_special = X86_VEX_AVX2_256,
>
> +#define P_00 1
> +#define P_66 (1 << PREFIX_DATA)
> +#define P_F3 (1 << PREFIX_REPZ)
> +#define P_F2 (1 << PREFIX_REPNZ)

These prefixes are already flags. Do you really need to shift the shifted value? I guess you need to choose a value for "no prefix", but I think you could also (ab,re)use PREFIX_LOCK or something...

> @@ -212,6 +212,7 @@ struct X86OpEntry {
> X86CPUIDFeature cpuid : 8;
> uint8_t vex_class : 8;
> X86VEXSpecial vex_special : 8;
> + uint16_t valid_prefix : 16;

Anyway, if you did, you'd only need 4 bits instead of 16. That said, the logic is sound, and saving a few bits doesn't matter much.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
```diff
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index f6c032c694..7b4fd9fb54 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -108,6 +108,22 @@
 
 #define avx2_256 .vex_special = X86_VEX_AVX2_256,
 
+#define P_00 1
+#define P_66 (1 << PREFIX_DATA)
+#define P_F3 (1 << PREFIX_REPZ)
+#define P_F2 (1 << PREFIX_REPNZ)
+
+#define p_00 .valid_prefix = P_00,
+#define p_66 .valid_prefix = P_66,
+#define p_f3 .valid_prefix = P_F3,
+#define p_f2 .valid_prefix = P_F2,
+#define p_00_66 .valid_prefix = P_00|P_66,
+#define p_00_f3 .valid_prefix = P_00|P_F3,
+#define p_66_f2 .valid_prefix = P_66|P_F2,
+#define p_00_66_f3 .valid_prefix = P_00|P_66|P_F3,
+#define p_66_f3_f2 .valid_prefix = P_66|P_F3|P_F2,
+#define p_00_66_f3_f2 .valid_prefix = P_00|P_66|P_F3|P_F2,
+
 static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
 {
     if (!s->has_modrm) {
@@ -473,6 +489,23 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
     return true;
 }
 
+static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
+{
+    uint16_t sse_prefixes;
+
+    if (!e->valid_prefix) {
+        return true;
+    }
+    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66. */
+        s->prefix &= ~PREFIX_DATA;
+    }
+
+    /* Now, either zero or one bit is set in sse_prefixes. */
+    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
+    return e->valid_prefix & (1 << sse_prefixes);
+}
+
 static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
                         X86DecodedInsn *decode)
 {
@@ -484,6 +517,10 @@ static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_
         e->decode(s, env, e, &decode->b);
     }
 
+    if (!validate_sse_prefix(s, e)) {
+        return false;
+    }
+
     /* First compute size of operands in order to initialize s->rip_offset. */
     if (e->op0 != X86_TYPE_None) {
         if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index b5299d0dd2..3db7b82506 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -212,6 +212,7 @@ struct X86OpEntry {
     X86CPUIDFeature cpuid : 8;
     uint8_t vex_class : 8;
     X86VEXSpecial vex_special : 8;
+    uint16_t valid_prefix : 16;
     bool is_decode : 1;
 };
```
Many SSE and AVX instructions are only valid with specific prefixes (none, 66, F3, F2). Introduce a direct way to encode this in the decoding table to avoid using decode groups too much. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/tcg/decode-new.c.inc | 37 ++++++++++++++++++++++++++++++++ target/i386/tcg/decode-new.h | 1 + 2 files changed, 38 insertions(+)