Message ID | 20240608084113.2770363-11-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | target/i386: more progress towards new decoder | expand |
On 6/8/24 01:40, Paolo Bonzini wrote:
> +static void gen_FXRSTOR(DisasContext *s, X86DecodedInsn *decode)
> +{
> +    if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
> +        gen_NM_exception(s);
> +    }
> +    gen_helper_fxrstor(tcg_env, s->A0);
> +}
> +
> +static void gen_FXSAVE(DisasContext *s, X86DecodedInsn *decode)
> +{
> +    if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
> +        gen_NM_exception(s);
> +    }
> +    gen_helper_fxsave(tcg_env, s->A0);
> +}

You want else's here, to not emit fxsave/restore after raising NM.

Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
Hi, On Sat, Jun 08, 2024 at 10:40:58AM +0200, Paolo Bonzini wrote: > This is already partly implemented due to VLDMXCSR and VSTMXCSR; finish > the job. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> While testing qemu v9.1, I noticed the following crash when testing qemu-system-i386 with pentium3 CPU. Oops: invalid opcode: 0000 [#1] PREEMPT SMP CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc4 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 EIP: raid6_sse12_gen_syndrome+0xfa/0x10c Code: 83 e8 01 73 bf 8b 4d ec 0f e7 53 f8 0f e7 1b 0f e7 61 f8 0f e7 31 8b 45 e8 83 c6 10 83 c3 10 83 c1 10 39 c6 0f 82 6a ff ff ff <0f> 9 EAX: 00001000 EBX: c1367008 ECX: c1368008 EDX: c119deb0 ESI: 00001000 EDI: 00000ff8 EBP: c119de84 ESP: c119de68 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246 CR0: 80050033 CR2: ffd39000 CR3: 06144000 CR4: 000006d0 Call Trace: ? show_regs+0x4d/0x54 ? die+0x2f/0x88 ? do_trap+0xc6/0xc8 ? do_error_trap+0x6c/0x100 ? raid6_sse12_gen_syndrome+0xfa/0x10c ? exc_overflow+0x50/0x50 ? exc_invalid_op+0x5b/0x70 ? raid6_sse12_gen_syndrome+0xfa/0x10c ? handle_exception+0x14b/0x14b ? exc_overflow+0x50/0x50 ? raid6_sse12_gen_syndrome+0xfa/0x10c ? exc_overflow+0x50/0x50 ? raid6_sse12_gen_syndrome+0xfa/0x10c ? raid6_sse11_gen_syndrome+0xfc/0xfc raid6_select_algo+0x144/0x420 ? libcrc32c_mod_init+0x24/0x24 do_one_initcall+0x63/0x284 ? rdinit_setup+0x40/0x40 ? parse_args+0x14b/0x3f4 kernel_init_freeable+0x238/0x44c ? rdinit_setup+0x40/0x40 ? rest_init+0x164/0x164 kernel_init+0x15/0x1dc ? schedule_tail+0x50/0x64 ret_from_fork+0x38/0x44 ? rest_init+0x164/0x164 ret_from_fork_asm+0x12/0x18 entry_INT80_32+0x108/0x108 Modules linked in: ---[ end trace 0000000000000000 ]--- Bisect points to this patch. Bisect log as well as the decoded stacktrace is attached below. 
The problem is still seen in qemu mainline (v9.1.0-997-g72b0b80714). Reverting the patch is not straightforward and results in a number of conflicts, so I was not able to test qemu with the patch reverted. Guenter --- Bisect log: # bad: [fd1952d814da738ed107e05583b3e02ac11e88ff] Update version for v9.1.0 release # good: [c25df57ae8f9fe1c72eee2dab37d76d904ac382e] Update version for 9.0.0 release git bisect start 'v9.1.0' 'v9.0.0' # bad: [2529ea2d561ea9fe359fb19ebdcfeb8b6cddd219] hw/acpi/ich9: Remove dead code related to 'acpi_memory_hotplug' git bisect bad 2529ea2d561ea9fe359fb19ebdcfeb8b6cddd219 # good: [544595e73007c824b7435b52519cc578586783a6] tests/plugin/inline: add test for conditional callback git bisect good 544595e73007c824b7435b52519cc578586783a6 # good: [039003995047b2f7911142c7c5cfb845fda044fd] hw/riscv/boot.c: Support 64-bit address for initrd git bisect good 039003995047b2f7911142c7c5cfb845fda044fd # good: [a73f7a00eea15c75fe9cfbeeaff5228f5ee24b61] tests/qtest: Add numa test for loongarch system git bisect good a73f7a00eea15c75fe9cfbeeaff5228f5ee24b61 # bad: [11ffaf8c73aae1a70f4640ada14a437a78d06efb] target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT to new decoder git bisect bad 11ffaf8c73aae1a70f4640ada14a437a78d06efb # good: [fc00123f3abeb027cd51eb58ea8845377794b3bc] python: mkvenv: remove ensure command git bisect good fc00123f3abeb027cd51eb58ea8845377794b3bc # good: [593aab332f048347bd19893071caf44e1fb742ff] Merge tag 'pull-hex-20240608' of https://github.com/quic/qemu into staging git bisect good 593aab332f048347bd19893071caf44e1fb742ff # good: [c2b6b6a65a227d2bb45e1b2694cf064b881543e4] target/i386: change X86_ENTRYr to use T0 git bisect good c2b6b6a65a227d2bb45e1b2694cf064b881543e4 # good: [10340080cd501b1aba23c3e502e2e0aa7c825fbf] target/i386: fix bad sorting of entries in the 0F table git bisect good 10340080cd501b1aba23c3e502e2e0aa7c825fbf # bad: [ae541c0eb47f2fbcfd975c8e2fcb0e3a2613dc1c] target/i386: convert non-grouped, helper-based 2-byte 
opcodes git bisect bad ae541c0eb47f2fbcfd975c8e2fcb0e3a2613dc1c # bad: [556c4c5cc44c3454f78d796b6050c6d574a35dd2] target/i386: split X86_CHECK_prot into PE and VM86 checks git bisect bad 556c4c5cc44c3454f78d796b6050c6d574a35dd2 # bad: [ea89aa895e98fd8a1b9ebf7e3dc8bfcd863b9466] target/i386: finish converting 0F AE to the new decoder git bisect bad ea89aa895e98fd8a1b9ebf7e3dc8bfcd863b9466 # first bad commit: [ea89aa895e98fd8a1b9ebf7e3dc8bfcd863b9466] target/i386: finish converting 0F AE to the new decoder --- Decoded stacktrace: CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc4 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 EIP: raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) Code: 83 e8 01 73 bf 8b 4d ec 0f e7 53 f8 0f e7 1b 0f e7 61 f8 0f e7 31 8b 45 e8 83 c6 10 83 c3 10 83 c1 10 39 c6 0f 82 6a ff ff ff <0f> 9 All code ======== 0: 83 e8 01 sub $0x1,%eax 3: 73 bf jae 0xffffffffffffffc4 5: 8b 4d ec mov -0x14(%rbp),%ecx 8: 0f e7 53 f8 movntq %mm2,-0x8(%rbx) c: 0f e7 1b movntq %mm3,(%rbx) f: 0f e7 61 f8 movntq %mm4,-0x8(%rcx) 13: 0f e7 31 movntq %mm6,(%rcx) 16: 8b 45 e8 mov -0x18(%rbp),%eax 19: 83 c6 10 add $0x10,%esi 1c: 83 c3 10 add $0x10,%ebx 1f: 83 c1 10 add $0x10,%ecx 22: 39 c6 cmp %eax,%esi 24: 0f 82 6a ff ff ff jb 0xffffffffffffff94 2a:* 0f 09 wbinvd <-- trapping instruction Code starting with the faulting instruction =========================================== 0: 0f 09 wbinvd EAX: 00001000 EBX: c1367008 ECX: c1368008 EDX: c119deb0 ESI: 00001000 EDI: 00000ff8 EBP: c119de84 ESP: c119de68 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246 CR0: 80050033 CR2: ffd39000 CR3: 06144000 CR4: 000006d0 Call Trace: ? show_regs (arch/x86/kernel/dumpstack.c:479) ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447) ? do_trap (arch/x86/kernel/traps.c:156 arch/x86/kernel/traps.c:197) ? 
do_error_trap (arch/x86/include/asm/traps.h:58 arch/x86/kernel/traps.c:218) ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) ? exc_overflow (arch/x86/kernel/traps.c:301) ? exc_invalid_op (arch/x86/kernel/traps.c:316) ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) ? handle_exception (arch/x86/entry/entry_32.S:1055) ? exc_overflow (arch/x86/kernel/traps.c:301) ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) ? exc_overflow (arch/x86/kernel/traps.c:301) ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) ? raid6_sse11_gen_syndrome (lib/raid6/sse1.c:100) raid6_select_algo (lib/raid6/algos.c:179 (discriminator 2) lib/raid6/algos.c:273 (discriminator 2)) ? libcrc32c_mod_init (lib/raid6/algos.c:243) do_one_initcall (init/main.c:1269) ? rdinit_setup (init/main.c:1317) ? parse_args (kernel/params.c:153 kernel/params.c:186) kernel_init_freeable (init/main.c:1330 (discriminator 1) init/main.c:1347 (discriminator 1) init/main.c:1366 (discriminator 1) init/main.c:1580 (discrim ? rdinit_setup (init/main.c:1317) ? rest_init (init/main.c:1461) kernel_init (init/main.c:1471) ? schedule_tail (kernel/sched/core.c:5266) ret_from_fork (arch/x86/kernel/process.c:153) ? rest_init (init/main.c:1461) ret_from_fork_asm (arch/x86/entry/entry_32.S:737) entry_INT80_32 (arch/x86/entry/entry_32.S:945)
On 10/21/24 03:49, Guenter Roeck wrote: > Hi, > > On Sat, Jun 08, 2024 at 10:40:58AM +0200, Paolo Bonzini wrote: >> This is already partly implemented due to VLDMXCSR and VSTMXCSR; finish >> the job. >> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > > While testing qemu v9.1, I noticed the following crash when testing qemu-system-i386 > with pentium3 CPU. Is this enough to fix it? diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index ee2a508ae9a..cda32ee6784 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -345,9 +345,9 @@ static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, [1] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3), [2] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), [3] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), - [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE2) p_00), + [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE) p_00), [6] = X86_OP_ENTRY0(MFENCE, cpuid(SSE2) p_00), - [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE2) p_00), + [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE) p_00), }; static const X86OpEntry group15_mem[8] = { > 22: 39 c6 cmp %eax,%esi > 24: 0f 82 6a ff ff ff jb 0xffffffffffffff94 > 2a:* 0f 09 wbinvd <-- trapping instruction This is a bit weird, as wbinvd is not affected by this patch. However, a checkout of Linux has asm volatile("sfence" : :: "memory"); kernel_fpu_end(); } at the end of lib/raid6/sse1.c and it would indeed be affected by this patch. SSE2 was not present in Pentium III, but SSE was. 
Paolo > Code starting with the faulting instruction > =========================================== > 0: 0f 09 wbinvd > EAX: 00001000 EBX: c1367008 ECX: c1368008 EDX: c119deb0 > ESI: 00001000 EDI: 00000ff8 EBP: c119de84 ESP: c119de68 > DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246 > CR0: 80050033 CR2: ffd39000 CR3: 06144000 CR4: 000006d0 > Call Trace: > ? show_regs (arch/x86/kernel/dumpstack.c:479) > ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447) > ? do_trap (arch/x86/kernel/traps.c:156 arch/x86/kernel/traps.c:197) > ? do_error_trap (arch/x86/include/asm/traps.h:58 arch/x86/kernel/traps.c:218) > ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) > ? exc_overflow (arch/x86/kernel/traps.c:301) > ? exc_invalid_op (arch/x86/kernel/traps.c:316) > ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) > ? handle_exception (arch/x86/entry/entry_32.S:1055) > ? exc_overflow (arch/x86/kernel/traps.c:301) > ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) > ? exc_overflow (arch/x86/kernel/traps.c:301) > ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147) > ? raid6_sse11_gen_syndrome (lib/raid6/sse1.c:100) > raid6_select_algo (lib/raid6/algos.c:179 (discriminator 2) lib/raid6/algos.c:273 (discriminator 2)) > ? libcrc32c_mod_init (lib/raid6/algos.c:243) > do_one_initcall (init/main.c:1269) > ? rdinit_setup (init/main.c:1317) > ? parse_args (kernel/params.c:153 kernel/params.c:186) > kernel_init_freeable (init/main.c:1330 (discriminator 1) init/main.c:1347 (discriminator 1) init/main.c:1366 (discriminator 1) init/main.c:1580 (discrim > ? rdinit_setup (init/main.c:1317) > ? rest_init (init/main.c:1461) > kernel_init (init/main.c:1471) > ? schedule_tail (kernel/sched/core.c:5266) > ret_from_fork (arch/x86/kernel/process.c:153) > ? rest_init (init/main.c:1461) > ret_from_fork_asm (arch/x86/entry/entry_32.S:737) > entry_INT80_32 (arch/x86/entry/entry_32.S:945) > >
On 10/20/24 23:57, Paolo Bonzini wrote: > On 10/21/24 03:49, Guenter Roeck wrote: >> Hi, >> >> On Sat, Jun 08, 2024 at 10:40:58AM +0200, Paolo Bonzini wrote: >>> This is already partly implemented due to VLDMXCSR and VSTMXCSR; finish >>> the job. >>> >>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> >> >> While testing qemu v9.1, I noticed the following crash when testing qemu-system-i386 >> with pentium3 CPU. > > Is this enough to fix it? > Yes. > diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc > index ee2a508ae9a..cda32ee6784 100644 > --- a/target/i386/tcg/decode-new.c.inc > +++ b/target/i386/tcg/decode-new.c.inc > @@ -345,9 +345,9 @@ static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, > [1] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3), > [2] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), > [3] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), > - [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE2) p_00), > + [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE) p_00), > [6] = X86_OP_ENTRY0(MFENCE, cpuid(SSE2) p_00), > - [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE2) p_00), > + [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE) p_00), > }; > > static const X86OpEntry group15_mem[8] = { > >> 22: 39 c6 cmp %eax,%esi >> 24: 0f 82 6a ff ff ff jb 0xffffffffffffff94 >> 2a:* 0f 09 wbinvd <-- trapping instruction > > This is a bit weird, as wbinvd is not affected by this patch. However, > a checkout of Linux has > > asm volatile("sfence" : :: "memory"); > kernel_fpu_end(); > } > > at the end of lib/raid6/sse1.c and it would indeed be affected by this > patch. SSE2 was not present in Pentium III, but SSE was. > No idea how the 0x0f 0x09 ends up in the log. I wondered about that as well. Thanks, Guenter
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 8465717ea21..5577f7509aa 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -108,10 +108,15 @@ typedef enum X86CPUIDFeature { X86_FEAT_AVX2, X86_FEAT_BMI1, X86_FEAT_BMI2, + X86_FEAT_CLFLUSH, + X86_FEAT_CLFLUSHOPT, + X86_FEAT_CLWB, X86_FEAT_CMOV, X86_FEAT_CMPCCXADD, X86_FEAT_F16C, X86_FEAT_FMA, + X86_FEAT_FSGSBASE, + X86_FEAT_FXSR, X86_FEAT_MOVBE, X86_FEAT_PCLMULQDQ, X86_FEAT_SHA_NI, @@ -122,6 +127,8 @@ typedef enum X86CPUIDFeature { X86_FEAT_SSE41, X86_FEAT_SSE42, X86_FEAT_SSE4A, + X86_FEAT_XSAVE, + X86_FEAT_XSAVEOPT, } X86CPUIDFeature; /* Execution flags */ diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 4958f4c45d5..ebae745ecba 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4197,194 +4197,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) s->base.is_jmp = DISAS_EOB_NEXT; } break; - /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */ - case 0x1ae: - modrm = x86_ldub_code(env, s); - switch (modrm) { - CASE_MODRM_MEM_OP(0): /* fxsave */ - if (!(s->cpuid_features & CPUID_FXSR) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX); - break; - } - gen_lea_modrm(env, s, modrm); - gen_helper_fxsave(tcg_env, s->A0); - break; - - CASE_MODRM_MEM_OP(1): /* fxrstor */ - if (!(s->cpuid_features & CPUID_FXSR) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX); - break; - } - gen_lea_modrm(env, s, modrm); - gen_helper_fxrstor(tcg_env, s->A0); - break; - - CASE_MODRM_MEM_OP(2): /* ldmxcsr */ - if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) { - goto illegal_op; - } - if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX); - break; - } - gen_lea_modrm(env, s, modrm); - 
tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); - break; - - CASE_MODRM_MEM_OP(3): /* stmxcsr */ - if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) { - goto illegal_op; - } - if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX); - break; - } - gen_helper_update_mxcsr(tcg_env); - gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr)); - gen_op_st_v(s, MO_32, s->T0, s->A0); - break; - - CASE_MODRM_MEM_OP(4): /* xsave */ - if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 - || (prefixes & (PREFIX_LOCK | PREFIX_DATA - | PREFIX_REPZ | PREFIX_REPNZ))) { - goto illegal_op; - } - gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], - cpu_regs[R_EDX]); - gen_helper_xsave(tcg_env, s->A0, s->tmp1_i64); - break; - - CASE_MODRM_MEM_OP(5): /* xrstor */ - if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 - || (prefixes & (PREFIX_LOCK | PREFIX_DATA - | PREFIX_REPZ | PREFIX_REPNZ))) { - goto illegal_op; - } - gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], - cpu_regs[R_EDX]); - gen_helper_xrstor(tcg_env, s->A0, s->tmp1_i64); - /* XRSTOR is how MPX is enabled, which changes how - we translate. Thus we need to end the TB. 
*/ - s->base.is_jmp = DISAS_EOB_NEXT; - break; - - CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */ - if (prefixes & PREFIX_LOCK) { - goto illegal_op; - } - if (prefixes & PREFIX_DATA) { - /* clwb */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) { - goto illegal_op; - } - gen_nop_modrm(env, s, modrm); - } else { - /* xsaveopt */ - if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 - || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0 - || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) { - goto illegal_op; - } - gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], - cpu_regs[R_EDX]); - gen_helper_xsaveopt(tcg_env, s->A0, s->tmp1_i64); - } - break; - - CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */ - if (prefixes & PREFIX_LOCK) { - goto illegal_op; - } - if (prefixes & PREFIX_DATA) { - /* clflushopt */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) { - goto illegal_op; - } - } else { - /* clflush */ - if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) - || !(s->cpuid_features & CPUID_CLFLUSH)) { - goto illegal_op; - } - } - gen_nop_modrm(env, s, modrm); - break; - - case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */ - case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */ - case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */ - case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */ - if (CODE64(s) - && (prefixes & PREFIX_REPZ) - && !(prefixes & PREFIX_LOCK) - && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) { - TCGv base, treg, src, dst; - - /* Preserve hflags bits by testing CR4 at runtime. */ - tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK); - gen_helper_cr4_testbit(tcg_env, s->tmp2_i32); - - base = cpu_seg_base[modrm & 8 ? 
R_GS : R_FS]; - treg = cpu_regs[(modrm & 7) | REX_B(s)]; - - if (modrm & 0x10) { - /* wr*base */ - dst = base, src = treg; - } else { - /* rd*base */ - dst = treg, src = base; - } - - if (s->dflag == MO_32) { - tcg_gen_ext32u_tl(dst, src); - } else { - tcg_gen_mov_tl(dst, src); - } - break; - } - goto unknown_op; - - case 0xf8 ... 0xff: /* sfence */ - if (!(s->cpuid_features & CPUID_SSE) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC); - break; - case 0xe8 ... 0xef: /* lfence */ - if (!(s->cpuid_features & CPUID_SSE) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC); - break; - case 0xf0 ... 0xf7: /* mfence */ - if (!(s->cpuid_features & CPUID_SSE2) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); - break; - - default: - goto unknown_op; - } - break; - case 0x1aa: /* rsm */ gen_svm_check_intercept(s, SVM_EXIT_RSM); if (!(s->flags & HF_SMM_MASK)) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 4e745f10dd8..1c6fa39c3eb 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -269,20 +269,41 @@ static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEnt static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) { - /* only includes ldmxcsr and stmxcsr, because they have AVX variants. 
*/ static const X86OpEntry group15_reg[8] = { + [0] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3), + [1] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3), + [2] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), + [3] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), + [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE2) p_00), + [6] = X86_OP_ENTRY0(MFENCE, cpuid(SSE2) p_00), + [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE2) p_00), }; static const X86OpEntry group15_mem[8] = { - [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128)), - [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128)), + [0] = X86_OP_ENTRYw(FXSAVE, M,y, cpuid(FXSR) p_00), + [1] = X86_OP_ENTRYr(FXRSTOR, M,y, cpuid(FXSR) p_00), + [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128) p_00), + [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128) p_00), + [4] = X86_OP_ENTRYw(XSAVE, M,y, cpuid(XSAVE) p_00), + [5] = X86_OP_ENTRYr(XRSTOR, M,y, cpuid(XSAVE) p_00), + [6] = X86_OP_ENTRYw(XSAVEOPT, M,b, cpuid(XSAVEOPT) p_00), + [7] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLFLUSH) p_00), + }; + + static const X86OpEntry group15_mem_66[8] = { + [6] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLWB)), + [7] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLFLUSHOPT)), }; uint8_t modrm = get_modrm(s, env); + int op = (modrm >> 3) & 7; + if ((modrm >> 6) == 3) { - *entry = group15_reg[(modrm >> 3) & 7]; + *entry = group15_reg[op]; + } else if (s->prefix & PREFIX_DATA) { + *entry = group15_mem_66[op]; } else { - *entry = group15_mem[(modrm >> 3) & 7]; + *entry = group15_mem[op]; } } @@ -2094,6 +2115,10 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) return true; case X86_FEAT_CMOV: return (s->cpuid_features & CPUID_CMOV); + case X86_FEAT_CLFLUSH: + return (s->cpuid_features & CPUID_CLFLUSH); + case X86_FEAT_FXSR: + return (s->cpuid_features & CPUID_FXSR); case X86_FEAT_F16C: return (s->cpuid_ext_features & CPUID_EXT_F16C); case X86_FEAT_FMA: @@ -2127,6 +2152,8 @@ static bool 
has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) case X86_FEAT_AVX: return (s->cpuid_ext_features & CPUID_EXT_AVX); + case X86_FEAT_XSAVE: + return (s->cpuid_ext_features & CPUID_EXT_XSAVE); case X86_FEAT_3DNOW: return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW); @@ -2141,11 +2168,20 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2); case X86_FEAT_AVX2: return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2); + case X86_FEAT_CLFLUSHOPT: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT); + case X86_FEAT_CLWB: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB); + case X86_FEAT_FSGSBASE: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE); case X86_FEAT_SHA_NI: return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI); case X86_FEAT_CMPCCXADD: return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD); + + case X86_FEAT_XSAVEOPT: + return (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT); } g_assert_not_reached(); } @@ -2480,7 +2516,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu) case 0x1a ... 0x1b: /* MPX */ case 0x30 ... 0x35: /* more privileged instructions */ case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */ - case 0xaa ... 0xae: /* RSM, SHRD, grp15 */ + case 0xaa ... 0xad: /* RSM, SHRD */ case 0xb0 ... 
0xb1: /* cmpxchg */ case 0xb3: /* btr */ case 0xb8: /* integer ops */ diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index bcb6bccbd75..5ca3764e006 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1653,6 +1653,22 @@ static void gen_EXTRQ_r(DisasContext *s, X86DecodedInsn *decode) gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2); } +static void gen_FXRSTOR(DisasContext *s, X86DecodedInsn *decode) +{ + if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { + gen_NM_exception(s); + } + gen_helper_fxrstor(tcg_env, s->A0); +} + +static void gen_FXSAVE(DisasContext *s, X86DecodedInsn *decode) +{ + if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { + gen_NM_exception(s); + } + gen_helper_fxsave(tcg_env, s->A0); +} + static void gen_HLT(DisasContext *s, X86DecodedInsn *decode) { #ifdef CONFIG_SYSTEM_ONLY @@ -2000,6 +2016,11 @@ static void gen_LES(DisasContext *s, X86DecodedInsn *decode) gen_lxx_seg(s, decode, R_ES); } +static void gen_LFENCE(DisasContext *s, X86DecodedInsn *decode) +{ + tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC); +} + static void gen_LFS(DisasContext *s, X86DecodedInsn *decode) { gen_lxx_seg(s, decode, R_FS); @@ -2059,6 +2080,11 @@ static void gen_LSS(DisasContext *s, X86DecodedInsn *decode) gen_lxx_seg(s, decode, R_SS); } +static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode) +{ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); +} + static void gen_MOV(DisasContext *s, X86DecodedInsn *decode) { /* nothing to do! */ @@ -3092,6 +3118,15 @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode) } } +static void gen_RDxxBASE(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS]; + + /* Preserve hflags bits by testing CR4 at runtime. */ + gen_helper_cr4_testbit(tcg_env, tcg_constant_i32(CR4_FSGSBASE_MASK)); + tcg_gen_mov_tl(s->T0, base); +} + static void gen_RET(DisasContext *s, X86DecodedInsn *decode) { int16_t adjust = decode->e.op1 == X86_TYPE_I ? 
decode->immediate : 0; @@ -3372,6 +3407,11 @@ static void gen_SETcc(DisasContext *s, X86DecodedInsn *decode) gen_setcc1(s, decode->b & 0xf, s->T0); } +static void gen_SFENCE(DisasContext *s, X86DecodedInsn *decode) +{ + tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC); +} + static void gen_SHA1NEXTE(DisasContext *s, X86DecodedInsn *decode) { gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2); @@ -4042,6 +4082,15 @@ static void gen_WAIT(DisasContext *s, X86DecodedInsn *decode) } } +static void gen_WRxxBASE(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS]; + + /* Preserve hflags bits by testing CR4 at runtime. */ + gen_helper_cr4_testbit(tcg_env, tcg_constant_i32(CR4_FSGSBASE_MASK)); + tcg_gen_mov_tl(base, s->T0); +} + static void gen_XCHG(DisasContext *s, X86DecodedInsn *decode) { if (s->prefix & PREFIX_LOCK) { @@ -4084,3 +4133,34 @@ static void gen_XOR(DisasContext *s, X86DecodedInsn *decode) prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } } + +static void gen_XRSTOR(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv_i64 features = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]); + gen_helper_xrstor(tcg_env, s->A0, features); + if (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_MPX) { + /* + * XRSTOR is how MPX is enabled, which changes how + * we translate. Thus we need to end the TB. + */ + s->base.is_jmp = DISAS_EOB_NEXT; + } +} + +static void gen_XSAVE(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv_i64 features = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]); + gen_helper_xsave(tcg_env, s->A0, features); +} + +static void gen_XSAVEOPT(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv_i64 features = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]); + gen_helper_xsave(tcg_env, s->A0, features); +}
This is already partly implemented due to VLDMXCSR and VSTMXCSR; finish
the job.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/decode-new.h     |   7 ++
 target/i386/tcg/translate.c      | 188 -------------------------------
 target/i386/tcg/decode-new.c.inc |  48 +++++++-
 target/i386/tcg/emit.c.inc       |  80 +++++++++++++
 4 files changed, 129 insertions(+), 194 deletions(-)