@@ -223,6 +223,7 @@
tools/security/xensec_tool
tools/tests/x86_emulator/*.bin
tools/tests/x86_emulator/*.tmp
+tools/tests/x86_emulator/3dnow*.[ch]
tools/tests/x86_emulator/asm
tools/tests/x86_emulator/avx*.[ch]
tools/tests/x86_emulator/blowfish.h
@@ -11,7 +11,7 @@ all: $(TARGET)
run: $(TARGET)
./$(TARGET)
-SIMD := sse sse2 sse4 avx avx2 xop
+SIMD := 3dnow sse sse2 sse4 avx avx2 xop
FMA := fma4 fma
SG := avx2-sg
TESTCASES := blowfish $(SIMD) $(FMA) $(SG)
@@ -19,6 +19,9 @@ TESTCASES := blowfish $(SIMD) $(FMA) $(S
blowfish-cflags := ""
blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
+3dnow-vecs := 8
+3dnow-ints :=
+3dnow-flts := 4
sse-vecs := 16
sse-ints :=
sse-flts := 4
@@ -49,8 +52,13 @@ xop-ints := 1 2 4 8
xop-flts := $(avx-flts)
# For AVX and later, have the compiler avoid XMM0 to widen coverage of
-# the VEX.vvvv checks in the emulator.
-non-sse = $(if $(filter sse%,$(1)),,-ffixed-xmm0)
+# the VEX.vvvv checks in the emulator. For 3DNow!, however, force SSE
+# use for floating point operations, to avoid mixing MMX and FPU register
+# uses. Also enable 3DNow! extensions, but note that we can't use 3dnowa
+# as the test flavor right away since -m3dnowa is understood only by
+# gcc 7.x and newer (older ones want a specific machine model instead).
+3dnowa := $(call cc-option,$(CC),-m3dnowa,-march=k8)
+non-sse = $(if $(filter sse%,$(1)),,$(if $(filter 3dnow%,$(1)),-msse -mfpmath=sse $(3dnowa),-ffixed-xmm0))
define simd-defs
$(1)-cflags := \
@@ -81,8 +89,9 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
$(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) $(XEN_COMPILE_ARCH), \
for cflags in $($*-cflags) $($*-cflags-$(arch)); do \
$(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) $*-cflags="$$cflags" all; \
+ prefix=$(shell echo $(subst -,_,$*) | sed -e 's,^\([0-9]\),_\1,'); \
flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \
- (echo "static const unsigned int $(subst -,_,$*)_$(arch)$${flavor}[] = {"; \
+ (echo "static const unsigned int $${prefix}_$(arch)$${flavor}[] = {"; \
od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 's/$$/,/'; \
echo "};") >>$@.new; \
rm -f $*.bin; \
@@ -48,6 +48,8 @@ static inline bool _to_bool(byte_vec_t b
#if VEC_SIZE == FLOAT_SIZE
# define to_int(x) ((vec_t){ (int)(x)[0] })
+#elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
+# define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
#elif VEC_SIZE == 16 && defined(__SSE2__)
# if FLOAT_SIZE == 4
# define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x))
@@ -70,7 +72,24 @@ static inline bool _to_bool(byte_vec_t b
})
#endif
-#if FLOAT_SIZE == 4 && defined(__SSE__)
+#if VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW_A__)
+# define max __builtin_ia32_pfmax
+# define min __builtin_ia32_pfmin
+# define recip(x) ({ \
+ vec_t t_ = __builtin_ia32_pfrcp(x); \
+ touch(x); \
+ t_[1] = __builtin_ia32_pfrcp(__builtin_ia32_pswapdsf(x))[0]; \
+ touch(x); \
+ __builtin_ia32_pfrcpit2(__builtin_ia32_pfrcpit1(t_, x), t_); \
+})
+# define rsqrt(x) ({ \
+ vec_t t_ = __builtin_ia32_pfrsqrt(x); \
+ touch(x); \
+ t_[1] = __builtin_ia32_pfrsqrt(__builtin_ia32_pswapdsf(x))[0]; \
+ touch(x); \
+ __builtin_ia32_pfrcpit2(__builtin_ia32_pfrsqit1(__builtin_ia32_pfmul(t_, t_), x), t_); \
+})
+#elif FLOAT_SIZE == 4 && defined(__SSE__)
# if VEC_SIZE == 32 && defined(__AVX__)
# define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); })
# define max(x, y) __builtin_ia32_maxps256(x, y)
@@ -5,6 +5,7 @@
#include "x86_emulate.h"
#include "blowfish.h"
+#include "3dnow.h"
#include "sse.h"
#include "sse2.h"
#include "sse4.h"
@@ -28,6 +29,11 @@ static bool blowfish_check_regs(const st
return regs->eax == 2 && regs->edx == 1;
}
+static bool simd_check__3dnow(void)
+{ /* The 3dnow test flavor is compiled with the 3DNow! extensions plus -msse -mfpmath=sse, so both features are needed to run it. */
+ return cpu_has_3dnow_ext && cpu_has_sse;
+}
+
static bool simd_check_sse(void)
{
return cpu_has_sse;
@@ -117,6 +123,7 @@ static const struct {
#else
# define SIMD(desc, feat, form) SIMD_(32, desc, feat, form)
#endif
+ SIMD(3DNow! single, _3dnow, 8f4),
SIMD(SSE scalar single, sse, f4),
SIMD(SSE packed single, sse, 16f4),
SIMD(SSE2 scalar single, sse2, f4),
@@ -166,6 +166,12 @@ static inline uint64_t xgetbv(uint32_t x
(res.b & (1U << 8)) != 0; \
})
+#define cpu_has_3dnow_ext ({ /* evaluates to true when the 3DNow! extensions bit is reported */ \
+ struct cpuid_leaf res; \
+ emul_test_cpuid(0x80000001, 0, &res, NULL); /* leaf 0x80000001, subleaf 0 */ \
+ (res.d & (1U << 30)) != 0; /* bit 30 of the 'd' output (presumably EDX; the emulator checks EDX bit 30 for the same feature) */ \
+})
+
#define cpu_has_sse4a ({ \
struct cpuid_leaf res; \
emul_test_cpuid(0x80000001, 0, &res, NULL); \
@@ -355,6 +355,36 @@ static const struct {
[0xff] = { ModRM }
};
+static const uint16_t _3dnow_table[16] = { /* row = high nibble of the trailing opcode byte (imm8), bit = its low nibble */
+ [0x0] = (1 << 0xd) /* pi2fd */,
+ [0x1] = (1 << 0xd) /* pf2id */,
+ [0x9] = (1 << 0x0) /* pfcmpge */ |
+ (1 << 0x4) /* pfmin */ |
+ (1 << 0x6) /* pfrcp */ |
+ (1 << 0x7) /* pfrsqrt */ |
+ (1 << 0xa) /* pfsub */ |
+ (1 << 0xe) /* pfadd */,
+ [0xa] = (1 << 0x0) /* pfcmpgt */ |
+ (1 << 0x4) /* pfmax */ |
+ (1 << 0x6) /* pfrcpit1 */ |
+ (1 << 0x7) /* pfrsqit1 */ |
+ (1 << 0xa) /* pfsubr */ |
+ (1 << 0xe) /* pfacc */,
+ [0xb] = (1 << 0x0) /* pfcmpeq */ |
+ (1 << 0x4) /* pfmul */ |
+ (1 << 0x6) /* pfrcpit2 */ |
+ (1 << 0x7) /* pmulhrw */ |
+ (1 << 0xf) /* pavgusb */,
+};
+
+static const uint16_t _3dnow_ext_table[16] = { /* 3DNow! extension insns: row = imm8 >> 4, bit = imm8 & 0xf */
+    [0x0] = (1 << 0xc) /* pi2fw (0x0c) */,
+    [0x1] = (1 << 0xc) /* pf2iw (0x1c) */,
+    [0x8] = (1 << 0xa) /* pfnacc (0x8a) */ |
+            (1 << 0xe) /* pfpnacc (0x8e) */,
+    [0xb] = (1 << 0xb) /* pswapd (0xbb) */,
+};
+
/*
* "two_op" and "four_op" below refer to the number of register operands
* (one of which possibly also allowing to be a memory one). The named
@@ -1670,6 +1700,8 @@ static bool vcpu_has(
#define vcpu_has_rdrand() vcpu_has( 1, ECX, 30, ctxt, ops)
#define vcpu_has_mmxext() (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
vcpu_has_sse())
+#define vcpu_has_3dnow_ext() vcpu_has(0x80000001, EDX, 30, ctxt, ops)
+#define vcpu_has_3dnow() vcpu_has(0x80000001, EDX, 31, ctxt, ops)
#define vcpu_has_lahf_lm() vcpu_has(0x80000001, ECX, 0, ctxt, ops)
#define vcpu_has_cr8_legacy() vcpu_has(0x80000001, ECX, 4, ctxt, ops)
#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops)
@@ -5502,6 +5534,26 @@ x86_emulate(
case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
break;
+ case X86EMUL_OPC(0x0f, 0x0e): /* femms */
+ host_and_vcpu_must_have(3dnow);
+ asm volatile ( "femms" );
+ break;
+
+ case X86EMUL_OPC(0x0f, 0x0f): /* 3DNow! */
+ if ( _3dnow_ext_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
+ host_and_vcpu_must_have(3dnow_ext);
+ else if ( _3dnow_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
+ host_and_vcpu_must_have(3dnow);
+ else
+ generate_exception(EXC_UD);
+
+ get_fpu(X86EMUL_FPU_mmx, &fic);
+
+ d = DstReg | SrcMem;
+ op_bytes = 8;
+ state->simd_size = simd_other;
+ goto simd_0f_imm8;
+
#define CASE_SIMD_PACKED_INT(pfx, opc) \
case X86EMUL_OPC(pfx, opc): \
case X86EMUL_OPC_66(pfx, opc)
@@ -71,6 +71,8 @@
&& boot_cpu_has(X86_FEATURE_FFXSR))
#define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB)
#define cpu_has_rdtscp boot_cpu_has(X86_FEATURE_RDTSCP)
+#define cpu_has_3dnow_ext boot_cpu_has(X86_FEATURE_3DNOWEXT)
+#define cpu_has_3dnow boot_cpu_has(X86_FEATURE_3DNOW)
/* CPUID level 0x80000001.ecx */
#define cpu_has_cmp_legacy boot_cpu_has(X86_FEATURE_CMP_LEGACY)
Yes, recent AMD CPUs don't support them anymore, but I think we should nevertheless cope. Signed-off-by: Jan Beulich <jbeulich@suse.com>