@@ -2843,4 +2843,29 @@ static inline bool ctl_has_irq(CPUX86State *env)
# define TARGET_VSYSCALL_PAGE (UINT64_C(-10) << 20)
#endif
+/* majority(NOT a, b, c) = (a ^ b) ? b : c */
+#define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c))
+
+/*
+ * ADD_COUT_VEC(x, y) = majority((x + y) ^ x ^ y, x, y)
+ *
+ * If two corresponding bits in x and y are the same, they give the carry
+ * out of that bit, independent of the value of (x+y)^x^y. Hence x^y can
+ * be replaced with 1 in (x+y)^x^y, resulting in majority(NOT (x+y), x, y)
+ */
+#define ADD_COUT_VEC(op1, op2, result) \
+ MAJ_INV1(result, op1, op2)
+
+/*
+ * SUB_COUT_VEC(x, y) = NOT majority(x, NOT y, (x - y) ^ x ^ NOT y)
+ * = majority(NOT x, y, (x - y) ^ x ^ y)
+ *
+ * Note that the carry out is actually a borrow, i.e. it is inverted.
+ * If two corresponding bits in x and y are different, the value of the
+ * corresponding bit in (x-y)^x^y likewise does not matter. Hence x^y can
+ * be replaced with 0 in (x-y)^x^y, resulting in majority(NOT x, y, x-y)
+ */
+#define SUB_COUT_VEC(op1, op2, result) \
+ MAJ_INV1(op1, op2, result)
+
#endif /* I386_CPU_H */
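
As a sanity check on the two identities above, the macros can be verified
exhaustively at 8 bits against a ripple-carry reference. A standalone
sketch, not part of the patch (the macros are copied from cpu.h above):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c))
    #define ADD_COUT_VEC(op1, op2, result) MAJ_INV1(result, op1, op2)
    #define SUB_COUT_VEC(op1, op2, result) MAJ_INV1(op1, op2, result)

    /* Reference: carry out of every bit position, from a ripple adder. */
    static uint8_t ref_add_carries(uint8_t x, uint8_t y, int cin)
    {
        uint8_t out = 0;
        for (int i = 0; i < 8; i++) {
            int cout = (((x >> i) & 1) + ((y >> i) & 1) + cin) >> 1;
            out |= cout << i;
            cin = cout;
        }
        return out;
    }

    int main(void)
    {
        for (int x = 0; x < 256; x++) {
            for (int y = 0; y < 256; y++) {
                for (int b = 0; b < 2; b++) {   /* carry/borrow in */
                    uint8_t add = x + y + b;
                    uint8_t sub = x - y - b;
                    assert((uint8_t)ADD_COUT_VEC(x, y, add) ==
                           ref_add_carries(x, y, b));
                    /* x - y - b == x + ~y + !b; borrow = inverted carry */
                    assert((uint8_t)SUB_COUT_VEC(x, y, sub) ==
                           (uint8_t)~ref_add_carries(x, ~y, !b));
                }
            }
        }
        printf("all 8-bit carry-out vectors match\n");
        return 0;
    }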
@@ -22,20 +22,24 @@
#if DATA_BITS == 8
#define SUFFIX b
#define DATA_TYPE uint8_t
+#define SDATA_TYPE int8_t
#define WIDER_TYPE uint32_t
#elif DATA_BITS == 16
#define SUFFIX w
#define DATA_TYPE uint16_t
+#define SDATA_TYPE int16_t
#define WIDER_TYPE uint32_t
#elif DATA_BITS == 32
#define SUFFIX l
#define DATA_TYPE uint32_t
+#define SDATA_TYPE int32_t
#if HOST_LONG_BITS >= 64
#define WIDER_TYPE uint64_t
#endif
#elif DATA_BITS == 64
#define SUFFIX q
#define DATA_TYPE uint64_t
+#define SDATA_TYPE int64_t
#else
#error unhandled operand size
#endif
@@ -44,20 +48,33 @@
/* dynamic flags computation */
-static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static uint32_t glue(compute_all_cout, SUFFIX)(DATA_TYPE dst, SDATA_TYPE carries)
{
uint32_t cf, pf, af, zf, sf, of;
- DATA_TYPE src2 = dst - src1;
- cf = dst < src1;
+ /* PF, ZF, SF computed from result. */
pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & CC_A;
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
- of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
+
+ /*
+     * AF, CF and OF are computed from the carry-out vector. To compute OF,
+     * shift the two topmost carry bits into the OF position (CC_O) and the
+     * bit immediately to its right; adding CC_O / 2 then XORs them into CC_O.
+ */
+ af = (carries << 1) & CC_A;
+ cf = carries < 0;
+ of = (lshift(carries, 12 - DATA_BITS) + CC_O / 2) & CC_O;
return cf + pf + af + zf + sf + of;
}
+static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+ DATA_TYPE src2 = dst - src1;
+ target_long carries = ADD_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
+}
+
static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
return dst < src1;
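
To make the OF trick concrete: at DATA_BITS == 8, adding 0x50 + 0x50 gives
dst = 0xA0 and a carry-out vector of 0x50 (carries out of bits 4 and 6
only). A worked example, not part of the patch, with CC_A and CC_O spelled
out as 0x10 and 0x800:

    #include <stdint.h>
    #include <stdio.h>

    #define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c))

    int main(void)
    {
        /* 8-bit example: 0x50 + 0x50 = 0xA0; signed 80 + 80 overflows. */
        uint8_t carries = MAJ_INV1(0xA0, 0x50, 0x50);  /* == 0x50 */
        int cf = (int8_t)carries < 0;                  /* bit 7 clear: CF = 0 */
        int af = (carries << 1) & 0x10;                /* bit 3 clear: AF = 0 */
        /* bits 7..6 move to bits 11..10; adding 0x400 XORs them into bit 11 */
        int of = ((carries << 4) + 0x400) & 0x800;     /* nonzero: OF set */
        printf("CF=%d AF=%d OF=%d\n", cf, !!af, !!of);
        return 0;
    }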
@@ -66,25 +83,9 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
DATA_TYPE src3)
{
- uint32_t cf, pf, af, zf, sf, of;
-
-#ifdef WIDER_TYPE
- WIDER_TYPE src13 = (WIDER_TYPE) src1 + (WIDER_TYPE) src3;
- DATA_TYPE src2 = dst - src13;
-
- cf = dst < src13;
-#else
DATA_TYPE src2 = dst - src1 - src3;
-
- cf = (src3 ? dst <= src1 : dst < src1);
-#endif
-
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & 0x10;
- zf = (dst == 0) << 6;
- sf = lshift(dst, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ target_long carries = ADD_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
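
The reason adc (and sbb below) can share this code is that the majority
identity holds bit by bit for any carry chain, including the carry into
bit 0 contributed by src3, so neither WIDER_TYPE nor the src3 special case
is needed for CF anymore. For instance, at 8 bits (a worked example, not
part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c))
    #define ADD_COUT_VEC(op1, op2, result) MAJ_INV1(result, op1, op2)

    int main(void)
    {
        /* adc: 0x7F + 0x80 + 1 = 0x00, i.e. 127 + (-128) + 1 = 0 */
        uint8_t src1 = 0x7F, src3 = 1, dst = 0x00;
        uint8_t src2 = dst - src1 - src3;                 /* 0x80 */
        uint8_t carries = ADD_COUT_VEC(src1, src2, dst);  /* 0xFF: all carry */
        /* CF = bit 7 = 1; OF = bit 7 ^ bit 6 = 0: wraps with no overflow */
        printf("carries=0x%02x\n", carries);
        return 0;
    }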
@@ -101,16 +102,9 @@ static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
{
- uint32_t cf, pf, af, zf, sf, of;
DATA_TYPE src1 = dst + src2;
-
- cf = src1 < src2;
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & CC_A;
- zf = (dst == 0) * CC_Z;
- sf = lshift(dst, 8 - DATA_BITS) & CC_S;
- of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ target_long carries = SUB_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
@@ -123,25 +117,9 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
DATA_TYPE src3)
{
- uint32_t cf, pf, af, zf, sf, of;
-
-#ifdef WIDER_TYPE
- WIDER_TYPE src23 = (WIDER_TYPE) src2 + (WIDER_TYPE) src3;
- DATA_TYPE src1 = dst + src23;
-
- cf = src1 < src23;
-#else
DATA_TYPE src1 = dst + src2 + src3;
-
- cf = (src3 ? src1 <= src2 : src1 < src2);
-#endif
-
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & 0x10;
- zf = (dst == 0) << 6;
- sf = lshift(dst, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ target_long carries = SUB_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
@@ -286,6 +264,6 @@ static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
#undef DATA_BITS
#undef SIGN_MASK
#undef DATA_TYPE
-#undef DATA_MASK
+#undef SDATA_TYPE
#undef SUFFIX
#undef WIDER_TYPE
@@ -45,31 +45,6 @@
#define LF_MASK_CF (0x01 << LF_BIT_CF)
#define LF_MASK_PO (0x01 << LF_BIT_PO)
-/* majority(NOT a, b, c) = (a ^ b) ? b : c */
-#define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c))
-
-/*
- * ADD_COUT_VEC(x, y) = majority((x + y) ^ x ^ y, x, y)
- *
- * If two corresponding bits in x and y are the same, that's the carry
- * independent of the value (x+y)^x^y. Hence x^y can be replaced with
- * 1 in (x+y)^x^y, resulting in majority(NOT (x+y), x, y)
- */
-#define ADD_COUT_VEC(op1, op2, result) \
- MAJ_INV1(result, op1, op2)
-
-/*
- * SUB_COUT_VEC(x, y) = NOT majority(x, NOT y, (x - y) ^ x ^ NOT y)
- * = majority(NOT x, y, (x - y) ^ x ^ y)
- *
- * Note that the carry out is actually a borrow, i.e. it is inverted.
- * If two corresponding bits in x and y are different, the value that
- * the bit has in (x-y)^x^y likewise does not Hence x^y can be replaced
- * with 0 in (x-y)^x^y, resulting in majority(NOT x, y, x-y)
- */
-#define SUB_COUT_VEC(op1, op2, result) \
- MAJ_INV1(op1, op2, result)
-
/* ******************* */
/* OSZAPC */
/* ******************* */
Use the carry-out vector as the basis to compute AF, CF and OF.  The cost
is pretty much the same, because the carry-out is just four boolean
operations, and the code is much smaller because add/adc/sub/sbb now share
most of it.

A similar algorithm to what is used in target/i386/emulate can also be
used for APX, in order to build the result of CCMP/CTEST with a new
CC_OP_*.  CCMP needs to place into the flags either the result of a
subtraction or a constant value; CTEST likewise places into the flags
either the result of an AND or a constant value.  The new CC_OP for CCMP
and CTEST would store, for a successful predicate:

- in DST and SRC2, the result of the operation;

- in SRC, a carry-out vector for CCMP, or zero for CTEST.

If the default flag value is used instead, DST/SRC/SRC2 can be filled
with constants:

- in DST, the negated ZF;

- in SRC's top 2 bits, a value that results in the desired OF and CF;

- in SRC2, a suitable value (any of 0/1/~0/~1) that can be used instead
  of DST to compute the desired SF and PF.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h                        | 25 ++++++++
 target/i386/tcg/cc_helper_template.h.inc | 80 +++++++++----------
 target/i386/hvf/x86_flags.c              | 25 --------
 3 files changed, 54 insertions(+), 76 deletions(-)
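
To illustrate the "top 2 bits" remark: with compute_all_cout() as defined
above, CF is the topmost bit of the carry-out vector and OF is the XOR of
the two topmost bits, so a constant vector producing any desired CF/OF
pair could be built along these lines (a sketch with a hypothetical helper
name, not part of the patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical: constant carry-out vector yielding the desired CF and
     * OF through compute_all_cout(); data_bits would be 8/16/32/64. */
    static uint64_t cout_for_flags(bool cf, bool of, int data_bits)
    {
        uint64_t v = (uint64_t)cf << (data_bits - 1);   /* CF: topmost bit */
        v |= (uint64_t)(cf ^ of) << (data_bits - 2);    /* OF: XOR of top two */
        return v;
    }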