Message ID | 1467392693-22715-11-git-send-email-rth@twiddle.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Richard Henderson <rth@twiddle.net> writes: > Force the use of cmpxchg16b on x86_64. > > Wikipedia suggests that only very old AMD64 (circa 2004) did not have > this instruction. Further, it's required by Windows 8 so no new cpus > will ever omit it. > > If we truely care about these, then we could check this at startup time > and then avoid executing paths that use it. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > configure | 29 ++++++++++++- > cputlb.c | 6 +++ > include/qemu/int128.h | 6 +++ > softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++------------- > tcg/tcg.h | 22 ++++++++++ > 5 files changed, 144 insertions(+), 29 deletions(-) > <snip> > diff --git a/softmmu_template.h b/softmmu_template.h > index 76712b9..0a9f49b 100644 > --- a/softmmu_template.h > +++ b/softmmu_template.h > @@ -27,25 +27,30 @@ > > #define DATA_SIZE (1 << SHIFT) > > -#if DATA_SIZE == 8 > -#define SUFFIX q > -#define LSUFFIX q > -#define SDATA_TYPE int64_t > +#if DATA_SIZE == 16 > +#define SUFFIX o > +#define LSUFFIX o > +#define SDATA_TYPE Int128 > +#define DATA_TYPE Int128 > +#elif DATA_SIZE == 8 > +#define SUFFIX q > +#define LSUFFIX q > +#define SDATA_TYPE int64_t > #define DATA_TYPE uint64_t > #elif DATA_SIZE == 4 > -#define SUFFIX l > -#define LSUFFIX l > -#define SDATA_TYPE int32_t > +#define SUFFIX l > +#define LSUFFIX l > +#define SDATA_TYPE int32_t > #define DATA_TYPE uint32_t > #elif DATA_SIZE == 2 > -#define SUFFIX w > -#define LSUFFIX uw > -#define SDATA_TYPE int16_t > +#define SUFFIX w > +#define LSUFFIX uw > +#define SDATA_TYPE int16_t > #define DATA_TYPE uint16_t > #elif DATA_SIZE == 1 > -#define SUFFIX b > -#define LSUFFIX ub > -#define SDATA_TYPE int8_t > +#define SUFFIX b > +#define LSUFFIX ub > +#define SDATA_TYPE int8_t > #define DATA_TYPE uint8_t > #else > #error unsupported data size > @@ -56,7 +61,7 @@ > to the register size of the host. 
This is tcg_target_long, except in the > case of a 32-bit host and 64-bit data, and for that we always have > uint64_t. Don't bother with this widened value for SOFTMMU_CODE_ACCESS. */ > -#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8 > +#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE >= 8 > # define WORD_TYPE DATA_TYPE > # define USUFFIX SUFFIX > #else > @@ -73,7 +78,9 @@ > #define ADDR_READ addr_read > #endif > > -#if DATA_SIZE == 8 > +#if DATA_SIZE == 16 > +# define BSWAP(X) bswap128(X) > +#elif DATA_SIZE == 8 > # define BSWAP(X) bswap64(X) > #elif DATA_SIZE == 4 > # define BSWAP(X) bswap32(X) > @@ -140,6 +147,7 @@ > vidx >= 0; \ > }) This currently merge conflicts with the current master due to the move of the VICTIM_TLB code. > > +#if DATA_SIZE < 16 > #ifndef SOFTMMU_CODE_ACCESS > static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, > CPUIOTLBEntry *iotlbentry, > @@ -307,9 +315,10 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, > return res; > } > #endif /* DATA_SIZE > 1 */ > +#endif /* DATA_SIZE < 16 */ > > #ifndef SOFTMMU_CODE_ACCESS > - > +#if DATA_SIZE < 16 > /* Provide signed versions of the load routines as well. We can of course > avoid this for 64-bit data, or for 32-bit data on 32-bit host. 
*/ > #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS > @@ -507,6 +516,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, > } > } > #endif > +#endif /* DATA_SIZE < 16 */ > > #if DATA_SIZE == 1 > # define HE_SUFFIX _mmu > @@ -573,9 +583,30 @@ DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) > TCGMemOpIdx oi, uintptr_t retaddr) > { > ATOMIC_MMU_BODY; > +#if DATA_SIZE < 16 > return atomic_cmpxchg(haddr, cmpv, newv); > +#else > + __atomic_compare_exchange(haddr, &cmpv, &newv, false, > + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); > + return cmpv; > +#endif > } > > +#if DATA_SIZE > 1 > +DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX) > + (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv, > + TCGMemOpIdx oi, uintptr_t retaddr) > +{ > + DATA_TYPE retv; > + cmpv = BSWAP(cmpv); > + newv = BSWAP(newv); > + retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) > + (env, addr, cmpv, newv, oi, retaddr)); > + return BSWAP(retv); > +} > +#endif > + > +#if DATA_SIZE < 16 > #define GEN_ATOMIC_HELPER(NAME) \ > DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), HE_SUFFIX) \ > (CPUArchState *env, target_ulong addr, DATA_TYPE val, \ > @@ -600,18 +631,6 @@ GEN_ATOMIC_HELPER(xchg) > #undef GEN_ATOMIC_HELPER > > #if DATA_SIZE > 1 > -DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX) > - (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv, > - TCGMemOpIdx oi, uintptr_t retaddr) > -{ > - DATA_TYPE retv; > - cmpv = BSWAP(cmpv); > - newv = BSWAP(newv); > - retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) > - (env, addr, cmpv, newv, oi, retaddr)); > - return BSWAP(retv); > -} > - > #define GEN_ATOMIC_HELPER(NAME) \ > DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), RE_SUFFIX) \ > (CPUArchState *env, target_ulong addr, DATA_TYPE val, \ > @@ -676,6 +695,41 @@ DATA_TYPE glue(glue(helper_atomic_add_fetch, SUFFIX), RE_SUFFIX) > } > } > #endif /* DATA_SIZE > 1 */ > +#else /* 
DATA_SIZE >= 16 */ > +DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX) > + (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) > +{ > + DATA_TYPE res; > + ATOMIC_MMU_BODY; > + __atomic_load(haddr, &res, __ATOMIC_RELAXED); > + return res; > +} > + > +DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), RE_SUFFIX) > + (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) > +{ > + DATA_TYPE res; > + res = (glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX) > + (env, addr, oi, retaddr)); > + return BSWAP(res); > +} > + > +void glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX) > + (CPUArchState *env, target_ulong addr, DATA_TYPE val, > + TCGMemOpIdx oi, uintptr_t retaddr) > +{ > + ATOMIC_MMU_BODY; > + __atomic_store(haddr, &val, __ATOMIC_RELAXED); > +} > + > +void glue(glue(helper_atomic_st, SUFFIX), RE_SUFFIX) > + (CPUArchState *env, target_ulong addr, DATA_TYPE val, > + TCGMemOpIdx oi, uintptr_t retaddr) > +{ > + (glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX) > + (env, addr, BSWAP(val), oi, retaddr)); > +} > +#endif /* DATA_SIZE < 16 */ > > #undef ATOMIC_MMU_BODY > > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 4e60498..1304a42 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -1216,6 +1216,28 @@ GEN_ATOMIC_HELPER_ALL(xchg) > #undef GEN_ATOMIC_HELPER_ALL > #undef GEN_ATOMIC_HELPER > > +#ifdef CONFIG_ATOMIC128 > +#include "qemu/int128.h" > + > +/* These aren't really a "proper" helpers because TCG cannot manage Int128. > + However, use the same format as the others, for use by the backends. 
*/ > +Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, > + Int128 cmpv, Int128 newv, > + TCGMemOpIdx oi, uintptr_t retaddr); > +Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, > + Int128 cmpv, Int128 newv, > + TCGMemOpIdx oi, uintptr_t retaddr); > + > +Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, > + TCGMemOpIdx oi, uintptr_t retaddr); > +Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, > + TCGMemOpIdx oi, uintptr_t retaddr); > +void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, > + TCGMemOpIdx oi, uintptr_t retaddr); > +void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, > + TCGMemOpIdx oi, uintptr_t retaddr); > + > +#endif /* CONFIG_ATOMIC128 */ > #endif /* CONFIG_SOFTMMU */ > > #endif /* TCG_H */ -- Alex Bennée
diff --git a/configure b/configure index 59ea124..586abd6 100755 --- a/configure +++ b/configure @@ -1201,7 +1201,10 @@ case "$cpu" in cc_i386='$(CC) -m32' ;; x86_64) - CPU_CFLAGS="-m64" + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. + CPU_CFLAGS="-m64 -mcx16" LDFLAGS="-m64 $LDFLAGS" cc_i386='$(CC) -m32' ;; @@ -4434,6 +4437,26 @@ if compile_prog "" "" ; then int128=yes fi +######################################### +# See if 128-bit atomic operations are supported. + +atomic128=no +if test "$int128" = "yes"; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + y = __atomic_load_16(&x, 0); + __atomic_store_16(&x, y, 0); + __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0); + return 0; +} +EOF + if compile_prog "" "" ; then + atomic128=yes + fi +fi + ######################################## # check if getauxval is available. @@ -5383,6 +5406,10 @@ if test "$int128" = "yes" ; then echo "CONFIG_INT128=y" >> $config_host_mak fi +if test "$atomic128" = "yes" ; then + echo "CONFIG_ATOMIC128=y" >> $config_host_mak +fi + if test "$getauxval" = "yes" ; then echo "CONFIG_GETAUXVAL=y" >> $config_host_mak fi diff --git a/cputlb.c b/cputlb.c index 5272456..660f824 100644 --- a/cputlb.c +++ b/cputlb.c @@ -510,6 +510,12 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) #define SHIFT 3 #include "softmmu_template.h" + +#ifdef CONFIG_ATOMIC128 +#define SHIFT 4 +#include "softmmu_template.h" +#endif + #undef MMUSUFFIX #define MMUSUFFIX _cmmu diff --git a/include/qemu/int128.h b/include/qemu/int128.h index ab67275..5819da4 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -2,6 +2,7 @@ #define INT128_H #ifdef CONFIG_INT128 +#include "qemu/bswap.h" typedef __int128 Int128; @@ -137,6 +138,11 @@ static inline void int128_subfrom(Int128 *a, Int128 b) *a -= b; } +static inline Int128 
bswap128(Int128 a) +{ + return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a))); +} + #else /* !CONFIG_INT128 */ /* Here we are catering to the ABI of the host. If the host returns diff --git a/softmmu_template.h b/softmmu_template.h index 76712b9..0a9f49b 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -27,25 +27,30 @@ #define DATA_SIZE (1 << SHIFT) -#if DATA_SIZE == 8 -#define SUFFIX q -#define LSUFFIX q -#define SDATA_TYPE int64_t +#if DATA_SIZE == 16 +#define SUFFIX o +#define LSUFFIX o +#define SDATA_TYPE Int128 +#define DATA_TYPE Int128 +#elif DATA_SIZE == 8 +#define SUFFIX q +#define LSUFFIX q +#define SDATA_TYPE int64_t #define DATA_TYPE uint64_t #elif DATA_SIZE == 4 -#define SUFFIX l -#define LSUFFIX l -#define SDATA_TYPE int32_t +#define SUFFIX l +#define LSUFFIX l +#define SDATA_TYPE int32_t #define DATA_TYPE uint32_t #elif DATA_SIZE == 2 -#define SUFFIX w -#define LSUFFIX uw -#define SDATA_TYPE int16_t +#define SUFFIX w +#define LSUFFIX uw +#define SDATA_TYPE int16_t #define DATA_TYPE uint16_t #elif DATA_SIZE == 1 -#define SUFFIX b -#define LSUFFIX ub -#define SDATA_TYPE int8_t +#define SUFFIX b +#define LSUFFIX ub +#define SDATA_TYPE int8_t #define DATA_TYPE uint8_t #else #error unsupported data size @@ -56,7 +61,7 @@ to the register size of the host. This is tcg_target_long, except in the case of a 32-bit host and 64-bit data, and for that we always have uint64_t. Don't bother with this widened value for SOFTMMU_CODE_ACCESS. 
*/ -#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8 +#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE >= 8 # define WORD_TYPE DATA_TYPE # define USUFFIX SUFFIX #else @@ -73,7 +78,9 @@ #define ADDR_READ addr_read #endif -#if DATA_SIZE == 8 +#if DATA_SIZE == 16 +# define BSWAP(X) bswap128(X) +#elif DATA_SIZE == 8 # define BSWAP(X) bswap64(X) #elif DATA_SIZE == 4 # define BSWAP(X) bswap32(X) @@ -140,6 +147,7 @@ vidx >= 0; \ }) +#if DATA_SIZE < 16 #ifndef SOFTMMU_CODE_ACCESS static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, CPUIOTLBEntry *iotlbentry, @@ -307,9 +315,10 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, return res; } #endif /* DATA_SIZE > 1 */ +#endif /* DATA_SIZE < 16 */ #ifndef SOFTMMU_CODE_ACCESS - +#if DATA_SIZE < 16 /* Provide signed versions of the load routines as well. We can of course avoid this for 64-bit data, or for 32-bit data on 32-bit host. */ #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS @@ -507,6 +516,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, } } #endif +#endif /* DATA_SIZE < 16 */ #if DATA_SIZE == 1 # define HE_SUFFIX _mmu @@ -573,9 +583,30 @@ DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) TCGMemOpIdx oi, uintptr_t retaddr) { ATOMIC_MMU_BODY; +#if DATA_SIZE < 16 return atomic_cmpxchg(haddr, cmpv, newv); +#else + __atomic_compare_exchange(haddr, &cmpv, &newv, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return cmpv; +#endif } +#if DATA_SIZE > 1 +DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX) + (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + DATA_TYPE retv; + cmpv = BSWAP(cmpv); + newv = BSWAP(newv); + retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) + (env, addr, cmpv, newv, oi, retaddr)); + return BSWAP(retv); +} +#endif + +#if DATA_SIZE < 16 #define GEN_ATOMIC_HELPER(NAME) \ DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), HE_SUFFIX) \ (CPUArchState *env, 
target_ulong addr, DATA_TYPE val, \ @@ -600,18 +631,6 @@ GEN_ATOMIC_HELPER(xchg) #undef GEN_ATOMIC_HELPER #if DATA_SIZE > 1 -DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX) - (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - DATA_TYPE retv; - cmpv = BSWAP(cmpv); - newv = BSWAP(newv); - retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) - (env, addr, cmpv, newv, oi, retaddr)); - return BSWAP(retv); -} - #define GEN_ATOMIC_HELPER(NAME) \ DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), RE_SUFFIX) \ (CPUArchState *env, target_ulong addr, DATA_TYPE val, \ @@ -676,6 +695,41 @@ DATA_TYPE glue(glue(helper_atomic_add_fetch, SUFFIX), RE_SUFFIX) } } #endif /* DATA_SIZE > 1 */ +#else /* DATA_SIZE >= 16 */ +DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX) + (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) +{ + DATA_TYPE res; + ATOMIC_MMU_BODY; + __atomic_load(haddr, &res, __ATOMIC_RELAXED); + return res; +} + +DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), RE_SUFFIX) + (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) +{ + DATA_TYPE res; + res = (glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX) + (env, addr, oi, retaddr)); + return BSWAP(res); +} + +void glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX) + (CPUArchState *env, target_ulong addr, DATA_TYPE val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + ATOMIC_MMU_BODY; + __atomic_store(haddr, &val, __ATOMIC_RELAXED); +} + +void glue(glue(helper_atomic_st, SUFFIX), RE_SUFFIX) + (CPUArchState *env, target_ulong addr, DATA_TYPE val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + (glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX) + (env, addr, BSWAP(val), oi, retaddr)); +} +#endif /* DATA_SIZE < 16 */ #undef ATOMIC_MMU_BODY diff --git a/tcg/tcg.h b/tcg/tcg.h index 4e60498..1304a42 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -1216,6 +1216,28 @@ GEN_ATOMIC_HELPER_ALL(xchg) #undef 
GEN_ATOMIC_HELPER_ALL #undef GEN_ATOMIC_HELPER +#ifdef CONFIG_ATOMIC128 +#include "qemu/int128.h" + +/* These aren't really a "proper" helpers because TCG cannot manage Int128. + However, use the same format as the others, for use by the backends. */ +Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); + +#endif /* CONFIG_ATOMIC128 */ #endif /* CONFIG_SOFTMMU */ #endif /* TCG_H */
Force the use of cmpxchg16b on x86_64. Wikipedia suggests that only very old AMD64 (circa 2004) did not have this instruction. Further, it's required by Windows 8 so no new cpus will ever omit it. If we truly care about these, then we could check this at startup time and then avoid executing paths that use it. Signed-off-by: Richard Henderson <rth@twiddle.net> --- configure | 29 ++++++++++++- cputlb.c | 6 +++ include/qemu/int128.h | 6 +++ softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++------------- tcg/tcg.h | 22 ++++++++++ 5 files changed, 144 insertions(+), 29 deletions(-)