diff mbox

[v2,10/27] tcg: Add atomic128 helpers

Message ID 1467392693-22715-11-git-send-email-rth@twiddle.net (mailing list archive)
State New, archived
Headers show

Commit Message

Richard Henderson July 1, 2016, 5:04 p.m. UTC
Force the use of cmpxchg16b on x86_64.

Wikipedia suggests that only very old AMD64 (circa 2004) did not have
this instruction.  Further, it's required by Windows 8 so no new cpus
will ever omit it.

If we truly care about these, then we could check this at startup time
and then avoid executing paths that use it.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 configure             |  29 ++++++++++++-
 cputlb.c              |   6 +++
 include/qemu/int128.h |   6 +++
 softmmu_template.h    | 110 +++++++++++++++++++++++++++++++++++++-------------
 tcg/tcg.h             |  22 ++++++++++
 5 files changed, 144 insertions(+), 29 deletions(-)

Comments

Alex Bennée Aug. 11, 2016, 10:02 a.m. UTC | #1
Richard Henderson <rth@twiddle.net> writes:

> Force the use of cmpxchg16b on x86_64.
>
> Wikipedia suggests that only very old AMD64 (circa 2004) did not have
> this instruction.  Further, it's required by Windows 8 so no new cpus
> will ever omit it.
>
> If we truly care about these, then we could check this at startup time
> and then avoid executing paths that use it.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  configure             |  29 ++++++++++++-
>  cputlb.c              |   6 +++
>  include/qemu/int128.h |   6 +++
>  softmmu_template.h    | 110 +++++++++++++++++++++++++++++++++++++-------------
>  tcg/tcg.h             |  22 ++++++++++
>  5 files changed, 144 insertions(+), 29 deletions(-)
>
<snip>
> diff --git a/softmmu_template.h b/softmmu_template.h
> index 76712b9..0a9f49b 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -27,25 +27,30 @@
>
>  #define DATA_SIZE (1 << SHIFT)
>
> -#if DATA_SIZE == 8
> -#define SUFFIX q
> -#define LSUFFIX q
> -#define SDATA_TYPE  int64_t
> +#if DATA_SIZE == 16
> +#define SUFFIX     o
> +#define LSUFFIX    o
> +#define SDATA_TYPE Int128
> +#define DATA_TYPE  Int128
> +#elif DATA_SIZE == 8
> +#define SUFFIX     q
> +#define LSUFFIX    q
> +#define SDATA_TYPE int64_t
>  #define DATA_TYPE  uint64_t
>  #elif DATA_SIZE == 4
> -#define SUFFIX l
> -#define LSUFFIX l
> -#define SDATA_TYPE  int32_t
> +#define SUFFIX     l
> +#define LSUFFIX    l
> +#define SDATA_TYPE int32_t
>  #define DATA_TYPE  uint32_t
>  #elif DATA_SIZE == 2
> -#define SUFFIX w
> -#define LSUFFIX uw
> -#define SDATA_TYPE  int16_t
> +#define SUFFIX     w
> +#define LSUFFIX    uw
> +#define SDATA_TYPE int16_t
>  #define DATA_TYPE  uint16_t
>  #elif DATA_SIZE == 1
> -#define SUFFIX b
> -#define LSUFFIX ub
> -#define SDATA_TYPE  int8_t
> +#define SUFFIX     b
> +#define LSUFFIX    ub
> +#define SDATA_TYPE int8_t
>  #define DATA_TYPE  uint8_t
>  #else
>  #error unsupported data size
> @@ -56,7 +61,7 @@
>     to the register size of the host.  This is tcg_target_long, except in the
>     case of a 32-bit host and 64-bit data, and for that we always have
>     uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
> -#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
> +#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE >= 8
>  # define WORD_TYPE  DATA_TYPE
>  # define USUFFIX    SUFFIX
>  #else
> @@ -73,7 +78,9 @@
>  #define ADDR_READ addr_read
>  #endif
>
> -#if DATA_SIZE == 8
> +#if DATA_SIZE == 16
> +# define BSWAP(X)  bswap128(X)
> +#elif DATA_SIZE == 8
>  # define BSWAP(X)  bswap64(X)
>  #elif DATA_SIZE == 4
>  # define BSWAP(X)  bswap32(X)
> @@ -140,6 +147,7 @@
>      vidx >= 0;                                                                \
>  })

This currently has merge conflicts with current master, due to the move
of the VICTIM_TLB code.

>
> +#if DATA_SIZE < 16
>  #ifndef SOFTMMU_CODE_ACCESS
>  static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
>                                                CPUIOTLBEntry *iotlbentry,
> @@ -307,9 +315,10 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
>      return res;
>  }
>  #endif /* DATA_SIZE > 1 */
> +#endif /* DATA_SIZE < 16 */
>
>  #ifndef SOFTMMU_CODE_ACCESS
> -
> +#if DATA_SIZE < 16
>  /* Provide signed versions of the load routines as well.  We can of course
>     avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
>  #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
> @@ -507,6 +516,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
>      }
>  }
>  #endif
> +#endif /* DATA_SIZE < 16 */
>
>  #if DATA_SIZE == 1
>  # define HE_SUFFIX  _mmu
> @@ -573,9 +583,30 @@ DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
>       TCGMemOpIdx oi, uintptr_t retaddr)
>  {
>      ATOMIC_MMU_BODY;
> +#if DATA_SIZE < 16
>      return atomic_cmpxchg(haddr, cmpv, newv);
> +#else
> +    __atomic_compare_exchange(haddr, &cmpv, &newv, false,
> +                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
> +    return cmpv;
> +#endif
>  }
>
> +#if DATA_SIZE > 1
> +DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX)
> +    (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv,
> +     TCGMemOpIdx oi, uintptr_t retaddr)
> +{
> +    DATA_TYPE retv;
> +    cmpv = BSWAP(cmpv);
> +    newv = BSWAP(newv);
> +    retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
> +            (env, addr, cmpv, newv, oi, retaddr));
> +    return BSWAP(retv);
> +}
> +#endif
> +
> +#if DATA_SIZE < 16
>  #define GEN_ATOMIC_HELPER(NAME)                                         \
>  DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), HE_SUFFIX)     \
>      (CPUArchState *env, target_ulong addr, DATA_TYPE val,               \
> @@ -600,18 +631,6 @@ GEN_ATOMIC_HELPER(xchg)
>  #undef GEN_ATOMIC_HELPER
>
>  #if DATA_SIZE > 1
> -DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX)
> -    (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv,
> -     TCGMemOpIdx oi, uintptr_t retaddr)
> -{
> -    DATA_TYPE retv;
> -    cmpv = BSWAP(cmpv);
> -    newv = BSWAP(newv);
> -    retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
> -            (env, addr, cmpv, newv, oi, retaddr));
> -    return BSWAP(retv);
> -}
> -
>  #define GEN_ATOMIC_HELPER(NAME)                                         \
>  DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), RE_SUFFIX)     \
>      (CPUArchState *env, target_ulong addr, DATA_TYPE val,               \
> @@ -676,6 +695,41 @@ DATA_TYPE glue(glue(helper_atomic_add_fetch, SUFFIX), RE_SUFFIX)
>      }
>  }
>  #endif /* DATA_SIZE > 1 */
> +#else /* DATA_SIZE >= 16 */
> +DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX)
> +    (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr)
> +{
> +    DATA_TYPE res;
> +    ATOMIC_MMU_BODY;
> +    __atomic_load(haddr, &res, __ATOMIC_RELAXED);
> +    return res;
> +}
> +
> +DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), RE_SUFFIX)
> +    (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr)
> +{
> +    DATA_TYPE res;
> +    res = (glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX)
> +           (env, addr, oi, retaddr));
> +    return BSWAP(res);
> +}
> +
> +void glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX)
> +    (CPUArchState *env, target_ulong addr, DATA_TYPE val,
> +     TCGMemOpIdx oi, uintptr_t retaddr)
> +{
> +    ATOMIC_MMU_BODY;
> +    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
> +}
> +
> +void glue(glue(helper_atomic_st, SUFFIX), RE_SUFFIX)
> +    (CPUArchState *env, target_ulong addr, DATA_TYPE val,
> +     TCGMemOpIdx oi, uintptr_t retaddr)
> +{
> +    (glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX)
> +     (env, addr, BSWAP(val), oi, retaddr));
> +}
> +#endif /* DATA_SIZE < 16 */
>
>  #undef ATOMIC_MMU_BODY
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 4e60498..1304a42 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -1216,6 +1216,28 @@ GEN_ATOMIC_HELPER_ALL(xchg)
>  #undef GEN_ATOMIC_HELPER_ALL
>  #undef GEN_ATOMIC_HELPER
>
> +#ifdef CONFIG_ATOMIC128
> +#include "qemu/int128.h"
> +
> +/* These aren't really "proper" helpers because TCG cannot manage Int128.
> +   However, use the same format as the others, for use by the backends. */
> +Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
> +                                     Int128 cmpv, Int128 newv,
> +                                     TCGMemOpIdx oi, uintptr_t retaddr);
> +Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
> +                                     Int128 cmpv, Int128 newv,
> +                                     TCGMemOpIdx oi, uintptr_t retaddr);
> +
> +Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
> +                                TCGMemOpIdx oi, uintptr_t retaddr);
> +Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
> +                                TCGMemOpIdx oi, uintptr_t retaddr);
> +void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
> +                              TCGMemOpIdx oi, uintptr_t retaddr);
> +void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
> +                              TCGMemOpIdx oi, uintptr_t retaddr);
> +
> +#endif /* CONFIG_ATOMIC128 */
>  #endif /* CONFIG_SOFTMMU */
>
>  #endif /* TCG_H */


--
Alex Bennée
diff mbox

Patch

diff --git a/configure b/configure
index 59ea124..586abd6 100755
--- a/configure
+++ b/configure
@@ -1201,7 +1201,10 @@  case "$cpu" in
            cc_i386='$(CC) -m32'
            ;;
     x86_64)
-           CPU_CFLAGS="-m64"
+           # ??? Only extremely old AMD cpus do not have cmpxchg16b.
+           # If we truly care, we should simply detect this case at
+           # runtime and generate the fallback to serial emulation.
+           CPU_CFLAGS="-m64 -mcx16"
            LDFLAGS="-m64 $LDFLAGS"
            cc_i386='$(CC) -m32'
            ;;
@@ -4434,6 +4437,26 @@  if compile_prog "" "" ; then
     int128=yes
 fi
 
+#########################################
+# See if 128-bit atomic operations are supported.
+
+atomic128=no
+if test "$int128" = "yes"; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  y = __atomic_load_16(&x, 0);
+  __atomic_store_16(&x, y, 0);
+  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    atomic128=yes
+  fi
+fi
+
 ########################################
 # check if getauxval is available.
 
@@ -5383,6 +5406,10 @@  if test "$int128" = "yes" ; then
   echo "CONFIG_INT128=y" >> $config_host_mak
 fi
 
+if test "$atomic128" = "yes" ; then
+  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
+fi
+
 if test "$getauxval" = "yes" ; then
   echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
diff --git a/cputlb.c b/cputlb.c
index 5272456..660f824 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -510,6 +510,12 @@  tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+
+#ifdef CONFIG_ATOMIC128
+#define SHIFT 4
+#include "softmmu_template.h"
+#endif
+
 #undef MMUSUFFIX
 
 #define MMUSUFFIX _cmmu
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index ab67275..5819da4 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -2,6 +2,7 @@ 
 #define INT128_H
 
 #ifdef CONFIG_INT128
+#include "qemu/bswap.h"
 
 typedef __int128 Int128;
 
@@ -137,6 +138,11 @@  static inline void int128_subfrom(Int128 *a, Int128 b)
     *a -= b;
 }
 
+static inline Int128 bswap128(Int128 a)
+{
+    return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a)));
+}
+
 #else /* !CONFIG_INT128 */
 
 /* Here we are catering to the ABI of the host.  If the host returns
diff --git a/softmmu_template.h b/softmmu_template.h
index 76712b9..0a9f49b 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -27,25 +27,30 @@ 
 
 #define DATA_SIZE (1 << SHIFT)
 
-#if DATA_SIZE == 8
-#define SUFFIX q
-#define LSUFFIX q
-#define SDATA_TYPE  int64_t
+#if DATA_SIZE == 16
+#define SUFFIX     o
+#define LSUFFIX    o
+#define SDATA_TYPE Int128
+#define DATA_TYPE  Int128
+#elif DATA_SIZE == 8
+#define SUFFIX     q
+#define LSUFFIX    q
+#define SDATA_TYPE int64_t
 #define DATA_TYPE  uint64_t
 #elif DATA_SIZE == 4
-#define SUFFIX l
-#define LSUFFIX l
-#define SDATA_TYPE  int32_t
+#define SUFFIX     l
+#define LSUFFIX    l
+#define SDATA_TYPE int32_t
 #define DATA_TYPE  uint32_t
 #elif DATA_SIZE == 2
-#define SUFFIX w
-#define LSUFFIX uw
-#define SDATA_TYPE  int16_t
+#define SUFFIX     w
+#define LSUFFIX    uw
+#define SDATA_TYPE int16_t
 #define DATA_TYPE  uint16_t
 #elif DATA_SIZE == 1
-#define SUFFIX b
-#define LSUFFIX ub
-#define SDATA_TYPE  int8_t
+#define SUFFIX     b
+#define LSUFFIX    ub
+#define SDATA_TYPE int8_t
 #define DATA_TYPE  uint8_t
 #else
 #error unsupported data size
@@ -56,7 +61,7 @@ 
    to the register size of the host.  This is tcg_target_long, except in the
    case of a 32-bit host and 64-bit data, and for that we always have
    uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
-#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
+#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE >= 8
 # define WORD_TYPE  DATA_TYPE
 # define USUFFIX    SUFFIX
 #else
@@ -73,7 +78,9 @@ 
 #define ADDR_READ addr_read
 #endif
 
-#if DATA_SIZE == 8
+#if DATA_SIZE == 16
+# define BSWAP(X)  bswap128(X)
+#elif DATA_SIZE == 8
 # define BSWAP(X)  bswap64(X)
 #elif DATA_SIZE == 4
 # define BSWAP(X)  bswap32(X)
@@ -140,6 +147,7 @@ 
     vidx >= 0;                                                                \
 })
 
+#if DATA_SIZE < 16
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               CPUIOTLBEntry *iotlbentry,
@@ -307,9 +315,10 @@  WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
     return res;
 }
 #endif /* DATA_SIZE > 1 */
+#endif /* DATA_SIZE < 16 */
 
 #ifndef SOFTMMU_CODE_ACCESS
-
+#if DATA_SIZE < 16
 /* Provide signed versions of the load routines as well.  We can of course
    avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
 #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
@@ -507,6 +516,7 @@  void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
     }
 }
 #endif
+#endif /* DATA_SIZE < 16 */
 
 #if DATA_SIZE == 1
 # define HE_SUFFIX  _mmu
@@ -573,9 +583,30 @@  DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
      TCGMemOpIdx oi, uintptr_t retaddr)
 {
     ATOMIC_MMU_BODY;
+#if DATA_SIZE < 16
     return atomic_cmpxchg(haddr, cmpv, newv);
+#else
+    __atomic_compare_exchange(haddr, &cmpv, &newv, false,
+                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+    return cmpv;
+#endif
 }
 
+#if DATA_SIZE > 1
+DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE retv;
+    cmpv = BSWAP(cmpv);
+    newv = BSWAP(newv);
+    retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
+            (env, addr, cmpv, newv, oi, retaddr));
+    return BSWAP(retv);
+}
+#endif
+
+#if DATA_SIZE < 16
 #define GEN_ATOMIC_HELPER(NAME)                                         \
 DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), HE_SUFFIX)     \
     (CPUArchState *env, target_ulong addr, DATA_TYPE val,               \
@@ -600,18 +631,6 @@  GEN_ATOMIC_HELPER(xchg)
 #undef GEN_ATOMIC_HELPER
 
 #if DATA_SIZE > 1
-DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX)
-    (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv,
-     TCGMemOpIdx oi, uintptr_t retaddr)
-{
-    DATA_TYPE retv;
-    cmpv = BSWAP(cmpv);
-    newv = BSWAP(newv);
-    retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
-            (env, addr, cmpv, newv, oi, retaddr));
-    return BSWAP(retv);
-}
-
 #define GEN_ATOMIC_HELPER(NAME)                                         \
 DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), RE_SUFFIX)     \
     (CPUArchState *env, target_ulong addr, DATA_TYPE val,               \
@@ -676,6 +695,41 @@  DATA_TYPE glue(glue(helper_atomic_add_fetch, SUFFIX), RE_SUFFIX)
     }
 }
 #endif /* DATA_SIZE > 1 */
+#else /* DATA_SIZE >= 16 */
+DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE res;
+    ATOMIC_MMU_BODY;
+    __atomic_load(haddr, &res, __ATOMIC_RELAXED);
+    return res;
+}
+
+DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE res;
+    res = (glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX)
+           (env, addr, oi, retaddr));
+    return BSWAP(res);
+}
+
+void glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE val,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    ATOMIC_MMU_BODY;
+    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+}
+
+void glue(glue(helper_atomic_st, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE val,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    (glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX)
+     (env, addr, BSWAP(val), oi, retaddr));
+}
+#endif /* DATA_SIZE < 16 */
 
 #undef ATOMIC_MMU_BODY
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 4e60498..1304a42 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -1216,6 +1216,28 @@  GEN_ATOMIC_HELPER_ALL(xchg)
 #undef GEN_ATOMIC_HELPER_ALL
 #undef GEN_ATOMIC_HELPER
 
+#ifdef CONFIG_ATOMIC128
+#include "qemu/int128.h"
+
+/* These aren't really "proper" helpers because TCG cannot manage Int128.
+   However, use the same format as the others, for use by the backends. */
+Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
+                                     Int128 cmpv, Int128 newv,
+                                     TCGMemOpIdx oi, uintptr_t retaddr);
+Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
+                                     Int128 cmpv, Int128 newv,
+                                     TCGMemOpIdx oi, uintptr_t retaddr);
+
+Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
+                                TCGMemOpIdx oi, uintptr_t retaddr);
+Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
+                                TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+                              TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+                              TCGMemOpIdx oi, uintptr_t retaddr);
+
+#endif /* CONFIG_ATOMIC128 */
 #endif /* CONFIG_SOFTMMU */
 
 #endif /* TCG_H */