
[RFC,v7,14/16] target-arm: translate: Use ld/st excl for atomic insns

Message ID 1454059965-23402-15-git-send-email-a.rigo@virtualopensystems.com (mailing list archive)
State New, archived

Commit Message

alvise rigo Jan. 29, 2016, 9:32 a.m. UTC
Use the new LL/SC runtime helpers to handle the ARM atomic instructions
in softmmu_llsc_template.h.

In general, the helper generators
gen_{ldrex,strex}_{i8,i16a,i32a,i64a}() call the
helper_{le,be}_{ldlink,stcond}{ub,uw,ul,q}_mmu() functions implemented in
softmmu_llsc_template.h, which also perform an alignment check.

In addition, add a simple helper function to emulate the CLREX instruction.

Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
 target-arm/cpu.h       |   2 +
 target-arm/helper.h    |   4 ++
 target-arm/machine.c   |   2 +
 target-arm/op_helper.c |  10 +++
 target-arm/translate.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 202 insertions(+), 4 deletions(-)

Comments

Alex Bennée Feb. 18, 2016, 5:02 p.m. UTC | #1
Alvise Rigo <a.rigo@virtualopensystems.com> writes:

> Use the new LL/SC runtime helpers to handle the ARM atomic instructions
> in softmmu_llsc_template.h.
>
> In general, the helper generator
> gen_{ldrex,strex}_{8,16a,32a,64a}() calls the function
> helper_{le,be}_{ldlink,stcond}{ub,uw,ul,q}_mmu() implemented in
> softmmu_llsc_template.h, doing an alignment check.
>
> In addition, add a simple helper function to emulate the CLREX instruction.
>
> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> ---
>  target-arm/cpu.h       |   2 +
>  target-arm/helper.h    |   4 ++
>  target-arm/machine.c   |   2 +
>  target-arm/op_helper.c |  10 +++
>  target-arm/translate.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++--
>  5 files changed, 202 insertions(+), 4 deletions(-)
>
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index b8b3364..bb5361f 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -462,9 +462,11 @@ typedef struct CPUARMState {
>          float_status fp_status;
>          float_status standard_fp_status;
>      } vfp;
> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>      uint64_t exclusive_addr;
>      uint64_t exclusive_val;
>      uint64_t exclusive_high;
> +#endif
>  #if defined(CONFIG_USER_ONLY)
>      uint64_t exclusive_test;
>      uint32_t exclusive_info;
> diff --git a/target-arm/helper.h b/target-arm/helper.h
> index c2a85c7..6bc3c0a 100644
> --- a/target-arm/helper.h
> +++ b/target-arm/helper.h
> @@ -532,6 +532,10 @@ DEF_HELPER_2(dc_zva, void, env, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>
> +#ifdef CONFIG_ARM_USE_LDST_EXCL
> +DEF_HELPER_1(atomic_clear, void, env)
> +#endif
> +
>  #ifdef TARGET_AARCH64
>  #include "helper-a64.h"
>  #endif
> diff --git a/target-arm/machine.c b/target-arm/machine.c
> index ed1925a..7adfb4d 100644
> --- a/target-arm/machine.c
> +++ b/target-arm/machine.c
> @@ -309,9 +309,11 @@ const VMStateDescription vmstate_arm_cpu = {
>          VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
>                               cpreg_vmstate_array_len,
>                               0, vmstate_info_uint64, uint64_t),
> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>          VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
>          VMSTATE_UINT64(env.exclusive_val, ARMCPU),
>          VMSTATE_UINT64(env.exclusive_high, ARMCPU),
> +#endif

Hmm this does imply we either need to support migration of the LL/SC
state in the generic code or map the generic state into the ARM specific
machine state or we'll break migration.

The latter is probably better so you can save machine state from a
pre-LL/SC build and migrate to a new LL/SC-enabled build.

>          VMSTATE_UINT64(env.features, ARMCPU),
>          VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
>          VMSTATE_UINT32(env.exception.fsr, ARMCPU),
> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
> index a5ee65f..404c13b 100644
> --- a/target-arm/op_helper.c
> +++ b/target-arm/op_helper.c
> @@ -51,6 +51,14 @@ static int exception_target_el(CPUARMState *env)
>      return target_el;
>  }
>
> +#ifdef CONFIG_ARM_USE_LDST_EXCL
> +void HELPER(atomic_clear)(CPUARMState *env)
> +{
> +    ENV_GET_CPU(env)->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> +    ENV_GET_CPU(env)->ll_sc_context = false;
> +}
> +#endif
> +

Given this is just touching generic CPU state this helper should probably be
part of the generic TCG runtime. I assume other arches will just call
this helper as well.
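
For instance, something along these lines (rough sketch only; where it would
live and the helper name are open questions, the fields are the generic
CPUState ones this series adds):

    /* e.g. in a target-independent runtime file */
    void HELPER(exclusive_clear)(CPUArchState *env)
    {
        CPUState *cpu = ENV_GET_CPU(env);

        /* Same body as the ARM-specific helper above, just hoisted out of
         * target-arm/ so other front ends can emit the same call. */
        cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
        cpu->ll_sc_context = false;
    }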


>  uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
>                            uint32_t rn, uint32_t maxindex)
>  {
> @@ -689,7 +697,9 @@ void HELPER(exception_return)(CPUARMState *env)
>
>      aarch64_save_sp(env, cur_el);
>
> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>      env->exclusive_addr = -1;
> +#endif
>
>      /* We must squash the PSTATE.SS bit to zero unless both of the
>       * following hold:
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index cff511b..5150841 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -60,8 +60,10 @@ TCGv_ptr cpu_env;
>  static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
>  static TCGv_i32 cpu_R[16];
>  TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>  TCGv_i64 cpu_exclusive_addr;
>  TCGv_i64 cpu_exclusive_val;
> +#endif
>  #ifdef CONFIG_USER_ONLY
>  TCGv_i64 cpu_exclusive_test;
>  TCGv_i32 cpu_exclusive_info;
> @@ -94,10 +96,12 @@ void arm_translate_init(void)
>      cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
>      cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
>
> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>      cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
>          offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
>      cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
>          offsetof(CPUARMState, exclusive_val), "exclusive_val");
> +#endif
>  #ifdef CONFIG_USER_ONLY
>      cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
>          offsetof(CPUARMState, exclusive_test), "exclusive_test");
> @@ -7413,15 +7417,145 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
>      tcg_gen_or_i32(cpu_ZF, lo, hi);
>  }
>
> -/* Load/Store exclusive instructions are implemented by remembering
> +/* If the softmmu is enabled, the translation of Load/Store exclusive
> +   instructions will rely on the gen_helper_{ldlink,stcond} helpers,
> +   offloading most of the work to the softmmu_llsc_template.h functions.
> +   All the accesses made by the exclusive instructions include an
> +   alignment check.
> +
> +   Otherwise, these instructions are implemented by remembering
>     the value/address loaded, and seeing if these are the same
>     when the store is performed. This should be sufficient to implement
>     the architecturally mandated semantics, and avoids having to monitor
>     regular stores.
>
> -   In system emulation mode only one CPU will be running at once, so
> -   this sequence is effectively atomic.  In user emulation mode we
> -   throw an exception and handle the atomic operation elsewhere.  */
> +   In user emulation mode we throw an exception and handle the atomic
> +   operation elsewhere.  */
> +#ifdef CONFIG_ARM_USE_LDST_EXCL
> +
> +#if TARGET_LONG_BITS == 32
> +#define DO_GEN_LDREX(SUFF)                                             \
> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
> +                                    TCGv_i32 index)                    \
> +{                                                                      \
> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr, index);               \
> +}
> +
> +#define DO_GEN_STREX(SUFF)                                             \
> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
> +                                    TCGv_i32 val, TCGv_i32 index)      \
> +{                                                                      \
> +    gen_helper_stcond_##SUFF(dst, cpu_env, addr, val, index);          \
> +}
> +
> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
> +{
> +    gen_helper_ldlink_i64a(dst, cpu_env, addr, index);
> +}
> +
> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
> +                                  TCGv_i32 index)
> +{
> +
> +    gen_helper_stcond_i64a(dst, cpu_env, addr, val, index);
> +}
> +#else
> +#define DO_GEN_LDREX(SUFF)                                             \
> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
> +                                         TCGv_i32 index)               \
> +{                                                                      \
> +    TCGv addr64 = tcg_temp_new();                                      \
> +    tcg_gen_extu_i32_i64(addr64, addr);                                \
> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr64, index);             \
> +    tcg_temp_free(addr64);                                             \
> +}
> +
> +#define DO_GEN_STREX(SUFF)                                             \
> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
> +                                    TCGv_i32 val, TCGv_i32 index)      \
> +{                                                                      \
> +    TCGv addr64 = tcg_temp_new();                                      \
> +    TCGv dst64 = tcg_temp_new();                                       \
> +    tcg_gen_extu_i32_i64(addr64, addr);                                \
> +    gen_helper_stcond_##SUFF(dst64, cpu_env, addr64, val, index);      \
> +    tcg_gen_extrl_i64_i32(dst, dst64);                                 \
> +    tcg_temp_free(dst64);                                              \
> +    tcg_temp_free(addr64);                                             \
> +}
> +
> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
> +{
> +    TCGv addr64 = tcg_temp_new();
> +    tcg_gen_extu_i32_i64(addr64, addr);
> +    gen_helper_ldlink_i64a(dst, cpu_env, addr64, index);
> +    tcg_temp_free(addr64);
> +}
> +
> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
> +                                  TCGv_i32 index)
> +{
> +    TCGv addr64 = tcg_temp_new();
> +    TCGv dst64 = tcg_temp_new();
> +
> +    tcg_gen_extu_i32_i64(addr64, addr);
> +    gen_helper_stcond_i64a(dst64, cpu_env, addr64, val, index);
> +    tcg_gen_extrl_i64_i32(dst, dst64);
> +
> +    tcg_temp_free(dst64);
> +    tcg_temp_free(addr64);
> +}
> +#endif
> +
> +#if defined(CONFIG_ARM_USE_LDST_EXCL)
> +DO_GEN_LDREX(i8)
> +DO_GEN_LDREX(i16a)
> +DO_GEN_LDREX(i32a)
> +
> +DO_GEN_STREX(i8)
> +DO_GEN_STREX(i16a)
> +DO_GEN_STREX(i32a)
> +#endif
> +
> +static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
> +                               TCGv_i32 addr, int size)
> + {
> +    TCGv_i32 tmp = tcg_temp_new_i32();
> +    TCGv_i32 mem_idx = tcg_temp_new_i32();
> +
> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
> +
> +    if (size != 3) {
> +        switch (size) {
> +        case 0:
> +            gen_ldrex_i8(tmp, addr, mem_idx);
> +            break;
> +        case 1:
> +            gen_ldrex_i16a(tmp, addr, mem_idx);
> +            break;
> +        case 2:
> +            gen_ldrex_i32a(tmp, addr, mem_idx);
> +            break;
> +        default:
> +            abort();
> +        }
> +
> +        store_reg(s, rt, tmp);
> +    } else {
> +        TCGv_i64 tmp64 = tcg_temp_new_i64();
> +        TCGv_i32 tmph = tcg_temp_new_i32();
> +
> +        gen_ldrex_i64a(tmp64, addr, mem_idx);
> +        tcg_gen_extr_i64_i32(tmp, tmph, tmp64);
> +
> +        store_reg(s, rt, tmp);
> +        store_reg(s, rt2, tmph);
> +
> +        tcg_temp_free_i64(tmp64);
> +    }
> +
> +    tcg_temp_free_i32(mem_idx);
> +}
> +#else
>  static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>                                 TCGv_i32 addr, int size)
>  {
> @@ -7460,10 +7594,15 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>      store_reg(s, rt, tmp);
>      tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
>  }
> +#endif
>
>  static void gen_clrex(DisasContext *s)
>  {
> +#ifdef CONFIG_ARM_USE_LDST_EXCL
> +    gen_helper_atomic_clear(cpu_env);
> +#else
>      tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> +#endif
>  }
>
>  #ifdef CONFIG_USER_ONLY
> @@ -7475,6 +7614,47 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>                       size | (rd << 4) | (rt << 8) | (rt2 << 12));
>      gen_exception_internal_insn(s, 4, EXCP_STREX);
>  }
> +#elif defined CONFIG_ARM_USE_LDST_EXCL
> +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
> +                                TCGv_i32 addr, int size)
> +{
> +    TCGv_i32 tmp, mem_idx;
> +
> +    mem_idx = tcg_temp_new_i32();
> +
> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
> +    tmp = load_reg(s, rt);
> +
> +    if (size != 3) {
> +        switch (size) {
> +        case 0:
> +            gen_strex_i8(cpu_R[rd], addr, tmp, mem_idx);
> +            break;
> +        case 1:
> +            gen_strex_i16a(cpu_R[rd], addr, tmp, mem_idx);
> +            break;
> +        case 2:
> +            gen_strex_i32a(cpu_R[rd], addr, tmp, mem_idx);
> +            break;
> +        default:
> +            abort();
> +        }
> +    } else {
> +        TCGv_i64 tmp64;
> +        TCGv_i32 tmp2;
> +
> +        tmp64 = tcg_temp_new_i64();
> +        tmp2 = load_reg(s, rt2);
> +        tcg_gen_concat_i32_i64(tmp64, tmp, tmp2);
> +        gen_strex_i64a(cpu_R[rd], addr, tmp64, mem_idx);
> +
> +        tcg_temp_free_i32(tmp2);
> +        tcg_temp_free_i64(tmp64);
> +    }
> +
> +    tcg_temp_free_i32(tmp);
> +    tcg_temp_free_i32(mem_idx);
> +}
>  #else
>  static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>                                  TCGv_i32 addr, int size)


--
Alex Bennée
alvise rigo March 7, 2016, 6:39 p.m. UTC | #2
On Thu, Feb 18, 2016 at 6:02 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Alvise Rigo <a.rigo@virtualopensystems.com> writes:
>
>> Use the new LL/SC runtime helpers to handle the ARM atomic instructions
>> in softmmu_llsc_template.h.
>>
>> In general, the helper generator
>> gen_{ldrex,strex}_{8,16a,32a,64a}() calls the function
>> helper_{le,be}_{ldlink,stcond}{ub,uw,ul,q}_mmu() implemented in
>> softmmu_llsc_template.h, doing an alignment check.
>>
>> In addition, add a simple helper function to emulate the CLREX instruction.
>>
>> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
>> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
>> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
>> ---
>>  target-arm/cpu.h       |   2 +
>>  target-arm/helper.h    |   4 ++
>>  target-arm/machine.c   |   2 +
>>  target-arm/op_helper.c |  10 +++
>>  target-arm/translate.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++--
>>  5 files changed, 202 insertions(+), 4 deletions(-)
>>
>> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
>> index b8b3364..bb5361f 100644
>> --- a/target-arm/cpu.h
>> +++ b/target-arm/cpu.h
>> @@ -462,9 +462,11 @@ typedef struct CPUARMState {
>>          float_status fp_status;
>>          float_status standard_fp_status;
>>      } vfp;
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>      uint64_t exclusive_addr;
>>      uint64_t exclusive_val;
>>      uint64_t exclusive_high;
>> +#endif
>>  #if defined(CONFIG_USER_ONLY)
>>      uint64_t exclusive_test;
>>      uint32_t exclusive_info;
>> diff --git a/target-arm/helper.h b/target-arm/helper.h
>> index c2a85c7..6bc3c0a 100644
>> --- a/target-arm/helper.h
>> +++ b/target-arm/helper.h
>> @@ -532,6 +532,10 @@ DEF_HELPER_2(dc_zva, void, env, i64)
>>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>>
>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>> +DEF_HELPER_1(atomic_clear, void, env)
>> +#endif
>> +
>>  #ifdef TARGET_AARCH64
>>  #include "helper-a64.h"
>>  #endif
>> diff --git a/target-arm/machine.c b/target-arm/machine.c
>> index ed1925a..7adfb4d 100644
>> --- a/target-arm/machine.c
>> +++ b/target-arm/machine.c
>> @@ -309,9 +309,11 @@ const VMStateDescription vmstate_arm_cpu = {
>>          VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
>>                               cpreg_vmstate_array_len,
>>                               0, vmstate_info_uint64, uint64_t),
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>          VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
>>          VMSTATE_UINT64(env.exclusive_val, ARMCPU),
>>          VMSTATE_UINT64(env.exclusive_high, ARMCPU),
>> +#endif
>
> Hmm this does imply we either need to support migration of the LL/SC
> state in the generic code or map the generic state into the ARM specific
> machine state or we'll break migration.
>
> The latter is probably better so you can save machine state from a
> pre-LL/SC build and migrate to a new LL/SC-enabled build.

This would basically require adding some code in cpu_pre_save to keep
env.exclusive_* and the new structures in sync. As a consequence, this will
not get rid of the pre-LL/SC variables.
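
To be concrete, something like this (rough and untested sketch of
target-arm/machine.c, assuming we keep the old env.exclusive_* fields around
just to feed the migration stream, with cpu_post_load doing the reverse copy):

    static void cpu_pre_save(void *opaque)
    {
        ARMCPU *cpu = opaque;

    #ifdef CONFIG_ARM_USE_LDST_EXCL
        /* Mirror the generic LL/SC state into the legacy field so the
         * stream keeps the pre-LL/SC layout. */
        cpu->env.exclusive_addr = CPU(cpu)->excl_protected_range.begin;
    #endif

        /* ... rest of the existing pre_save code ... */
    }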

>
>>          VMSTATE_UINT64(env.features, ARMCPU),
>>          VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
>>          VMSTATE_UINT32(env.exception.fsr, ARMCPU),
>> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
>> index a5ee65f..404c13b 100644
>> --- a/target-arm/op_helper.c
>> +++ b/target-arm/op_helper.c
>> @@ -51,6 +51,14 @@ static int exception_target_el(CPUARMState *env)
>>      return target_el;
>>  }
>>
>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>> +void HELPER(atomic_clear)(CPUARMState *env)
>> +{
>> +    ENV_GET_CPU(env)->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
>> +    ENV_GET_CPU(env)->ll_sc_context = false;
>> +}
>> +#endif
>> +
>
> Given this is just touching generic CPU state this helper should probably be
> part of the generic TCG runtime. I assume other arches will just call
> this helper as well.

Would it make sense instead to add a new CPUClass hook for this? Other
architectures might want a different behaviour (or add something
else).

Thank you,
alvise

>
>
>>  uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
>>                            uint32_t rn, uint32_t maxindex)
>>  {
>> @@ -689,7 +697,9 @@ void HELPER(exception_return)(CPUARMState *env)
>>
>>      aarch64_save_sp(env, cur_el);
>>
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>      env->exclusive_addr = -1;
>> +#endif
>>
>>      /* We must squash the PSTATE.SS bit to zero unless both of the
>>       * following hold:
>> diff --git a/target-arm/translate.c b/target-arm/translate.c
>> index cff511b..5150841 100644
>> --- a/target-arm/translate.c
>> +++ b/target-arm/translate.c
>> @@ -60,8 +60,10 @@ TCGv_ptr cpu_env;
>>  static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
>>  static TCGv_i32 cpu_R[16];
>>  TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>  TCGv_i64 cpu_exclusive_addr;
>>  TCGv_i64 cpu_exclusive_val;
>> +#endif
>>  #ifdef CONFIG_USER_ONLY
>>  TCGv_i64 cpu_exclusive_test;
>>  TCGv_i32 cpu_exclusive_info;
>> @@ -94,10 +96,12 @@ void arm_translate_init(void)
>>      cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
>>      cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
>>
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>      cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
>>          offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
>>      cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
>>          offsetof(CPUARMState, exclusive_val), "exclusive_val");
>> +#endif
>>  #ifdef CONFIG_USER_ONLY
>>      cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
>>          offsetof(CPUARMState, exclusive_test), "exclusive_test");
>> @@ -7413,15 +7417,145 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
>>      tcg_gen_or_i32(cpu_ZF, lo, hi);
>>  }
>>
>> -/* Load/Store exclusive instructions are implemented by remembering
>> +/* If the softmmu is enabled, the translation of Load/Store exclusive
>> +   instructions will rely on the gen_helper_{ldlink,stcond} helpers,
>> +   offloading most of the work to the softmmu_llsc_template.h functions.
>> +   All the accesses made by the exclusive instructions include an
>> +   alignment check.
>> +
>> +   Otherwise, these instructions are implemented by remembering
>>     the value/address loaded, and seeing if these are the same
>>     when the store is performed. This should be sufficient to implement
>>     the architecturally mandated semantics, and avoids having to monitor
>>     regular stores.
>>
>> -   In system emulation mode only one CPU will be running at once, so
>> -   this sequence is effectively atomic.  In user emulation mode we
>> -   throw an exception and handle the atomic operation elsewhere.  */
>> +   In user emulation mode we throw an exception and handle the atomic
>> +   operation elsewhere.  */
>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>> +
>> +#if TARGET_LONG_BITS == 32
>> +#define DO_GEN_LDREX(SUFF)                                             \
>> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>> +                                    TCGv_i32 index)                    \
>> +{                                                                      \
>> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr, index);               \
>> +}
>> +
>> +#define DO_GEN_STREX(SUFF)                                             \
>> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>> +                                    TCGv_i32 val, TCGv_i32 index)      \
>> +{                                                                      \
>> +    gen_helper_stcond_##SUFF(dst, cpu_env, addr, val, index);          \
>> +}
>> +
>> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
>> +{
>> +    gen_helper_ldlink_i64a(dst, cpu_env, addr, index);
>> +}
>> +
>> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
>> +                                  TCGv_i32 index)
>> +{
>> +
>> +    gen_helper_stcond_i64a(dst, cpu_env, addr, val, index);
>> +}
>> +#else
>> +#define DO_GEN_LDREX(SUFF)                                             \
>> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>> +                                         TCGv_i32 index)               \
>> +{                                                                      \
>> +    TCGv addr64 = tcg_temp_new();                                      \
>> +    tcg_gen_extu_i32_i64(addr64, addr);                                \
>> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr64, index);             \
>> +    tcg_temp_free(addr64);                                             \
>> +}
>> +
>> +#define DO_GEN_STREX(SUFF)                                             \
>> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>> +                                    TCGv_i32 val, TCGv_i32 index)      \
>> +{                                                                      \
>> +    TCGv addr64 = tcg_temp_new();                                      \
>> +    TCGv dst64 = tcg_temp_new();                                       \
>> +    tcg_gen_extu_i32_i64(addr64, addr);                                \
>> +    gen_helper_stcond_##SUFF(dst64, cpu_env, addr64, val, index);      \
>> +    tcg_gen_extrl_i64_i32(dst, dst64);                                 \
>> +    tcg_temp_free(dst64);                                              \
>> +    tcg_temp_free(addr64);                                             \
>> +}
>> +
>> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
>> +{
>> +    TCGv addr64 = tcg_temp_new();
>> +    tcg_gen_extu_i32_i64(addr64, addr);
>> +    gen_helper_ldlink_i64a(dst, cpu_env, addr64, index);
>> +    tcg_temp_free(addr64);
>> +}
>> +
>> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
>> +                                  TCGv_i32 index)
>> +{
>> +    TCGv addr64 = tcg_temp_new();
>> +    TCGv dst64 = tcg_temp_new();
>> +
>> +    tcg_gen_extu_i32_i64(addr64, addr);
>> +    gen_helper_stcond_i64a(dst64, cpu_env, addr64, val, index);
>> +    tcg_gen_extrl_i64_i32(dst, dst64);
>> +
>> +    tcg_temp_free(dst64);
>> +    tcg_temp_free(addr64);
>> +}
>> +#endif
>> +
>> +#if defined(CONFIG_ARM_USE_LDST_EXCL)
>> +DO_GEN_LDREX(i8)
>> +DO_GEN_LDREX(i16a)
>> +DO_GEN_LDREX(i32a)
>> +
>> +DO_GEN_STREX(i8)
>> +DO_GEN_STREX(i16a)
>> +DO_GEN_STREX(i32a)
>> +#endif
>> +
>> +static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>> +                               TCGv_i32 addr, int size)
>> + {
>> +    TCGv_i32 tmp = tcg_temp_new_i32();
>> +    TCGv_i32 mem_idx = tcg_temp_new_i32();
>> +
>> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
>> +
>> +    if (size != 3) {
>> +        switch (size) {
>> +        case 0:
>> +            gen_ldrex_i8(tmp, addr, mem_idx);
>> +            break;
>> +        case 1:
>> +            gen_ldrex_i16a(tmp, addr, mem_idx);
>> +            break;
>> +        case 2:
>> +            gen_ldrex_i32a(tmp, addr, mem_idx);
>> +            break;
>> +        default:
>> +            abort();
>> +        }
>> +
>> +        store_reg(s, rt, tmp);
>> +    } else {
>> +        TCGv_i64 tmp64 = tcg_temp_new_i64();
>> +        TCGv_i32 tmph = tcg_temp_new_i32();
>> +
>> +        gen_ldrex_i64a(tmp64, addr, mem_idx);
>> +        tcg_gen_extr_i64_i32(tmp, tmph, tmp64);
>> +
>> +        store_reg(s, rt, tmp);
>> +        store_reg(s, rt2, tmph);
>> +
>> +        tcg_temp_free_i64(tmp64);
>> +    }
>> +
>> +    tcg_temp_free_i32(mem_idx);
>> +}
>> +#else
>>  static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>                                 TCGv_i32 addr, int size)
>>  {
>> @@ -7460,10 +7594,15 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>      store_reg(s, rt, tmp);
>>      tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
>>  }
>> +#endif
>>
>>  static void gen_clrex(DisasContext *s)
>>  {
>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>> +    gen_helper_atomic_clear(cpu_env);
>> +#else
>>      tcg_gen_movi_i64(cpu_exclusive_addr, -1);
>> +#endif
>>  }
>>
>>  #ifdef CONFIG_USER_ONLY
>> @@ -7475,6 +7614,47 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>                       size | (rd << 4) | (rt << 8) | (rt2 << 12));
>>      gen_exception_internal_insn(s, 4, EXCP_STREX);
>>  }
>> +#elif defined CONFIG_ARM_USE_LDST_EXCL
>> +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>> +                                TCGv_i32 addr, int size)
>> +{
>> +    TCGv_i32 tmp, mem_idx;
>> +
>> +    mem_idx = tcg_temp_new_i32();
>> +
>> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
>> +    tmp = load_reg(s, rt);
>> +
>> +    if (size != 3) {
>> +        switch (size) {
>> +        case 0:
>> +            gen_strex_i8(cpu_R[rd], addr, tmp, mem_idx);
>> +            break;
>> +        case 1:
>> +            gen_strex_i16a(cpu_R[rd], addr, tmp, mem_idx);
>> +            break;
>> +        case 2:
>> +            gen_strex_i32a(cpu_R[rd], addr, tmp, mem_idx);
>> +            break;
>> +        default:
>> +            abort();
>> +        }
>> +    } else {
>> +        TCGv_i64 tmp64;
>> +        TCGv_i32 tmp2;
>> +
>> +        tmp64 = tcg_temp_new_i64();
>> +        tmp2 = load_reg(s, rt2);
>> +        tcg_gen_concat_i32_i64(tmp64, tmp, tmp2);
>> +        gen_strex_i64a(cpu_R[rd], addr, tmp64, mem_idx);
>> +
>> +        tcg_temp_free_i32(tmp2);
>> +        tcg_temp_free_i64(tmp64);
>> +    }
>> +
>> +    tcg_temp_free_i32(tmp);
>> +    tcg_temp_free_i32(mem_idx);
>> +}
>>  #else
>>  static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>                                  TCGv_i32 addr, int size)
>
>
> --
> Alex Bennée
Alex Bennée March 7, 2016, 8:06 p.m. UTC | #3
alvise rigo <a.rigo@virtualopensystems.com> writes:

> On Thu, Feb 18, 2016 at 6:02 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
>>
>> Alvise Rigo <a.rigo@virtualopensystems.com> writes:
>>
>>> Use the new LL/SC runtime helpers to handle the ARM atomic instructions
>>> in softmmu_llsc_template.h.
>>>
>>> In general, the helper generator
>>> gen_{ldrex,strex}_{8,16a,32a,64a}() calls the function
>>> helper_{le,be}_{ldlink,stcond}{ub,uw,ul,q}_mmu() implemented in
>>> softmmu_llsc_template.h, doing an alignment check.
>>>
>>> In addition, add a simple helper function to emulate the CLREX instruction.
>>>
>>> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
>>> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
>>> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
>>> ---
>>>  target-arm/cpu.h       |   2 +
>>>  target-arm/helper.h    |   4 ++
>>>  target-arm/machine.c   |   2 +
>>>  target-arm/op_helper.c |  10 +++
>>>  target-arm/translate.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++--
>>>  5 files changed, 202 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
>>> index b8b3364..bb5361f 100644
>>> --- a/target-arm/cpu.h
>>> +++ b/target-arm/cpu.h
>>> @@ -462,9 +462,11 @@ typedef struct CPUARMState {
>>>          float_status fp_status;
>>>          float_status standard_fp_status;
>>>      } vfp;
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>      uint64_t exclusive_addr;
>>>      uint64_t exclusive_val;
>>>      uint64_t exclusive_high;
>>> +#endif
>>>  #if defined(CONFIG_USER_ONLY)
>>>      uint64_t exclusive_test;
>>>      uint32_t exclusive_info;
>>> diff --git a/target-arm/helper.h b/target-arm/helper.h
>>> index c2a85c7..6bc3c0a 100644
>>> --- a/target-arm/helper.h
>>> +++ b/target-arm/helper.h
>>> @@ -532,6 +532,10 @@ DEF_HELPER_2(dc_zva, void, env, i64)
>>>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>>>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>>>
>>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>>> +DEF_HELPER_1(atomic_clear, void, env)
>>> +#endif
>>> +
>>>  #ifdef TARGET_AARCH64
>>>  #include "helper-a64.h"
>>>  #endif
>>> diff --git a/target-arm/machine.c b/target-arm/machine.c
>>> index ed1925a..7adfb4d 100644
>>> --- a/target-arm/machine.c
>>> +++ b/target-arm/machine.c
>>> @@ -309,9 +309,11 @@ const VMStateDescription vmstate_arm_cpu = {
>>>          VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
>>>                               cpreg_vmstate_array_len,
>>>                               0, vmstate_info_uint64, uint64_t),
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>          VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
>>>          VMSTATE_UINT64(env.exclusive_val, ARMCPU),
>>>          VMSTATE_UINT64(env.exclusive_high, ARMCPU),
>>> +#endif
>>
>> Hmm this does imply we either need to support migration of the LL/SC
>> state in the generic code or map the generic state into the ARM specific
>> machine state or we'll break migration.
>>
>> The latter is probably better so you can save machine state from a
>> pre-LL/SC build and migrate to a new LL/SC-enabled build.
>
> This would basically require adding some code in cpu_pre_save to keep
> env.exclusive_* and the new structures in sync. As a consequence, this will
> not get rid of the pre-LL/SC variables.

I wonder what the others think, but breaking any existing TCG state on
migration would seem to be against the spirit of the thing (I could be
wrong). If we do break migration we'll need to at least bump the
version tag.
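
For reference, that would just be something along the lines of (the actual
number here is only illustrative):

    const VMStateDescription vmstate_arm_cpu = {
        .name = "cpu",
        .version_id = 23,           /* bumped: exclusive_* no longer migrated */
        .minimum_version_id = 23,
        /* ... */
    };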

>
>>
>>>          VMSTATE_UINT64(env.features, ARMCPU),
>>>          VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
>>>          VMSTATE_UINT32(env.exception.fsr, ARMCPU),
>>> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
>>> index a5ee65f..404c13b 100644
>>> --- a/target-arm/op_helper.c
>>> +++ b/target-arm/op_helper.c
>>> @@ -51,6 +51,14 @@ static int exception_target_el(CPUARMState *env)
>>>      return target_el;
>>>  }
>>>
>>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>>> +void HELPER(atomic_clear)(CPUARMState *env)
>>> +{
>>> +    ENV_GET_CPU(env)->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
>>> +    ENV_GET_CPU(env)->ll_sc_context = false;
>>> +}
>>> +#endif
>>> +
>>
>> Given this is just touching generic CPU state this helper should probably be
>> part of the generic TCG runtime. I assume other arches will just call
>> this helper as well.
>
> Would it make sense instead to add a new CPUClass hook for this? Other
> architectures might want a different behaviour (or add something
> else).

Yes. Best of both worlds: a generic helper with an option to vary it if needed.
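
i.e. something like this (hypothetical sketch only: the hook name is made up,
and whether it belongs in CPUClass or somewhere else is up for discussion):

    void HELPER(atomic_clear)(CPUArchState *env)
    {
        CPUState *cpu = ENV_GET_CPU(env);
        CPUClass *cc = CPU_GET_CLASS(cpu);

        /* Generic default: drop the protected range and the LL/SC flag. */
        cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
        cpu->ll_sc_context = false;

        /* Optional per-target hook for anything extra (hypothetical). */
        if (cc->atomic_clear_extra) {
            cc->atomic_clear_extra(cpu);
        }
    }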

>
> Thank you,
> alvise
>
>>
>>
>>>  uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
>>>                            uint32_t rn, uint32_t maxindex)
>>>  {
>>> @@ -689,7 +697,9 @@ void HELPER(exception_return)(CPUARMState *env)
>>>
>>>      aarch64_save_sp(env, cur_el);
>>>
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>      env->exclusive_addr = -1;
>>> +#endif
>>>
>>>      /* We must squash the PSTATE.SS bit to zero unless both of the
>>>       * following hold:
>>> diff --git a/target-arm/translate.c b/target-arm/translate.c
>>> index cff511b..5150841 100644
>>> --- a/target-arm/translate.c
>>> +++ b/target-arm/translate.c
>>> @@ -60,8 +60,10 @@ TCGv_ptr cpu_env;
>>>  static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
>>>  static TCGv_i32 cpu_R[16];
>>>  TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>  TCGv_i64 cpu_exclusive_addr;
>>>  TCGv_i64 cpu_exclusive_val;
>>> +#endif
>>>  #ifdef CONFIG_USER_ONLY
>>>  TCGv_i64 cpu_exclusive_test;
>>>  TCGv_i32 cpu_exclusive_info;
>>> @@ -94,10 +96,12 @@ void arm_translate_init(void)
>>>      cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
>>>      cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
>>>
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>      cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
>>>          offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
>>>      cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
>>>          offsetof(CPUARMState, exclusive_val), "exclusive_val");
>>> +#endif
>>>  #ifdef CONFIG_USER_ONLY
>>>      cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
>>>          offsetof(CPUARMState, exclusive_test), "exclusive_test");
>>> @@ -7413,15 +7417,145 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
>>>      tcg_gen_or_i32(cpu_ZF, lo, hi);
>>>  }
>>>
>>> -/* Load/Store exclusive instructions are implemented by remembering
>>> +/* If the softmmu is enabled, the translation of Load/Store exclusive
>>> +   instructions will rely on the gen_helper_{ldlink,stcond} helpers,
>>> +   offloading most of the work to the softmmu_llsc_template.h functions.
>>> +   All the accesses made by the exclusive instructions include an
>>> +   alignment check.
>>> +
>>> +   Otherwise, these instructions are implemented by remembering
>>>     the value/address loaded, and seeing if these are the same
>>>     when the store is performed. This should be sufficient to implement
>>>     the architecturally mandated semantics, and avoids having to monitor
>>>     regular stores.
>>>
>>> -   In system emulation mode only one CPU will be running at once, so
>>> -   this sequence is effectively atomic.  In user emulation mode we
>>> -   throw an exception and handle the atomic operation elsewhere.  */
>>> +   In user emulation mode we throw an exception and handle the atomic
>>> +   operation elsewhere.  */
>>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>>> +
>>> +#if TARGET_LONG_BITS == 32
>>> +#define DO_GEN_LDREX(SUFF)                                             \
>>> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>>> +                                    TCGv_i32 index)                    \
>>> +{                                                                      \
>>> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr, index);               \
>>> +}
>>> +
>>> +#define DO_GEN_STREX(SUFF)                                             \
>>> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>>> +                                    TCGv_i32 val, TCGv_i32 index)      \
>>> +{                                                                      \
>>> +    gen_helper_stcond_##SUFF(dst, cpu_env, addr, val, index);          \
>>> +}
>>> +
>>> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
>>> +{
>>> +    gen_helper_ldlink_i64a(dst, cpu_env, addr, index);
>>> +}
>>> +
>>> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
>>> +                                  TCGv_i32 index)
>>> +{
>>> +
>>> +    gen_helper_stcond_i64a(dst, cpu_env, addr, val, index);
>>> +}
>>> +#else
>>> +#define DO_GEN_LDREX(SUFF)                                             \
>>> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>>> +                                         TCGv_i32 index)               \
>>> +{                                                                      \
>>> +    TCGv addr64 = tcg_temp_new();                                      \
>>> +    tcg_gen_extu_i32_i64(addr64, addr);                                \
>>> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr64, index);             \
>>> +    tcg_temp_free(addr64);                                             \
>>> +}
>>> +
>>> +#define DO_GEN_STREX(SUFF)                                             \
>>> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>>> +                                    TCGv_i32 val, TCGv_i32 index)      \
>>> +{                                                                      \
>>> +    TCGv addr64 = tcg_temp_new();                                      \
>>> +    TCGv dst64 = tcg_temp_new();                                       \
>>> +    tcg_gen_extu_i32_i64(addr64, addr);                                \
>>> +    gen_helper_stcond_##SUFF(dst64, cpu_env, addr64, val, index);      \
>>> +    tcg_gen_extrl_i64_i32(dst, dst64);                                 \
>>> +    tcg_temp_free(dst64);                                              \
>>> +    tcg_temp_free(addr64);                                             \
>>> +}
>>> +
>>> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
>>> +{
>>> +    TCGv addr64 = tcg_temp_new();
>>> +    tcg_gen_extu_i32_i64(addr64, addr);
>>> +    gen_helper_ldlink_i64a(dst, cpu_env, addr64, index);
>>> +    tcg_temp_free(addr64);
>>> +}
>>> +
>>> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
>>> +                                  TCGv_i32 index)
>>> +{
>>> +    TCGv addr64 = tcg_temp_new();
>>> +    TCGv dst64 = tcg_temp_new();
>>> +
>>> +    tcg_gen_extu_i32_i64(addr64, addr);
>>> +    gen_helper_stcond_i64a(dst64, cpu_env, addr64, val, index);
>>> +    tcg_gen_extrl_i64_i32(dst, dst64);
>>> +
>>> +    tcg_temp_free(dst64);
>>> +    tcg_temp_free(addr64);
>>> +}
>>> +#endif
>>> +
>>> +#if defined(CONFIG_ARM_USE_LDST_EXCL)
>>> +DO_GEN_LDREX(i8)
>>> +DO_GEN_LDREX(i16a)
>>> +DO_GEN_LDREX(i32a)
>>> +
>>> +DO_GEN_STREX(i8)
>>> +DO_GEN_STREX(i16a)
>>> +DO_GEN_STREX(i32a)
>>> +#endif
>>> +
>>> +static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>> +                               TCGv_i32 addr, int size)
>>> + {
>>> +    TCGv_i32 tmp = tcg_temp_new_i32();
>>> +    TCGv_i32 mem_idx = tcg_temp_new_i32();
>>> +
>>> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
>>> +
>>> +    if (size != 3) {
>>> +        switch (size) {
>>> +        case 0:
>>> +            gen_ldrex_i8(tmp, addr, mem_idx);
>>> +            break;
>>> +        case 1:
>>> +            gen_ldrex_i16a(tmp, addr, mem_idx);
>>> +            break;
>>> +        case 2:
>>> +            gen_ldrex_i32a(tmp, addr, mem_idx);
>>> +            break;
>>> +        default:
>>> +            abort();
>>> +        }
>>> +
>>> +        store_reg(s, rt, tmp);
>>> +    } else {
>>> +        TCGv_i64 tmp64 = tcg_temp_new_i64();
>>> +        TCGv_i32 tmph = tcg_temp_new_i32();
>>> +
>>> +        gen_ldrex_i64a(tmp64, addr, mem_idx);
>>> +        tcg_gen_extr_i64_i32(tmp, tmph, tmp64);
>>> +
>>> +        store_reg(s, rt, tmp);
>>> +        store_reg(s, rt2, tmph);
>>> +
>>> +        tcg_temp_free_i64(tmp64);
>>> +    }
>>> +
>>> +    tcg_temp_free_i32(mem_idx);
>>> +}
>>> +#else
>>>  static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>>                                 TCGv_i32 addr, int size)
>>>  {
>>> @@ -7460,10 +7594,15 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>>      store_reg(s, rt, tmp);
>>>      tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
>>>  }
>>> +#endif
>>>
>>>  static void gen_clrex(DisasContext *s)
>>>  {
>>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>>> +    gen_helper_atomic_clear(cpu_env);
>>> +#else
>>>      tcg_gen_movi_i64(cpu_exclusive_addr, -1);
>>> +#endif
>>>  }
>>>
>>>  #ifdef CONFIG_USER_ONLY
>>> @@ -7475,6 +7614,47 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>>                       size | (rd << 4) | (rt << 8) | (rt2 << 12));
>>>      gen_exception_internal_insn(s, 4, EXCP_STREX);
>>>  }
>>> +#elif defined CONFIG_ARM_USE_LDST_EXCL
>>> +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>> +                                TCGv_i32 addr, int size)
>>> +{
>>> +    TCGv_i32 tmp, mem_idx;
>>> +
>>> +    mem_idx = tcg_temp_new_i32();
>>> +
>>> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
>>> +    tmp = load_reg(s, rt);
>>> +
>>> +    if (size != 3) {
>>> +        switch (size) {
>>> +        case 0:
>>> +            gen_strex_i8(cpu_R[rd], addr, tmp, mem_idx);
>>> +            break;
>>> +        case 1:
>>> +            gen_strex_i16a(cpu_R[rd], addr, tmp, mem_idx);
>>> +            break;
>>> +        case 2:
>>> +            gen_strex_i32a(cpu_R[rd], addr, tmp, mem_idx);
>>> +            break;
>>> +        default:
>>> +            abort();
>>> +        }
>>> +    } else {
>>> +        TCGv_i64 tmp64;
>>> +        TCGv_i32 tmp2;
>>> +
>>> +        tmp64 = tcg_temp_new_i64();
>>> +        tmp2 = load_reg(s, rt2);
>>> +        tcg_gen_concat_i32_i64(tmp64, tmp, tmp2);
>>> +        gen_strex_i64a(cpu_R[rd], addr, tmp64, mem_idx);
>>> +
>>> +        tcg_temp_free_i32(tmp2);
>>> +        tcg_temp_free_i64(tmp64);
>>> +    }
>>> +
>>> +    tcg_temp_free_i32(tmp);
>>> +    tcg_temp_free_i32(mem_idx);
>>> +}
>>>  #else
>>>  static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>>                                  TCGv_i32 addr, int size)
>>
>>
>> --
>> Alex Bennée


--
Alex Bennée

Patch

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index b8b3364..bb5361f 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -462,9 +462,11 @@  typedef struct CPUARMState {
         float_status fp_status;
         float_status standard_fp_status;
     } vfp;
+#if !defined(CONFIG_ARM_USE_LDST_EXCL)
     uint64_t exclusive_addr;
     uint64_t exclusive_val;
     uint64_t exclusive_high;
+#endif
 #if defined(CONFIG_USER_ONLY)
     uint64_t exclusive_test;
     uint32_t exclusive_info;
diff --git a/target-arm/helper.h b/target-arm/helper.h
index c2a85c7..6bc3c0a 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -532,6 +532,10 @@  DEF_HELPER_2(dc_zva, void, env, i64)
 DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
+#ifdef CONFIG_ARM_USE_LDST_EXCL
+DEF_HELPER_1(atomic_clear, void, env)
+#endif
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #endif
diff --git a/target-arm/machine.c b/target-arm/machine.c
index ed1925a..7adfb4d 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -309,9 +309,11 @@  const VMStateDescription vmstate_arm_cpu = {
         VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
                              cpreg_vmstate_array_len,
                              0, vmstate_info_uint64, uint64_t),
+#if !defined(CONFIG_ARM_USE_LDST_EXCL)
         VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
         VMSTATE_UINT64(env.exclusive_val, ARMCPU),
         VMSTATE_UINT64(env.exclusive_high, ARMCPU),
+#endif
         VMSTATE_UINT64(env.features, ARMCPU),
         VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
         VMSTATE_UINT32(env.exception.fsr, ARMCPU),
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index a5ee65f..404c13b 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -51,6 +51,14 @@  static int exception_target_el(CPUARMState *env)
     return target_el;
 }
 
+#ifdef CONFIG_ARM_USE_LDST_EXCL
+void HELPER(atomic_clear)(CPUARMState *env)
+{
+    ENV_GET_CPU(env)->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
+    ENV_GET_CPU(env)->ll_sc_context = false;
+}
+#endif
+
 uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
                           uint32_t rn, uint32_t maxindex)
 {
@@ -689,7 +697,9 @@  void HELPER(exception_return)(CPUARMState *env)
 
     aarch64_save_sp(env, cur_el);
 
+#if !defined(CONFIG_ARM_USE_LDST_EXCL)
     env->exclusive_addr = -1;
+#endif
 
     /* We must squash the PSTATE.SS bit to zero unless both of the
      * following hold:
diff --git a/target-arm/translate.c b/target-arm/translate.c
index cff511b..5150841 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -60,8 +60,10 @@  TCGv_ptr cpu_env;
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
+#if !defined(CONFIG_ARM_USE_LDST_EXCL)
 TCGv_i64 cpu_exclusive_addr;
 TCGv_i64 cpu_exclusive_val;
+#endif
 #ifdef CONFIG_USER_ONLY
 TCGv_i64 cpu_exclusive_test;
 TCGv_i32 cpu_exclusive_info;
@@ -94,10 +96,12 @@  void arm_translate_init(void)
     cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
     cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
 
+#if !defined(CONFIG_ARM_USE_LDST_EXCL)
     cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
     cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_val), "exclusive_val");
+#endif
 #ifdef CONFIG_USER_ONLY
     cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_test), "exclusive_test");
@@ -7413,15 +7417,145 @@  static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
     tcg_gen_or_i32(cpu_ZF, lo, hi);
 }
 
-/* Load/Store exclusive instructions are implemented by remembering
+/* If the softmmu is enabled, the translation of Load/Store exclusive
+   instructions will rely on the gen_helper_{ldlink,stcond} helpers,
+   offloading most of the work to the softmmu_llsc_template.h functions.
+   All the accesses made by the exclusive instructions include an
+   alignment check.
+
+   Otherwise, these instructions are implemented by remembering
    the value/address loaded, and seeing if these are the same
    when the store is performed. This should be sufficient to implement
    the architecturally mandated semantics, and avoids having to monitor
    regular stores.
 
-   In system emulation mode only one CPU will be running at once, so
-   this sequence is effectively atomic.  In user emulation mode we
-   throw an exception and handle the atomic operation elsewhere.  */
+   In user emulation mode we throw an exception and handle the atomic
+   operation elsewhere.  */
+#ifdef CONFIG_ARM_USE_LDST_EXCL
+
+#if TARGET_LONG_BITS == 32
+#define DO_GEN_LDREX(SUFF)                                             \
+static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
+                                    TCGv_i32 index)                    \
+{                                                                      \
+    gen_helper_ldlink_##SUFF(dst, cpu_env, addr, index);               \
+}
+
+#define DO_GEN_STREX(SUFF)                                             \
+static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
+                                    TCGv_i32 val, TCGv_i32 index)      \
+{                                                                      \
+    gen_helper_stcond_##SUFF(dst, cpu_env, addr, val, index);          \
+}
+
+static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
+{
+    gen_helper_ldlink_i64a(dst, cpu_env, addr, index);
+}
+
+static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
+                                  TCGv_i32 index)
+{
+
+    gen_helper_stcond_i64a(dst, cpu_env, addr, val, index);
+}
+#else
+#define DO_GEN_LDREX(SUFF)                                             \
+static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
+                                         TCGv_i32 index)               \
+{                                                                      \
+    TCGv addr64 = tcg_temp_new();                                      \
+    tcg_gen_extu_i32_i64(addr64, addr);                                \
+    gen_helper_ldlink_##SUFF(dst, cpu_env, addr64, index);             \
+    tcg_temp_free(addr64);                                             \
+}
+
+#define DO_GEN_STREX(SUFF)                                             \
+static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
+                                    TCGv_i32 val, TCGv_i32 index)      \
+{                                                                      \
+    TCGv addr64 = tcg_temp_new();                                      \
+    TCGv dst64 = tcg_temp_new();                                       \
+    tcg_gen_extu_i32_i64(addr64, addr);                                \
+    gen_helper_stcond_##SUFF(dst64, cpu_env, addr64, val, index);      \
+    tcg_gen_extrl_i64_i32(dst, dst64);                                 \
+    tcg_temp_free(dst64);                                              \
+    tcg_temp_free(addr64);                                             \
+}
+
+static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
+{
+    TCGv addr64 = tcg_temp_new();
+    tcg_gen_extu_i32_i64(addr64, addr);
+    gen_helper_ldlink_i64a(dst, cpu_env, addr64, index);
+    tcg_temp_free(addr64);
+}
+
+static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
+                                  TCGv_i32 index)
+{
+    TCGv addr64 = tcg_temp_new();
+    TCGv dst64 = tcg_temp_new();
+
+    tcg_gen_extu_i32_i64(addr64, addr);
+    gen_helper_stcond_i64a(dst64, cpu_env, addr64, val, index);
+    tcg_gen_extrl_i64_i32(dst, dst64);
+
+    tcg_temp_free(dst64);
+    tcg_temp_free(addr64);
+}
+#endif
+
+#if defined(CONFIG_ARM_USE_LDST_EXCL)
+DO_GEN_LDREX(i8)
+DO_GEN_LDREX(i16a)
+DO_GEN_LDREX(i32a)
+
+DO_GEN_STREX(i8)
+DO_GEN_STREX(i16a)
+DO_GEN_STREX(i32a)
+#endif
+
+static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
+                               TCGv_i32 addr, int size)
+ {
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    TCGv_i32 mem_idx = tcg_temp_new_i32();
+
+    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
+
+    if (size != 3) {
+        switch (size) {
+        case 0:
+            gen_ldrex_i8(tmp, addr, mem_idx);
+            break;
+        case 1:
+            gen_ldrex_i16a(tmp, addr, mem_idx);
+            break;
+        case 2:
+            gen_ldrex_i32a(tmp, addr, mem_idx);
+            break;
+        default:
+            abort();
+        }
+
+        store_reg(s, rt, tmp);
+    } else {
+        TCGv_i64 tmp64 = tcg_temp_new_i64();
+        TCGv_i32 tmph = tcg_temp_new_i32();
+
+        gen_ldrex_i64a(tmp64, addr, mem_idx);
+        tcg_gen_extr_i64_i32(tmp, tmph, tmp64);
+
+        store_reg(s, rt, tmp);
+        store_reg(s, rt2, tmph);
+
+        tcg_temp_free_i64(tmp64);
+    }
+
+    tcg_temp_free_i32(mem_idx);
+}
+#else
 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                                TCGv_i32 addr, int size)
 {
@@ -7460,10 +7594,15 @@  static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
     store_reg(s, rt, tmp);
     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
 }
+#endif
 
 static void gen_clrex(DisasContext *s)
 {
+#ifdef CONFIG_ARM_USE_LDST_EXCL
+    gen_helper_atomic_clear(cpu_env);
+#else
     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+#endif
 }
 
 #ifdef CONFIG_USER_ONLY
@@ -7475,6 +7614,47 @@  static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                      size | (rd << 4) | (rt << 8) | (rt2 << 12));
     gen_exception_internal_insn(s, 4, EXCP_STREX);
 }
+#elif defined CONFIG_ARM_USE_LDST_EXCL
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+                                TCGv_i32 addr, int size)
+{
+    TCGv_i32 tmp, mem_idx;
+
+    mem_idx = tcg_temp_new_i32();
+
+    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
+    tmp = load_reg(s, rt);
+
+    if (size != 3) {
+        switch (size) {
+        case 0:
+            gen_strex_i8(cpu_R[rd], addr, tmp, mem_idx);
+            break;
+        case 1:
+            gen_strex_i16a(cpu_R[rd], addr, tmp, mem_idx);
+            break;
+        case 2:
+            gen_strex_i32a(cpu_R[rd], addr, tmp, mem_idx);
+            break;
+        default:
+            abort();
+        }
+    } else {
+        TCGv_i64 tmp64;
+        TCGv_i32 tmp2;
+
+        tmp64 = tcg_temp_new_i64();
+        tmp2 = load_reg(s, rt2);
+        tcg_gen_concat_i32_i64(tmp64, tmp, tmp2);
+        gen_strex_i64a(cpu_R[rd], addr, tmp64, mem_idx);
+
+        tcg_temp_free_i32(tmp2);
+        tcg_temp_free_i64(tmp64);
+    }
+
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(mem_idx);
+}
 #else
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                 TCGv_i32 addr, int size)