
[RFC,3/3] mttcg: Implement implicit ordering semantics

Message ID 20170828035327.17146-3-bobby.prani@gmail.com (mailing list archive)
State New, archived

Commit Message

Pranith Kumar Aug. 28, 2017, 3:53 a.m. UTC
Currently, we cannot use mttcg for running strong memory model guests
on weak memory model hosts due to missing ordering semantics.

We implicitly generate fence instructions for guests with a stronger
memory model whenever an ordering mismatch with the host is detected.
Fences are generated only for the orderings that actually require
them; for example, no fence is needed between a store and a subsequent
load when emulating x86, since the absence of an explicit fence in the
guest binary indicates that store-to-load ordering need not be
enforced. Also note that if multiple consecutive fence instructions
appear in the generated IR, they are combined in the TCG optimization
pass.

This patch allows us to boot an x86 guest on ARM64 hosts using mttcg.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 tcg/aarch64/tcg-target.h |  2 ++
 tcg/arm/tcg-target.h     |  2 ++
 tcg/mips/tcg-target.h    |  2 ++
 tcg/ppc/tcg-target.h     |  2 ++
 tcg/tcg-op.c             | 17 +++++++++++++++++
 tcg/tcg-op.h             |  1 +
 6 files changed, 26 insertions(+)
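
To make the mismatch computation concrete, here is a minimal,
self-contained sketch of how it plays out for the x86-on-ARM64 case
above. The TCG_MO_* values are stand-ins mirroring tcg-mo.h, and the
guest/host models are assumptions for illustration, not quoted from
the tree:

#include <stdio.h>

/* Stand-in flag values (assumed; see tcg/tcg-mo.h for the real ones). */
enum {
    TCG_MO_LD_LD = 0x01,  /* earlier load ordered before a later load   */
    TCG_MO_ST_LD = 0x02,  /* earlier store ordered before a later load  */
    TCG_MO_LD_ST = 0x04,  /* earlier load ordered before a later store  */
    TCG_MO_ST_ST = 0x08,  /* earlier store ordered before a later store */
    TCG_MO_ALL   = 0x0f,
};

int main(void)
{
    /* Assumed guest model: x86-like TSO, i.e. everything ordered
     * except a store followed by a load. */
    int guest_mo = TCG_MO_ALL & ~TCG_MO_ST_LD;

    /* Ordering this patch requests before a guest load. */
    int type = TCG_MO_LD_LD | TCG_MO_LD_ST;

    /* Weak host (e.g. ARM64): nothing ordered by default. */
    int weak_host = 0;
    /* TSO host: same model as the guest. */
    int tso_host = TCG_MO_ALL & ~TCG_MO_ST_LD;

    printf("fence bits on weak host: 0x%x\n",
           type & (guest_mo & ~weak_host));  /* 0x5: fence emitted */
    printf("fence bits on TSO host:  0x%x\n",
           type & (guest_mo & ~tso_host));   /* 0x0: no fence      */
    return 0;
}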

Comments

Richard Henderson Aug. 28, 2017, 5:57 p.m. UTC | #1
On 08/27/2017 08:53 PM, Pranith Kumar wrote:
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 55a46ac825..b41a248bee 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -117,4 +117,6 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
>      __builtin___clear_cache((char *)start, (char *)stop);
>  }
>  
> +#define TCG_TARGET_DEFAULT_MO (0)
> +
>  #endif /* AARCH64_TCG_TARGET_H */

Please add all of these in one patch, separate from the tcg-op.c changes.
We should also just make this mandatory and remove any related #ifdefs.

> +void tcg_gen_req_mo(TCGBar type)

static, until we find that we need it somewhere else.

> +#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
> +    TCGBar order_mismatch = type & (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO);
> +    if (order_mismatch) {
> +        tcg_gen_mb(order_mismatch | TCG_BAR_SC);
> +    }
> +#else
> +    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
> +#endif

Hmm.  How about

static void tcg_gen_req_mo(TCGBar type)
{
#ifdef TCG_GUEST_DEFAULT_MO
    type &= TCG_GUEST_DEFAULT_MO;
#endif
#ifdef TCG_TARGET_DEFAULT_MO
    type &= ~TCG_TARGET_DEFAULT_MO;
#endif
    if (type) {
        tcg_gen_mb(type | TCG_BAR_SC);
    }
}

Just because one of those is undefined doesn't mean we can't infer tighter
barriers from the others, including the initial value of TYPE.
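
Concretely, reusing the stand-in flag values from the sketch after the
diffstat above, and assuming only TCG_TARGET_DEFAULT_MO is defined, as
TSO (e.g. an i386 host whose guest front end defines no
TCG_GUEST_DEFAULT_MO yet):

    TCGBar type = TCG_MO_LD_LD | TCG_MO_ST_LD; /* request before a load */
    /* guest &= skipped: TCG_GUEST_DEFAULT_MO is undefined */
    type &= ~(TCG_MO_ALL & ~TCG_MO_ST_LD);     /* host orders the rest */
    /* type == TCG_MO_ST_LD: only a store->load barrier is emitted,
     * where the original #else branch would emit TCG_MO_ALL. */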

>  void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
>  {
> +    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_LD_ST);
>      memop = tcg_canonicalize_memop(memop, 0, 0);

You're putting the barrier before the load, so that should be

  TCG_MO_LD_LD | TCG_MO_ST_LD

i.e.  TCG_MO_<any>_<current op>

If you were putting the barrier afterward (an equally reasonable option), you'd
reverse that and use what you have above.
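
A sketch of what the corrected call sites would look like with the
barrier kept before the access (the store-side change follows from the
same TCG_MO_<any>_<current op> rule, though only the load case is
quoted above):

Before a load, order any prior access against this load:

-    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_LD_ST);
+    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);

Before a store, order any prior access against this store:

-    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
+    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);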


r~
Pranith Kumar Aug. 28, 2017, 9:41 p.m. UTC | #2
On Mon, Aug 28, 2017 at 1:57 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 08/27/2017 08:53 PM, Pranith Kumar wrote:
>> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
>> index 55a46ac825..b41a248bee 100644
>> --- a/tcg/aarch64/tcg-target.h
>> +++ b/tcg/aarch64/tcg-target.h
>> @@ -117,4 +117,6 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
>>      __builtin___clear_cache((char *)start, (char *)stop);
>>  }
>>
>> +#define TCG_TARGET_DEFAULT_MO (0)
>> +
>>  #endif /* AARCH64_TCG_TARGET_H */
>
> Please add all of these in one patch, separate from the tcg-op.c changes.
> We should also just make this mandatory and remove any related #ifdefs.

I tried looking up the ordering semantics for architectures like ia64
and s390, but they are not clearly documented. I think every arch
except x86 can be defined as weak, even though archs like sparc can
also be configured as TSO. Is this right?

>
>> +void tcg_gen_req_mo(TCGBar type)
>
> static, until we find that we need it somewhere else.
>

Will fix.

>> +#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
>> +    TCGBar order_mismatch = type & (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO);
>> +    if (order_mismatch) {
>> +        tcg_gen_mb(order_mismatch | TCG_BAR_SC);
>> +    }
>> +#else
>> +    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
>> +#endif
>
> Hmm.  How about
>
> static void tcg_gen_req_mo(TCGBar type)
> {
> #ifdef TCG_GUEST_DEFAULT_MO
>     type &= TCG_GUEST_DEFAULT_MO;
> #endif
> #ifdef TCG_TARGET_DEFAULT_MO
>     type &= ~TCG_TARGET_DEFAULT_MO;
> #endif
>     if (type) {
>         tcg_gen_mb(type | TCG_BAR_SC);
>     }
> }

Yes, this looks better. Until we can figure out all the possible
definitions of TCG_GUEST_DEFAULT_MO and TCG_TARGET_DEFAULT_MO, I would
like to keep the #ifdefs.

>
> Just because one of those is undefined doesn't mean we can't infer tighter
> barriers from the others, including the initial value of TYPE.
>
>>  void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
>>  {
>> +    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_LD_ST);
>>      memop = tcg_canonicalize_memop(memop, 0, 0);
>
> You're putting the barrier before the load, so that should be
>
>   TCG_MO_LD_LD | TCG_MO_ST_LD
>
> i.e.  TCG_MO_<any>_<current op>
>
> If you were putting the barrier afterward (an equally reasonable option), you'd
> reverse that and use what you have above.

OK, will fix this. Thanks for the review.
Richard Henderson Aug. 28, 2017, 10:39 p.m. UTC | #3
On 08/28/2017 02:41 PM, Pranith Kumar wrote:
> On Mon, Aug 28, 2017 at 1:57 PM, Richard Henderson <rth@twiddle.net> wrote:
>> On 08/27/2017 08:53 PM, Pranith Kumar wrote:
>>> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
>>> index 55a46ac825..b41a248bee 100644
>>> --- a/tcg/aarch64/tcg-target.h
>>> +++ b/tcg/aarch64/tcg-target.h
>>> @@ -117,4 +117,6 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
>>>      __builtin___clear_cache((char *)start, (char *)stop);
>>>  }
>>>
>>> +#define TCG_TARGET_DEFAULT_MO (0)
>>> +
>>>  #endif /* AARCH64_TCG_TARGET_H */
>>
>> Please add all of these in one patch, separate from the tcg-op.c changes.
>> We should also just make this mandatory and remove any related #ifdefs.
> 
> I tried looking up the ordering semantics for architectures like ia64
> and s390, but they are not clearly documented. I think every arch
> except x86 can be defined as weak, even though archs like sparc can
> also be configured as TSO. Is this right?

s390 has the same memory ordering as i386.

But you're right that the RISC chips should generally be 0.

I'll try to figure out when sparc can use PSO (loosest for sparc < 8,
and modern niagara), but leave it 0 for now.
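
For reference, a TSO host along those lines would plausibly be
described with the existing TCG_MO_* flags like this (a sketch; the
thread leaves the actual values unresolved):

/* i386/s390 tcg-target.h (sketch): TSO orders everything except a
 * store followed by a load. */
#define TCG_TARGET_DEFAULT_MO  (TCG_MO_ALL & ~TCG_MO_ST_LD)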


r~

Patch

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 55a46ac825..b41a248bee 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -117,4 +117,6 @@  static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
     __builtin___clear_cache((char *)start, (char *)stop);
 }
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 5ef1086710..a38be15a39 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -134,4 +134,6 @@  static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
     __builtin___clear_cache((char *) start, (char *) stop);
 }
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index d75cb63ed3..e9558d15bc 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -206,4 +206,6 @@  static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
     cacheflush ((void *)start, stop-start, ICACHE);
 }
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 5f4a40a5b4..5a092b038a 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -125,4 +125,6 @@  extern bool have_isa_3_00;
 
 void flush_icache_range(uintptr_t start, uintptr_t stop);
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 87f673ef49..085fe66fb2 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -28,6 +28,7 @@ 
 #include "exec/exec-all.h"
 #include "tcg.h"
 #include "tcg-op.h"
+#include "tcg-mo.h"
 #include "trace-tcg.h"
 #include "trace/mem.h"
 
@@ -2662,8 +2663,21 @@  static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
 #endif
 }
 
+void tcg_gen_req_mo(TCGBar type)
+{
+#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
+    TCGBar order_mismatch = type & (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO);
+    if (order_mismatch) {
+        tcg_gen_mb(order_mismatch | TCG_BAR_SC);
+    }
+#else
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+#endif
+}
+
 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_LD_ST);
     memop = tcg_canonicalize_memop(memop, 0, 0);
     trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env,
                                addr, trace_mem_get_info(memop, 0));
@@ -2672,6 +2686,7 @@  void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 
 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
     memop = tcg_canonicalize_memop(memop, 0, 1);
     trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env,
                                addr, trace_mem_get_info(memop, 1));
@@ -2680,6 +2695,7 @@  void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 
 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_LD_ST);
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
         if (memop & MO_SIGN) {
@@ -2698,6 +2714,7 @@  void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 
 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
         return;
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 5d3278f243..6ad2c6d60e 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -262,6 +262,7 @@  static inline void tcg_gen_br(TCGLabel *l)
 }
 
 void tcg_gen_mb(TCGBar);
+void tcg_gen_req_mo(TCGBar type);
 
 /* Helper calls. */