diff mbox

[RFC,v3,07/14] tcg/ppc: Add support for fence

Message ID 20160618040343.19517-8-bobby.prani@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Pranith Kumar June 18, 2016, 4:03 a.m. UTC
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 tcg/ppc/tcg-target.inc.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

Sergey Fedorov June 22, 2016, 7:50 p.m. UTC | #1
On 18/06/16 07:03, Pranith Kumar wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
> ---
>  tcg/ppc/tcg-target.inc.c | 24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
>
> diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
> index da10052..766848e 100644
> --- a/tcg/ppc/tcg-target.inc.c
> +++ b/tcg/ppc/tcg-target.inc.c
> @@ -469,6 +469,10 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
>  #define STHX   XO31(407)
>  #define STWX   XO31(151)
>  
> +#define EIEIO  XO31(854)
> +#define HWSYNC XO31(598)
> +#define LWSYNC (HWSYNC | (1u << 21))
> +
>  #define SPR(a, b) ((((a)<<5)|(b))<<11)
>  #define LR     SPR(8, 0)
>  #define CTR    SPR(9, 0)
> @@ -1237,6 +1241,21 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
>      tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
>  }
>  
> +static void tcg_out_mb(TCGContext *s, TCGArg a0)
> +{
> +    switch (a0 & TCG_MO_ALL) {
> +    case TCG_MO_LD_LD:
> +        tcg_out32(s, LWSYNC);

lwsync can be used for all cases except store-load which requires
hwsync. eieio is for synchronizing memory-mapped IO which is not used by
TCG at all. So there should be:

    switch (a0 & TCG_MO_ALL) {
    case TCG_MO_ST_LD:
        tcg_out32(s, HWSYNC);
        break;
    default:
        tcg_out32(s, LWSYNC);
        break;
    }

Kind regards,
Sergey

> +        break;
> +    case TCG_MO_ST_ST:
> +        tcg_out32(s, EIEIO);
> +        break;
> +    default:
> +        tcg_out32(s, HWSYNC);
> +        break;
> +    }
> +}
> +
>  #ifdef __powerpc64__
>  void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
>  {
> @@ -2439,6 +2458,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>          tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
>          break;
>  
> +    case INDEX_op_mb:
> +        tcg_out_mb(s, args[0]);
> +        break;
> +
>      case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
>      case INDEX_op_mov_i64:
>      case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
> @@ -2586,6 +2609,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
>      { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } },
>  #endif
>  
> +    { INDEX_op_mb, { } },
>      { -1 },
>  };
>
Richard Henderson June 22, 2016, 8:21 p.m. UTC | #2
On 06/22/2016 12:50 PM, Sergey Fedorov wrote:
> On 18/06/16 07:03, Pranith Kumar wrote:
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>> ---
>>  tcg/ppc/tcg-target.inc.c | 24 ++++++++++++++++++++++++
>>  1 file changed, 24 insertions(+)
>>
>> diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
>> index da10052..766848e 100644
>> --- a/tcg/ppc/tcg-target.inc.c
>> +++ b/tcg/ppc/tcg-target.inc.c
>> @@ -469,6 +469,10 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
>>  #define STHX   XO31(407)
>>  #define STWX   XO31(151)
>>  
>> +#define EIEIO  XO31(854)
>> +#define HWSYNC XO31(598)
>> +#define LWSYNC (HWSYNC | (1u << 21))
>> +
>>  #define SPR(a, b) ((((a)<<5)|(b))<<11)
>>  #define LR     SPR(8, 0)
>>  #define CTR    SPR(9, 0)
>> @@ -1237,6 +1241,21 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
>>      tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
>>  }
>>  
>> +static void tcg_out_mb(TCGContext *s, TCGArg a0)
>> +{
>> +    switch (a0 & TCG_MO_ALL) {
>> +    case TCG_MO_LD_LD:
>> +        tcg_out32(s, LWSYNC);
> 
> lwsync can be used for all cases except store-load which requires
> hwsync. eieio is for synchronizing memory-mapped IO which is not used by
> TCG at all. 

Have a look through linux/arch/powerpc/include/asm/barrier.h wherein we find

  # The eieio instruction is a barrier providing an ordering ...
  # for (a) cacheable stores ....

  # However, on CPUs that don't support lwsync, lwsync actually maps to a
  # heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.

And elsewhere,

#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
#define __SUBARCH_HAS_LWSYNC
#endif

Which suggests that ppc64 should use lwsync and ppc32 should use eieio for ST_ST.


r~
Sergey Fedorov June 22, 2016, 8:27 p.m. UTC | #3
On 22/06/16 23:21, Richard Henderson wrote:
> On 06/22/2016 12:50 PM, Sergey Fedorov wrote:
>> On 18/06/16 07:03, Pranith Kumar wrote:
>>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>>> ---
>>>  tcg/ppc/tcg-target.inc.c | 24 ++++++++++++++++++++++++
>>>  1 file changed, 24 insertions(+)
>>>
>>> diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
>>> index da10052..766848e 100644
>>> --- a/tcg/ppc/tcg-target.inc.c
>>> +++ b/tcg/ppc/tcg-target.inc.c
>>> @@ -469,6 +469,10 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
>>>  #define STHX   XO31(407)
>>>  #define STWX   XO31(151)
>>>  
>>> +#define EIEIO  XO31(854)
>>> +#define HWSYNC XO31(598)
>>> +#define LWSYNC (HWSYNC | (1u << 21))
>>> +
>>>  #define SPR(a, b) ((((a)<<5)|(b))<<11)
>>>  #define LR     SPR(8, 0)
>>>  #define CTR    SPR(9, 0)
>>> @@ -1237,6 +1241,21 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
>>>      tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
>>>  }
>>>  
>>> +static void tcg_out_mb(TCGContext *s, TCGArg a0)
>>> +{
>>> +    switch (a0 & TCG_MO_ALL) {
>>> +    case TCG_MO_LD_LD:
>>> +        tcg_out32(s, LWSYNC);
>> lwsync can be used for all cases except store-load which requires
>> hwsync. eieio is for synchronizing memory-mapped IO which is not used by
>> TCG at all. 
> Have a look through linux/arch/powerpc/include/asm/barrier.h wherein we find
>
>   # The eieio instruction is a barrier providing an ordering ...
>   # for (a) cacheable stores ....
>
>   # However, on CPUs that don't support lwsync, lwsync actually maps to a
>   # heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
>
> And elsewhere,
>
> #if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
> #define __SUBARCH_HAS_LWSYNC
> #endif
>
> Which suggests that ppc64 should use lwsync and ppc32 should use eieio for ST_ST.

Hmm, it's always useful to look at Linux kernel :) Looks like we should
also make this check and use eieio in place of lwsync if it's not supported.

Thanks,
Sergey
Sergey Fedorov June 23, 2016, 2:42 p.m. UTC | #4
On 22/06/16 22:50, Sergey Fedorov wrote:
> On 18/06/16 07:03, Pranith Kumar wrote:
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>> ---
>>  tcg/ppc/tcg-target.inc.c | 24 ++++++++++++++++++++++++
>>  1 file changed, 24 insertions(+)
>>
>> diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
>> index da10052..766848e 100644
>> --- a/tcg/ppc/tcg-target.inc.c
>> +++ b/tcg/ppc/tcg-target.inc.c
>> @@ -469,6 +469,10 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
>>  #define STHX   XO31(407)
>>  #define STWX   XO31(151)
>>  
>> +#define EIEIO  XO31(854)
>> +#define HWSYNC XO31(598)
>> +#define LWSYNC (HWSYNC | (1u << 21))
>> +
>>  #define SPR(a, b) ((((a)<<5)|(b))<<11)
>>  #define LR     SPR(8, 0)
>>  #define CTR    SPR(9, 0)
>> @@ -1237,6 +1241,21 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
>>      tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
>>  }
>>  
>> +static void tcg_out_mb(TCGContext *s, TCGArg a0)
>> +{
>> +    switch (a0 & TCG_MO_ALL) {
>> +    case TCG_MO_LD_LD:
>> +        tcg_out32(s, LWSYNC);
> lwsync can be used for all cases except store-load which requires
> hwsync. eieio is for synchronizing memory-mapped IO which is not used by
> TCG at all. 

However, if we think about the possibility to make read HW passthrough
in TCG mode, that doesn't seem impossible. Probably we'd like to take
care of this and introduce some support of memory barrier which can
order IO memory access and normal guest memory access.

Kind regards,
Sergey

> So there should be:
>
>     switch (a0 & TCG_MO_ALL) {
>     case TCG_MO_ST_LD:
>         tcg_out32(s, HWSYNC);
>         break;
>     default:
>         tcg_out32(s, LWSYNC);
>         break;
>     }
>
> Kind regards,
> Sergey
>
>> +        break;
>> +    case TCG_MO_ST_ST:
>> +        tcg_out32(s, EIEIO);
>> +        break;
>> +    default:
>> +        tcg_out32(s, HWSYNC);
>> +        break;
>> +    }
>> +}
>> +
>>  #ifdef __powerpc64__
>>  void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
>>  {
>> @@ -2439,6 +2458,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>>          tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
>>          break;
>>  
>> +    case INDEX_op_mb:
>> +        tcg_out_mb(s, args[0]);
>> +        break;
>> +
>>      case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
>>      case INDEX_op_mov_i64:
>>      case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
>> @@ -2586,6 +2609,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
>>      { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } },
>>  #endif
>>  
>> +    { INDEX_op_mb, { } },
>>      { -1 },
>>  };
>>
diff mbox

Patch

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index da10052..766848e 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -469,6 +469,10 @@  static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define STHX   XO31(407)
 #define STWX   XO31(151)
 
+#define EIEIO  XO31(854)
+#define HWSYNC XO31(598)
+#define LWSYNC (HWSYNC | (1u << 21))
+
 #define SPR(a, b) ((((a)<<5)|(b))<<11)
 #define LR     SPR(8, 0)
 #define CTR    SPR(9, 0)
@@ -1237,6 +1241,21 @@  static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
     tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
 }
 
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+    switch (a0 & TCG_MO_ALL) {
+    case TCG_MO_LD_LD:
+        tcg_out32(s, LWSYNC);
+        break;
+    case TCG_MO_ST_ST:
+        tcg_out32(s, EIEIO);
+        break;
+    default:
+        tcg_out32(s, HWSYNC);
+        break;
+    }
+}
+
 #ifdef __powerpc64__
 void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
@@ -2439,6 +2458,10 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
         break;
 
+    case INDEX_op_mb:
+        tcg_out_mb(s, args[0]);
+        break;
+
     case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
     case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
@@ -2586,6 +2609,7 @@  static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } },
 #endif
 
+    { INDEX_op_mb, { } },
     { -1 },
 };