diff mbox series

[1/1] disas/riscv: Guard dec->cfg dereference for host disassemble

Message ID 20241206032411.52528-1-zhiwei_liu@linux.alibaba.com (mailing list archive)
State New
Headers show
Series [1/1] disas/riscv: Guard dec->cfg dereference for host disassemble | expand

Commit Message

LIU Zhiwei Dec. 6, 2024, 3:24 a.m. UTC
For riscv host, it will set dec->cfg to zero. Thus we shuld guard
the dec->cfg deference for riscv host disassemble.

And in general, we should only use dec->cfg for target in three cases:

1) For not incompatible encodings, such as zcmp/zcmt/zfinx.
2) For maybe-ops encodings, they are better to be disassembled to
   the "real" extensions, such as zicfiss. The guard of dec->zimop
   and dec->zcmop is for comment and avoid check for every extension
   that encoded in maybe-ops area.
3) For custom encodings, we have to use dec->cfg to disassemble
   custom encodings using the same encoding area.

Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
---
 disas/riscv.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

Comments

Richard Henderson Dec. 6, 2024, 3:36 a.m. UTC | #1
On 12/5/24 21:24, LIU Zhiwei wrote:
> For riscv host, it will set dec->cfg to zero. Thus we shuld guard
> the dec->cfg deference for riscv host disassemble.
> 
> And in general, we should only use dec->cfg for target in three cases:
> 
> 1) For not incompatible encodings, such as zcmp/zcmt/zfinx.
> 2) For maybe-ops encodings, they are better to be disassembled to
>     the "real" extensions, such as zicfiss. The guard of dec->zimop
>     and dec->zcmop is for comment and avoid check for every extension
>     that encoded in maybe-ops area.
> 3) For custom encodings, we have to use dec->cfg to disassemble
>     custom encodings using the same encoding area.
> 
> Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
> Suggested-by: Richard Henderson <richard.henderson@linaro.org>

...

> @@ -5112,28 +5112,28 @@ static GString *format_inst(size_t tab, rv_decode *dec)
>               g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
>               break;
>           case '3':
> -            if (dec->cfg->ext_zfinx) {
> +            if (dec->cfg && dec->cfg->ext_zfinx) {
>                   g_string_append(buf, rv_ireg_name_sym[dec->rd]);
>               } else {
>                   g_string_append(buf, rv_freg_name_sym[dec->rd]);
>               }
>               break;
>           case '4':
> -            if (dec->cfg->ext_zfinx) {
> +            if (dec->cfg && dec->cfg->ext_zfinx) {
>                   g_string_append(buf, rv_ireg_name_sym[dec->rs1]);
>               } else {
>                   g_string_append(buf, rv_freg_name_sym[dec->rs1]);
>               }
>               break;
>           case '5':
> -            if (dec->cfg->ext_zfinx) {
> +            if (dec->cfg && dec->cfg->ext_zfinx) {
>                   g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
>               } else {
>                   g_string_append(buf, rv_freg_name_sym[dec->rs2]);
>               }
>               break;
>           case '6':
> -            if (dec->cfg->ext_zfinx) {
> +            if (dec->cfg && dec->cfg->ext_zfinx) {
>                   g_string_append(buf, rv_ireg_name_sym[dec->rs3]);
>               } else {
>                   g_string_append(buf, rv_freg_name_sym[dec->rs3]);

These are the only tests of cfg that are required.
None of the other standard isa extensions overlap.

> @@ -5439,7 +5439,8 @@ static GString *disasm_inst(rv_isa isa, uint64_t pc, rv_inst inst,
>           const rv_opcode_data *opcode_data = decoders[i].opcode_data;
>           void (*decode_func)(rv_decode *, rv_isa) = decoders[i].decode_func;
>   
> -        if (guard_func(cfg)) {
> +        /* always_true_p don't dereference cfg */
> +        if (((i == 0) || cfg) && guard_func(cfg)) {

This should be i == 0 || (cfg && guard_func(cfg)).


r~
LIU Zhiwei Dec. 6, 2024, 4:39 a.m. UTC | #2
On 2024/12/6 11:36, Richard Henderson wrote:
> On 12/5/24 21:24, LIU Zhiwei wrote:
>> For riscv host, it will set dec->cfg to zero. Thus we shuld guard
>> the dec->cfg deference for riscv host disassemble.
>>
>> And in general, we should only use dec->cfg for target in three cases:
>>
>> 1) For not incompatible encodings, such as zcmp/zcmt/zfinx.
>> 2) For maybe-ops encodings, they are better to be disassembled to
>>     the "real" extensions, such as zicfiss. The guard of dec->zimop
>>     and dec->zcmop is for comment and avoid check for every extension
>>     that encoded in maybe-ops area.
>> 3) For custom encodings, we have to use dec->cfg to disassemble
>>     custom encodings using the same encoding area.
>>
>> Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
>> Suggested-by: Richard Henderson <richard.henderson@linaro.org>
>
> ...
>
>> @@ -5112,28 +5112,28 @@ static GString *format_inst(size_t tab, 
>> rv_decode *dec)
>>               g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
>>               break;
>>           case '3':
>> -            if (dec->cfg->ext_zfinx) {
>> +            if (dec->cfg && dec->cfg->ext_zfinx) {
>>                   g_string_append(buf, rv_ireg_name_sym[dec->rd]);
>>               } else {
>>                   g_string_append(buf, rv_freg_name_sym[dec->rd]);
>>               }
>>               break;
>>           case '4':
>> -            if (dec->cfg->ext_zfinx) {
>> +            if (dec->cfg && dec->cfg->ext_zfinx) {
>>                   g_string_append(buf, rv_ireg_name_sym[dec->rs1]);
>>               } else {
>>                   g_string_append(buf, rv_freg_name_sym[dec->rs1]);
>>               }
>>               break;
>>           case '5':
>> -            if (dec->cfg->ext_zfinx) {
>> +            if (dec->cfg && dec->cfg->ext_zfinx) {
>>                   g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
>>               } else {
>>                   g_string_append(buf, rv_freg_name_sym[dec->rs2]);
>>               }
>>               break;
>>           case '6':
>> -            if (dec->cfg->ext_zfinx) {
>> +            if (dec->cfg && dec->cfg->ext_zfinx) {
>>                   g_string_append(buf, rv_ireg_name_sym[dec->rs3]);
>>               } else {
>>                   g_string_append(buf, rv_freg_name_sym[dec->rs3]);
>
> These are the only tests of cfg that are required.
> None of the other standard isa extensions overlap.

Both zcmt and zcmp are not compatible with Zcd, as they reuse some encodings from c.fsdsp.

Zimop or Zcmop also overlap with other isa extensions, as they are maybe-ops. Other extensions
such as zicfiss will reuse their encodings.

I think we had better disassemble them to zicifss if it has been implemented on the target cpu. Otherwise
we disassemble them to maybe-ops.

>
>> @@ -5439,7 +5439,8 @@ static GString *disasm_inst(rv_isa isa, 
>> uint64_t pc, rv_inst inst,
>>           const rv_opcode_data *opcode_data = decoders[i].opcode_data;
>>           void (*decode_func)(rv_decode *, rv_isa) = 
>> decoders[i].decode_func;
>>   -        if (guard_func(cfg)) {
>> +        /* always_true_p don't dereference cfg */
>> +        if (((i == 0) || cfg) && guard_func(cfg)) {
>
> This should be i == 0 || (cfg && guard_func(cfg)).

OK. Although I think they are both right.

Thanks,
Zhiwei

>
>
> r~
Richard Henderson Dec. 6, 2024, 1:36 p.m. UTC | #3
On 12/5/24 22:39, LIU Zhiwei wrote:
> 
> Both zcmt and zcmp are not compatible with Zcd, as they reuse some encodings from c.fsdsp.

Ok, fair.  A comment about conflicts at that point may help.

> 
> Zimop or Zcmop also overlap with other isa extensions, as they are maybe-ops. Other extensions
> such as zicfiss will reuse their encodings.
> 
> I think we had better disassemble them to zicifss if it has been implemented on the target cpu. Otherwise
> we disassemble them to maybe-ops.

My point is that they are only belong to zimop until they are assigned, like zicifss.
At that point they *have* a defined meaning in the standard isa.

So, yes, disassemble as zicifss, but always, not "if it has been implemented in the target 
cpu".

>>> +        if (((i == 0) || cfg) && guard_func(cfg)) {
>>
>> This should be i == 0 || (cfg && guard_func(cfg)).
> 
> OK. Although I think they are both right.

  i = 0
  cfg = NULL

    (0 == 0 || NULL) && guard_func(NULL)
-> (true || false) && guard_func(NULL)
-> true && guard_func(NULL)
-> guard_func(NULL)
-> boom.

Or are you saying it won't go boom because we happen to know the 0th guard_func only 
returns true?  There's still no reason to call it...


r~
LIU Zhiwei Dec. 7, 2024, 1:27 a.m. UTC | #4
On 2024/12/6 21:36, Richard Henderson wrote:
> On 12/5/24 22:39, LIU Zhiwei wrote:
>>
>> Both zcmt and zcmp are not compatible with Zcd, as they reuse some 
>> encodings from c.fsdsp.
>
> Ok, fair.  A comment about conflicts at that point may help.
Ok.
>
>>
>> Zimop or Zcmop also overlap with other isa extensions, as they are 
>> maybe-ops. Other extensions
>> such as zicfiss will reuse their encodings.
>>
>> I think we had better disassemble them to zicifss if it has been 
>> implemented on the target cpu. Otherwise
>> we disassemble them to maybe-ops.
>
> My point is that they are only belong to zimop until they are 
> assigned, like zicifss.

No, they always belong to zimop unless they are assigned to other 
extensions. Applications built with zicfiss can also
run on cpu with zimop. In this case, the instructions of zicfiss should 
be disassemble as zimop maybe-ops which has it's default behavior 
different with the behavior of zicfiss.

  Zimop belongs to mandate extension in RVB23 profile. There may be a 
lot of cpus implement zimop but doesn't implement overlapping zicfiss.  
So disassemble the applications to zimop is appropriate for these cpus.

> At that point they *have* a defined meaning in the standard isa.
>
> So, yes, disassemble as zicifss, but always, not "if it has been 
> implemented in the target cpu".
>
>>>> +        if (((i == 0) || cfg) && guard_func(cfg)) {
>>>
>>> This should be i == 0 || (cfg && guard_func(cfg)).
>>
>> OK. Although I think they are both right.
>
>  i = 0
>  cfg = NULL
>
>    (0 == 0 || NULL) && guard_func(NULL)
> -> (true || false) && guard_func(NULL)
> -> true && guard_func(NULL)
> -> guard_func(NULL)
> -> boom.
>
> Or are you saying it won't go boom because we happen to know the 0th 
> guard_func only returns true
Yes.
> ? There's still no reason to call it...

Agree. Will use this way.

Thanks,
Zhiwei

>
>
> r~
diff mbox series

Patch

diff --git a/disas/riscv.c b/disas/riscv.c
index 9c1e332dde..4075ed6bfe 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -2611,7 +2611,7 @@  static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
             break;
         case 2: op = rv_op_c_li; break;
         case 3:
-            if (dec->cfg->ext_zcmop) {
+            if (dec->cfg && dec->cfg->ext_zcmop) {
                 if ((((inst >> 2) & 0b111111) == 0b100000) &&
                     (((inst >> 11) & 0b11) == 0b0)) {
                     unsigned int cmop_code = 0;
@@ -2712,7 +2712,7 @@  static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 op = rv_op_c_sqsp;
             } else {
                 op = rv_op_c_fsdsp;
-                if (dec->cfg->ext_zcmp && ((inst >> 12) & 0b01)) {
+                if (dec->cfg && dec->cfg->ext_zcmp && ((inst >> 12) & 0b01)) {
                     switch ((inst >> 8) & 0b01111) {
                     case 8:
                         if (((inst >> 4) & 0b01111) >= 4) {
@@ -2738,7 +2738,7 @@  static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 } else {
                     switch ((inst >> 10) & 0b011) {
                     case 0:
-                        if (!dec->cfg->ext_zcmt) {
+                        if (dec->cfg && !dec->cfg->ext_zcmt) {
                             break;
                         }
                         if (((inst >> 2) & 0xFF) >= 32) {
@@ -2748,7 +2748,7 @@  static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                         }
                         break;
                     case 3:
-                        if (!dec->cfg->ext_zcmp) {
+                        if (dec->cfg && !dec->cfg->ext_zcmp) {
                             break;
                         }
                         switch ((inst >> 5) & 0b011) {
@@ -2956,7 +2956,7 @@  static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
             break;
         case 5:
             op = rv_op_auipc;
-            if (dec->cfg->ext_zicfilp &&
+            if (dec->cfg && dec->cfg->ext_zicfilp &&
                 (((inst >> 7) & 0b11111) == 0b00000)) {
                 op = rv_op_lpad;
             }
@@ -4058,7 +4058,7 @@  static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
             case 2: op = rv_op_csrrs; break;
             case 3: op = rv_op_csrrc; break;
             case 4:
-                if (dec->cfg->ext_zimop) {
+                if (dec->cfg && dec->cfg->ext_zimop) {
                     int imm_mop5, imm_mop3, reg_num;
                     if ((extract32(inst, 22, 10) & 0b1011001111)
                         == 0b1000000111) {
@@ -5112,28 +5112,28 @@  static GString *format_inst(size_t tab, rv_decode *dec)
             g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
             break;
         case '3':
-            if (dec->cfg->ext_zfinx) {
+            if (dec->cfg && dec->cfg->ext_zfinx) {
                 g_string_append(buf, rv_ireg_name_sym[dec->rd]);
             } else {
                 g_string_append(buf, rv_freg_name_sym[dec->rd]);
             }
             break;
         case '4':
-            if (dec->cfg->ext_zfinx) {
+            if (dec->cfg && dec->cfg->ext_zfinx) {
                 g_string_append(buf, rv_ireg_name_sym[dec->rs1]);
             } else {
                 g_string_append(buf, rv_freg_name_sym[dec->rs1]);
             }
             break;
         case '5':
-            if (dec->cfg->ext_zfinx) {
+            if (dec->cfg && dec->cfg->ext_zfinx) {
                 g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
             } else {
                 g_string_append(buf, rv_freg_name_sym[dec->rs2]);
             }
             break;
         case '6':
-            if (dec->cfg->ext_zfinx) {
+            if (dec->cfg && dec->cfg->ext_zfinx) {
                 g_string_append(buf, rv_ireg_name_sym[dec->rs3]);
             } else {
                 g_string_append(buf, rv_freg_name_sym[dec->rs3]);
@@ -5439,7 +5439,8 @@  static GString *disasm_inst(rv_isa isa, uint64_t pc, rv_inst inst,
         const rv_opcode_data *opcode_data = decoders[i].opcode_data;
         void (*decode_func)(rv_decode *, rv_isa) = decoders[i].decode_func;
 
-        if (guard_func(cfg)) {
+        /* always_true_p don't dereference cfg */
+        if (((i == 0) || cfg) && guard_func(cfg)) {
             dec.opcode_data = opcode_data;
             decode_func(&dec, isa);
             if (dec.op != rv_op_illegal)