diff mbox series

tcg: Mark tcg helpers noinline to avoid an issue with LTO

Message ID 20230119165006.742073-1-richard.henderson@linaro.org (mailing list archive)
State New, archived
Headers show
Series tcg: Mark tcg helpers noinline to avoid an issue with LTO | expand

Commit Message

Richard Henderson Jan. 19, 2023, 4:50 p.m. UTC
Marking helpers __attribute__((noinline)) prevents an issue
with GCC's ipa-split pass under --enable-lto.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1454
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

Idan, please give this a try.  By inspection, it fixes the issue
with s390x's helper_divs32 function.


r~

---
 include/exec/helper-proto.h | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

Comments

Philippe Mathieu-Daudé Jan. 19, 2023, 4:55 p.m. UTC | #1
On 19/1/23 17:50, Richard Henderson wrote:
> Marking helpers __attribute__((noinline)) prevents an issue
> with GCC's ipa-split pass under --enable-lto.
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1454
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> 
> Idan, please give this a try.  By inspection, it fixes the issue
> with s390x's helper_divs32 function.
> 
> 
> r~
> 
> ---
>   include/exec/helper-proto.h | 32 ++++++++++++++++++++++++--------
>   1 file changed, 24 insertions(+), 8 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Peter Maydell Jan. 19, 2023, 5:11 p.m. UTC | #2
On Thu, 19 Jan 2023 at 16:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Marking helpers __attribute__((noinline)) prevents an issue
> with GCC's ipa-split pass under --enable-lto.
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1454
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>
> Idan, please give this a try.  By inspection, it fixes the issue
> with s390x's helper_divs32 function.
>
>
> r~
>
> ---
>  include/exec/helper-proto.h | 32 ++++++++++++++++++++++++--------
>  1 file changed, 24 insertions(+), 8 deletions(-)
>
> diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
> index c4b1bda632..7a3f04b58c 100644
> --- a/include/exec/helper-proto.h
> +++ b/include/exec/helper-proto.h
> @@ -6,34 +6,49 @@
>
>  #include "exec/helper-head.h"
>
> +/*
> + * Work around an issue with --enable-lto, in which GCC's ipa-split pass
> + * decides to split out the noreturn code paths that raise an exception,
> + * taking the __builtin_return_address() along into the new function,
> + * where it no longer computes a value that returns to TCG generated code.
> + * Despite the name, the noinline attribute affects the splitter, so this
> + * prevents the optimization in question.  Given that helpers should not
> + * otherwise be called directly, this should not have any other visible effect.
> + *
> + * See https://gitlab.com/qemu-project/qemu/-/issues/1454
> + */

Do we have any chance of persuading the gcc folks to document
that 'noinline' also affects function splitting ? That would
reduce the chance that they decide to take out the attribute
check at some future date...

thanks
-- PMM
Idan Horowitz Jan. 19, 2023, 5:32 p.m. UTC | #3
On 19/01/2023 18:50, Richard Henderson wrote:
> Marking helpers __attribute__((noinline)) prevents an issue
> with GCC's ipa-split pass under --enable-lto.
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1454
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> 
> Idan, please give this a try.  By inspection, it fixes the issue
> with s390x's helper_divs32 function.
> 
> 
> r~

This resolves the issue:

000000000023f9b0 <helper_divs32>:
   23f9b0:       48 83 ec 08             sub    rsp,0x8
   23f9b4:       85 d2                   test   edx,edx
   23f9b6:       74 22                   je     23f9da <helper_divs32+0x2a>
   23f9b8:       48 89 f0                mov    rax,rsi
   23f9bb:       48 63 ca                movsxd rcx,edx
   23f9be:       48 99                   cqo
   23f9c0:       48 f7 f9                idiv   rcx
   23f9c3:       48 89 97 10 03 00 00    mov    QWORD PTR [rdi+0x310],rdx
   23f9ca:       48 63 d0                movsxd rdx,eax
   23f9cd:       48 39 c2                cmp    rdx,rax
   23f9d0:       75 08                   jne    23f9da <helper_divs32+0x2a>
   23f9d2:       48 89 d0                mov    rax,rdx
   23f9d5:       48 83 c4 08             add    rsp,0x8
   23f9d9:       c3                      ret
   23f9da:       48 8b 54 24 08          mov    rdx,QWORD PTR [rsp+0x8]
   23f9df:       be 09 00 00 00          mov    esi,0x9
   23f9e4:       e8 a7 df ff ff          call   23d990 <tcg_s390_program_interrupt>

Thanks!

Tested-by: Idan Horowitz <idan.horowitz@gmail.com>

> ---
>  include/exec/helper-proto.h | 32 ++++++++++++++++++++++++--------
>  1 file changed, 24 insertions(+), 8 deletions(-)
>
Richard Henderson Jan. 19, 2023, 8:50 p.m. UTC | #4
On 1/19/23 07:11, Peter Maydell wrote:
> Do we have any chance of persuading the gcc folks to document
> that 'noinline' also affects function splitting ? That would
> reduce the chance that they decide to take out the attribute
> check at some future date...

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108470


r~
diff mbox series

Patch

diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
index c4b1bda632..7a3f04b58c 100644
--- a/include/exec/helper-proto.h
+++ b/include/exec/helper-proto.h
@@ -6,34 +6,49 @@ 
 
 #include "exec/helper-head.h"
 
+/*
+ * Work around an issue with --enable-lto, in which GCC's ipa-split pass
+ * decides to split out the noreturn code paths that raise an exception,
+ * taking the __builtin_return_address() along into the new function,
+ * where it no longer computes a value that returns to TCG generated code.
+ * Despite the name, the noinline attribute affects the splitter, so this
+ * prevents the optimization in question.  Given that helpers should not
+ * otherwise be called directly, this should not have any other visible effect.
+ *
+ * See https://gitlab.com/qemu-project/qemu/-/issues/1454
+ */
+#define DEF_HELPER_ATTR  __attribute__((noinline))
+
 #define DEF_HELPER_FLAGS_0(name, flags, ret) \
-dh_ctype(ret) HELPER(name) (void);
+dh_ctype(ret) HELPER(name) (void) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), \
+                            dh_ctype(t3)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                                   dh_ctype(t4));
+                            dh_ctype(t4)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5));
+                            dh_ctype(t4), dh_ctype(t5)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5), dh_ctype(t6));
+                            dh_ctype(t4), dh_ctype(t5), \
+                            dh_ctype(t6)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
                             dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
-                            dh_ctype(t7));
+                            dh_ctype(t7)) DEF_HELPER_ATTR;
 
 #define IN_HELPER_PROTO
 
@@ -51,5 +66,6 @@  dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
 #undef DEF_HELPER_FLAGS_5
 #undef DEF_HELPER_FLAGS_6
 #undef DEF_HELPER_FLAGS_7
+#undef DEF_HELPER_ATTR
 
 #endif /* HELPER_PROTO_H */