diff mbox series

init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND

Message ID 20240718120647.1115592-1-mark.rutland@arm.com (mailing list archive)
State New, archived
Headers show
Series init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND | expand

Commit Message

Mark Rutland July 18, 2024, 12:06 p.m. UTC
Several versions of GCC mis-compile asm goto with outputs. We try to
workaround this, but our workaround is demonstrably incomplete and
liable to result in subtle bugs, especially on arm64 where get_user()
has recently been moved over to using asm goto with outputs.

From discussion(s) with Linus at:

  https://lore.kernel.org/linux-arm-kernel/Zpfv2tnlQ-gOLGac@J2N7QTR9R3.cambridge.arm.com/
  https://lore.kernel.org/linux-arm-kernel/ZpfxLrJAOF2YNqCk@J2N7QTR9R3.cambridge.arm.com/

... it sounds like the best thing to do for now is to remove the workaround and
make CC_HAS_ASM_GOTO_OUTPUT depend on working compiler versions.

The issue was originally reported to GCC by Sean Christopherson:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921

... and Jakub Jelinek fixed this for GCC 14, with the fix backported to
13.3.0, 12.4.0, and 11.5.0.

In the kernel, we tried to workaround broken compilers in commits:

  4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs")
  68fb3ca0e408 ("update workarounds for gcc "asm goto" issue")

... but the workaround of adding an empty asm("") after the asm volatile
goto(...) demonstrably does not always avoid the problem, as can be seen
in the following test case:

| #define asm_goto_output(x...) \
|         do { asm volatile goto(x); asm (""); } while (0)
|
| #define __good_or_bad(__val, __key)                                     \
| do {                                                                    \
|         __label__ __failed;                                             \
|         unsigned long __tmp;                                            \
|         asm_goto_output(                                                \
|         "       cbnz    %[key], %l[__failed]\n"                         \
|         "       mov     %[val], #0x900d\n"                              \
|         : [val] "=r" (__tmp)                                            \
|         : [key] "r" (__key)                                             \
|         :                                                               \
|         : __failed);                                                    \
|         (__val) = __tmp;                                                \
|         break;                                                          \
| __failed:                                                               \
|         (__val) = 0xbad;                                                \
| } while (0)
|
| unsigned long get_val(unsigned long key);
| unsigned long get_val(unsigned long key)
| {
|         unsigned long val = 0xbad;
|
|         __good_or_bad(val, key);
|
|         return val;
| }

GCC 13.2.0 (at -O2) compiles this to:

| 	cbnz    x0, .Lfailed
| 	mov     x0, #0x900d
| .Lfailed:
| 	ret

GCC 14.1.0 (at -O2) compiles this to:

| 	cbnz    x0, .Lfailed
| 	mov     x0, #0x900d
| 	ret
| .Lfailed:
| 	mov     x0, #0xbad
| 	ret

Note that GCC 13.2.0 erroneously omits the assignment to 'val' in the
error path (even though this does not depend on an output of the asm
goto). GCC 14.1.0 correctly retains the assignment.

This problem can be seen within the kernel with the following test case:

| #include <linux/uaccess.h>
| #include <linux/types.h>
|
| noinline unsigned long test_unsafe_get_user(unsigned long __user *ptr);
| noinline unsigned long test_unsafe_get_user(unsigned long __user *ptr)
| {
|         unsigned long val;
|
|         unsafe_get_user(val, ptr, Efault);
|         return val;
|
| Efault:
|         val = 0x900d;
|         return val;
| }

GCC 13.2.0 (arm64 defconfig) compiles this to:

|         and     x0, x0, #0xff7fffffffffffff
|         ldtr    x0, [x0]
| .Lextable_fixup:
|         ret

GCC 13.2.0 (x86_64 defconfig + MITIGATION_RETPOLINE=n) compiles this to:

|         endbr64
|         mov    (%rdi),%rax
| .Lextable_fixup:
|         ret

... omitting the assignment to 'val' in the error path, and leaving
garbage in the result register returned by the function (which happens
to contain the faulting address in the generated code).

GCC 14.1.0 (arm64 defconfig) compiles this to:

|         and     x0, x0, #0xff7fffffffffffff
|         ldtr    x0, [x0]
|         ret
| .Lextable_fixup:
|         mov     x0, #0x900d                     // #36877
|         ret

GCC 14.1.0 (x86_64 defconfig + MITIGATION_RETPOLINE=n) compiles this to:

|         endbr64
|         mov    (%rdi),%rax
|         ret
| .Lextable_fixup:
|         mov    $0x900d,%eax
|         ret

... retaining the expected assignment to 'val' in the error path.

We don't have a complete and reasonable workaround. While placing empty
asm("") blocks after each goto label *might* be sufficient, we don't
know for certain, this is tedious and error-prone, and there doesn't
seem to be a neat way to wrap this up (which is especially painful for
cases with multiple goto labels).

Avoid this issue by disabling CONFIG_CC_HAS_ASM_GOTO_OUTPUT for
known-broken compiler versions and removing the workaround (along with
the CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND config option).

For the moment I've left the default implementation of asm_goto_output()
unchanged. This should now be redundant since any compiler with the fix
for the clobbering issue whould also have a fix for the (earlier)
volatile issue, but it's far less churny to leave it around, which makes
it easier to backport this patch if necessary.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alex Coplan <alex.coplan@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jakub Jelinek <jakub@gcc.gnu.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Szabolcs Nagy <szabolcs.nagy@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 include/linux/compiler-gcc.h | 20 --------------------
 init/Kconfig                 | 14 +++++---------
 2 files changed, 5 insertions(+), 29 deletions(-)

Comments

Linus Torvalds July 18, 2024, 4:59 p.m. UTC | #1
On Thu, 18 Jul 2024 at 05:07, Mark Rutland <mark.rutland@arm.com> wrote:
>
>  config CC_HAS_ASM_GOTO_OUTPUT
> +       # Fixed in GCC 14, 13.3, 12.4 and 11.5
> +       # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
> +       default n if CC_IS_GCC && GCC_VERSION < 110500
> +       default n if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 120400
> +       default n if CC_IS_GCC && GCC_VERSION >= 130000 && GCC_VERSION < 130300
>         def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)

I'll edit this all a bit, because I don't love complicated config entries.

Let's keep the "which gcc versions are scrogged" as a separate config
entry, and then have this just as a

     default n if CC_IS_GCC && GCC_NO_ASM_GOTO_OUTPUTS

because we've been here before with the whole "gcc version tests", and
it got ugly, and we have that whole "CC_NO_ARRAY_BOUNDS" where we went
back and forth on different gcc versions, and it was just really
annoying.

So then we learnt our lesson, and for the stringop overflow code we do

  # Currently, disable -Wstringop-overflow for GCC globally.
  config GCC_NO_STRINGOP_OVERFLOW
          def_bool y

so that we can make any possible future "this gcc version is good /
bad" in one clear place, and we have

  config CC_NO_STRINGOP_OVERFLOW
          bool
          default y if CC_IS_GCC && GCC_NO_STRINGOP_OVERFLOW

which is readable and understandable and if clang - or some other
compiler - were to add their own issues, it would still be readable
and understandable, and not some complicated mess.

Of course, once we _did_ learn our lesson about gcc versions, that
-Wstringop-overflow hasn't actually gotten any more complex
afterwards, so the *one* place we do this right didn't actually need
it. Yet.

              Linus
Linus Torvalds July 18, 2024, 5:59 p.m. UTC | #2
On Thu, 18 Jul 2024 at 09:59, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> Let's keep the "which gcc versions are scrogged" as a separate config
> entry, and then have this just as a
>
>      default n if CC_IS_GCC && GCC_NO_ASM_GOTO_OUTPUTS

Ok, I ended up playing around with this a bit more, and it ended up like

  config GCC_ASM_GOTO_OUTPUT_BROKEN
        bool
        depends on CC_IS_GCC
        default y if GCC_VERSION < 110500
        ..

with then CC_HAS_ASM_GOTO_OUTPUT just having a

        depends on !GCC_ASM_GOTO_OUTPUT_BROKEN

in it. That looks fairly legible to me, and seems to work fine.

I left it all credited to you, since you found all the problems and
wrote that big nice commit log. But it means that if I screwed up in
my edits, you get the blame too. So if that happens, just point haters
at this email and say it's all my fault.

           Linus
Mark Rutland July 19, 2024, 9:29 a.m. UTC | #3
On Thu, Jul 18, 2024 at 10:59:16AM -0700, Linus Torvalds wrote:
> On Thu, 18 Jul 2024 at 09:59, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > Let's keep the "which gcc versions are scrogged" as a separate config
> > entry, and then have this just as a
> >
> >      default n if CC_IS_GCC && GCC_NO_ASM_GOTO_OUTPUTS
> 
> Ok, I ended up playing around with this a bit more, and it ended up like
> 
>   config GCC_ASM_GOTO_OUTPUT_BROKEN
>         bool
>         depends on CC_IS_GCC
>         default y if GCC_VERSION < 110500
>         ..
> 
> with then CC_HAS_ASM_GOTO_OUTPUT just having a
> 
>         depends on !GCC_ASM_GOTO_OUTPUT_BROKEN
> 
> in it. That looks fairly legible to me, and seems to work fine.
> 
> I left it all credited to you, since you found all the problems and
> wrote that big nice commit log. But it means that if I screwed up in
> my edits, you get the blame too. So if that happens, just point haters
> at this email and say it's all my fault.

Thanks; that all looks good to me.

Just to be sure, I ran a few quick checks (below) and the detection logic looks
sound. I also built and boot-tested defconfig for arm64 and x86_64 with the
most recent broken GCC (13.2.0), and see no problems.

Toolchains which SHOULD work:

| % usellvm 14.0.0 make ARCH=arm64 LLVM=1 -s defconfig
| % grep OUTPUT .config                               
| CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
| CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
| 
| % usellvm 13.0.1 make ARCH=arm64 LLVM=1 -s defconfig
| % grep OUTPUT .config                               
| CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
| 
| % usekorg 14.1.0 make ARCH=arm64 CROSS_COMPILE=aarch64-linux- -s defconfig
| % grep OUTPUT .config                                                     
| CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
| CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
| 
| % usekorg 13.3.0 make ARCH=arm64 CROSS_COMPILE=aarch64-linux- -s defconfig
| % grep OUTPUT .config                                                     
| CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
| CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
| 
| % usekorg 12.4.0 make ARCH=arm64 CROSS_COMPILE=aarch64-linux- -s defconfig
| % grep OUTPUT .config                                                     
| CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
| CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y

Toolchains which SHOULD NOT work

| % usekorg 13.2.0 make ARCH=arm64 CROSS_COMPILE=aarch64-linux- -s defconfig 
| % grep OUTPUT .config                                                     
| CONFIG_GCC_ASM_GOTO_OUTPUT_BROKEN=y
| 
| % usekorg 12.3.0 make ARCH=arm64 CROSS_COMPILE=aarch64-linux- -s defconfig
| % grep OUTPUT .config                                                     
| CONFIG_GCC_ASM_GOTO_OUTPUT_BROKEN=y
| 
| % usekorg 11.4.0 make ARCH=arm64 CROSS_COMPILE=aarch64-linux- -s defconfig
| % grep OUTPUT .config                                                     
| CONFIG_GCC_ASM_GOTO_OUTPUT_BROKEN=y
| 
| % usekorg 10.5.0 make ARCH=arm64 CROSS_COMPILE=aarch64-linux- -s defconfig
| % grep OUTPUT .config                                                     
| CONFIG_GCC_ASM_GOTO_OUTPUT_BROKEN=y

Mark.
diff mbox series

Patch

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index aff92b1d284fd..f805adaa316e9 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -64,26 +64,6 @@ 
 		__builtin_unreachable();	\
 	} while (0)
 
-/*
- * GCC 'asm goto' with outputs miscompiles certain code sequences:
- *
- *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
- *
- * Work around it via the same compiler barrier quirk that we used
- * to use for the old 'asm goto' workaround.
- *
- * Also, always mark such 'asm goto' statements as volatile: all
- * asm goto statements are supposed to be volatile as per the
- * documentation, but some versions of gcc didn't actually do
- * that for asms with outputs:
- *
- *    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619
- */
-#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND
-#define asm_goto_output(x...) \
-	do { asm volatile goto(x); asm (""); } while (0)
-#endif
-
 #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
 #define __HAVE_BUILTIN_BSWAP32__
 #define __HAVE_BUILTIN_BSWAP64__
diff --git a/init/Kconfig b/init/Kconfig
index febdea2afc3be..e236498911870 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -82,6 +82,11 @@  config CC_CAN_LINK_STATIC
 	default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static)
 
 config CC_HAS_ASM_GOTO_OUTPUT
+	# Fixed in GCC 14, 13.3, 12.4 and 11.5
+	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+	default n if CC_IS_GCC && GCC_VERSION < 110500
+	default n if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 120400
+	default n if CC_IS_GCC && GCC_VERSION >= 130000 && GCC_VERSION < 130300
 	def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
 
 config CC_HAS_ASM_GOTO_TIED_OUTPUT
@@ -89,15 +94,6 @@  config CC_HAS_ASM_GOTO_TIED_OUTPUT
 	# Detect buggy gcc and clang, fixed in gcc-11 clang-14.
 	def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)
 
-config GCC_ASM_GOTO_OUTPUT_WORKAROUND
-	bool
-	depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT
-	# Fixed in GCC 14, 13.3, 12.4 and 11.5
-	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
-	default y if GCC_VERSION < 110500
-	default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400
-	default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300
-
 config TOOLS_SUPPORT_RELR
 	def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)