diff mbox series

[RFC/RFT] SCS:Add gcc plugin to support Shadow Call Stack

Message ID 1632069436-25075-1-git-send-email-ashimida@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series [RFC/RFT] SCS:Add gcc plugin to support Shadow Call Stack | expand

Commit Message

Dan Li Sept. 19, 2021, 4:37 p.m. UTC
The Clang-based shadow call stack protection has been integrated into the
mainline, but kernel compiled by gcc cannot enable this feature for now.

This Patch supports gcc-based SCS protection by adding a plugin.

For each function that x30 will be pushed onto the stack during execution,
this plugin:
1) insert "str x30, [x18], #8" at the entry of the function to save x30
   to current SCS
2) insert "ldr x30, [x18, #-8]!"  before the exit of this function to
   restore x30

At present, this patch has been successfully compiled(based on defconfig)
in the following gcc versions(if plugin is supported) and startup normally:
* 6.3.1
* 7.3.1
* 7.5.0
* 8.2.1
* 9.2.0
* 10.3.1

with commands:
make ARCH=arm64 defconfig
./scripts/config -e CONFIG_GCC_PLUGINS -e CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK
make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu-

---
FYI:
1) The function can be used to test whether the shadow stack is effective:
//noinline void __noscs scs_test(void)
noinline void scs_test(void)
{
    register unsigned long *sp asm("sp");
    unsigned long * lr = sp + 1;

    asm volatile("":::"x30");
    *lr = 0;
}

ffff800010012670 <scs_test>:
ffff800010012670:       f800865e        str     x30, [x18], #8
ffff800010012674:       a9bf7bfd        stp     x29, x30, [sp, #-16]!
ffff800010012678:       910003fd        mov     x29, sp
ffff80001001267c:       f90007ff        str     xzr, [sp, #8]
ffff800010012680:       a8c17bfd        ldp     x29, x30, [sp], #16
ffff800010012684:       f85f8e5e        ldr     x30, [x18, #-8]!
ffff800010012688:       d65f03c0        ret

If SCS protection is enabled, this function will return normally.
If the function has __noscs attribute (scs disabled), it will crash due to 0
address access.

2) Other tests are in progress ...

Signed-off-by: Dan Li <ashimida@linux.alibaba.com>
---
 Makefile                               |   2 +-
 arch/Kconfig                           |   2 +-
 include/linux/compiler-gcc.h           |   4 +
 scripts/Makefile.gcc-plugins           |   4 +
 scripts/gcc-plugins/Kconfig            |   8 ++
 scripts/gcc-plugins/arm64_scs_plugin.c | 256 +++++++++++++++++++++++++++++++++
 6 files changed, 274 insertions(+), 2 deletions(-)
 create mode 100644 scripts/gcc-plugins/arm64_scs_plugin.c

Comments

Nathan Chancellor Sept. 19, 2021, 9:45 p.m. UTC | #1
Hi Dan,

A couple of initial high level comments, I do not really feel qualified
to review the plugin itself.

On Mon, Sep 20, 2021 at 12:37:16AM +0800, Dan Li wrote:
> The Clang-based shadow call stack protection has been integrated into the
> mainline, but kernel compiled by gcc cannot enable this feature for now.
> 
> This Patch supports gcc-based SCS protection by adding a plugin.
> 
> For each function that x30 will be pushed onto the stack during execution,
> this plugin:
> 1) insert "str x30, [x18], #8" at the entry of the function to save x30
>    to current SCS
> 2) insert "ldr x30, [x18, #-8]!"  before the exit of this function to
>    restore x30
> 
> At present, this patch has been successfully compiled(based on defconfig)
> in the following gcc versions(if plugin is supported) and startup normally:
> * 6.3.1
> * 7.3.1
> * 7.5.0
> * 8.2.1
> * 9.2.0
> * 10.3.1
> 
> with commands:
> make ARCH=arm64 defconfig
> ./scripts/config -e CONFIG_GCC_PLUGINS -e CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK
> make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu-
> 
> ---
> FYI:
> 1) The function can be used to test whether the shadow stack is effective:
> //noinline void __noscs scs_test(void)
> noinline void scs_test(void)
> {
>     register unsigned long *sp asm("sp");
>     unsigned long * lr = sp + 1;
> 
>     asm volatile("":::"x30");
>     *lr = 0;
> }
> 
> ffff800010012670 <scs_test>:
> ffff800010012670:       f800865e        str     x30, [x18], #8
> ffff800010012674:       a9bf7bfd        stp     x29, x30, [sp, #-16]!
> ffff800010012678:       910003fd        mov     x29, sp
> ffff80001001267c:       f90007ff        str     xzr, [sp, #8]
> ffff800010012680:       a8c17bfd        ldp     x29, x30, [sp], #16
> ffff800010012684:       f85f8e5e        ldr     x30, [x18, #-8]!
> ffff800010012688:       d65f03c0        ret
> 
> If SCS protection is enabled, this function will return normally.
> If the function has __noscs attribute (scs disabled), it will crash due to 0
> address access.
> 
> 2) Other tests are in progress ...
> 
> Signed-off-by: Dan Li <ashimida@linux.alibaba.com>
> ---
>  Makefile                               |   2 +-
>  arch/Kconfig                           |   2 +-
>  include/linux/compiler-gcc.h           |   4 +
>  scripts/Makefile.gcc-plugins           |   4 +
>  scripts/gcc-plugins/Kconfig            |   8 ++
>  scripts/gcc-plugins/arm64_scs_plugin.c | 256 +++++++++++++++++++++++++++++++++
>  6 files changed, 274 insertions(+), 2 deletions(-)
>  create mode 100644 scripts/gcc-plugins/arm64_scs_plugin.c
> 
> diff --git a/Makefile b/Makefile
> index 61741e9..0f0121a 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -924,7 +924,7 @@ LDFLAGS_vmlinux += --gc-sections
>  endif
>  
>  ifdef CONFIG_SHADOW_CALL_STACK

I would rather see this become

ifeq ($(CONFIG_SHADOW_CALL_STACK)$(CONFIG_CC_IS_CLANG), yy)
...
endif

rather than just avoiding assigning to CC_FLAGS_SCS.

However, how does disabling the shadow call stack plugin work for a
whole translation unit or directory? There are a few places where
CC_FLAGS_SCS are filtered out and I am not sure I see where that happens
here? It looks like the plugin has a disabled option but I do not see it
hooked in anywhere.

> -CC_FLAGS_SCS	:= -fsanitize=shadow-call-stack
> +CC_FLAGS_SCS	:= $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)
>  KBUILD_CFLAGS	+= $(CC_FLAGS_SCS)
>  export CC_FLAGS_SCS
>  endif
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 98db634..81ff127 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>  
>  config SHADOW_CALL_STACK
>  	bool "Clang Shadow Call Stack"
> -	depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
> +	depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK

Is this logic right? SHADOW_CALL_STACK is only supported by arm64 (as
they set ARCH_SUPPORTS_SHADOW_CALL_STACK) but now you are enabling it
for any architecture, even though it seems like it still only works on
arm64. I think this wants to be

depends on (CC_IS_CLANG || GCC_PLUGIN_SHADOW_CALL_STACK) && ARCH_SUPPORTS_SHADOW_CALL_STACK

>  	depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
>  	help
>  	  This option enables Clang's Shadow Call Stack, which uses a
> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> index cb9217f..426c8e5 100644
> --- a/include/linux/compiler-gcc.h
> +++ b/include/linux/compiler-gcc.h
> @@ -50,6 +50,10 @@
>  #define __latent_entropy __attribute__((latent_entropy))
>  #endif
>  
> +#if defined(SHADOW_CALL_STACK_PLUGIN) && !defined(__CHECKER__)
> +#define __noscs __attribute__((no_shadow_call_stack))
> +#endif
> +
>  /*
>   * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
>   * confuse the stack allocation in gcc, leading to overly large stack
> diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
> index 952e468..eeaf2c6 100644
> --- a/scripts/Makefile.gcc-plugins
> +++ b/scripts/Makefile.gcc-plugins
> @@ -46,6 +46,10 @@ ifdef CONFIG_GCC_PLUGIN_ARM_SSP_PER_TASK
>  endif
>  export DISABLE_ARM_SSP_PER_TASK_PLUGIN
>  
> +gcc-plugin-$(CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK) += arm64_scs_plugin.so
> +gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK)	\
> +		+= -DSHADOW_CALL_STACK_PLUGIN
> +
>  # All the plugin CFLAGS are collected here in case a build target needs to
>  # filter them out of the KBUILD_CFLAGS.
>  GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
> diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
> index ab9eb4c..2534195e 100644
> --- a/scripts/gcc-plugins/Kconfig
> +++ b/scripts/gcc-plugins/Kconfig
> @@ -19,6 +19,14 @@ menuconfig GCC_PLUGINS
>  
>  if GCC_PLUGINS
>  
> +config GCC_PLUGIN_SHADOW_CALL_STACK
> +	bool "GCC Shadow Call Stack plugin"

This should also have a

depends on ARCH_SUPPORTS_SHADOW_CALL_STACK

if you are selecting SHADOW_CALL_STACK, as selecting does not account
for dependencies.

> +	select SHADOW_CALL_STACK
> +	help
> +	  This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
> +	  compiled by gcc. Its principle is basically the same as that of CLANG.
> +	  For more information, please refer to "config SHADOW_CALL_STACK"
> +
>  config GCC_PLUGIN_CYC_COMPLEXITY
>  	bool "Compute the cyclomatic complexity of a function" if EXPERT
>  	depends on !COMPILE_TEST	# too noisy

Cheers,
Nathan
Ard Biesheuvel Sept. 20, 2021, 7:18 a.m. UTC | #2
Hi Dan,

On Sun, 19 Sept 2021 at 18:37, Dan Li <ashimida@linux.alibaba.com> wrote:
>
> The Clang-based shadow call stack protection has been integrated into the
> mainline, but kernel compiled by gcc cannot enable this feature for now.
>
> This Patch supports gcc-based SCS protection by adding a plugin.
>

Thanks for working on this. I had a stab at this myself about 2 years
ago and couldn't make it work.

> For each function that x30 will be pushed onto the stack during execution,
> this plugin:
> 1) insert "str x30, [x18], #8" at the entry of the function to save x30
>    to current SCS
> 2) insert "ldr x30, [x18, #-8]!"  before the exit of this function to
>    restore x30
>

This logic seems sound to me, but it would be nice if someone more
familiar with Clang's implementation could confirm that it is really
this simple.

Looking at your plugin, there is an issue with tail calls, and I don't
think Clang simply disables those altogether as well, right?

> At present, this patch has been successfully compiled(based on defconfig)
> in the following gcc versions(if plugin is supported) and startup normally:
> * 6.3.1
> * 7.3.1
> * 7.5.0
> * 8.2.1
> * 9.2.0
> * 10.3.1
>
> with commands:
> make ARCH=arm64 defconfig
> ./scripts/config -e CONFIG_GCC_PLUGINS -e CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK
> make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu-
>
> ---
> FYI:
> 1) The function can be used to test whether the shadow stack is effective:
> //noinline void __noscs scs_test(void)
> noinline void scs_test(void)
> {
>     register unsigned long *sp asm("sp");
>     unsigned long * lr = sp + 1;
>
>     asm volatile("":::"x30");
>     *lr = 0;
> }
>
> ffff800010012670 <scs_test>:
> ffff800010012670:       f800865e        str     x30, [x18], #8
> ffff800010012674:       a9bf7bfd        stp     x29, x30, [sp, #-16]!
> ffff800010012678:       910003fd        mov     x29, sp
> ffff80001001267c:       f90007ff        str     xzr, [sp, #8]
> ffff800010012680:       a8c17bfd        ldp     x29, x30, [sp], #16
> ffff800010012684:       f85f8e5e        ldr     x30, [x18, #-8]!
> ffff800010012688:       d65f03c0        ret
>
> If SCS protection is enabled, this function will return normally.
> If the function has __noscs attribute (scs disabled), it will crash due to 0
> address access.
>
> 2) Other tests are in progress ...
>
> Signed-off-by: Dan Li <ashimida@linux.alibaba.com>
> ---
>  Makefile                               |   2 +-
>  arch/Kconfig                           |   2 +-
>  include/linux/compiler-gcc.h           |   4 +
>  scripts/Makefile.gcc-plugins           |   4 +
>  scripts/gcc-plugins/Kconfig            |   8 ++
>  scripts/gcc-plugins/arm64_scs_plugin.c | 256 +++++++++++++++++++++++++++++++++
>  6 files changed, 274 insertions(+), 2 deletions(-)
>  create mode 100644 scripts/gcc-plugins/arm64_scs_plugin.c
>
> diff --git a/Makefile b/Makefile
> index 61741e9..0f0121a 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -924,7 +924,7 @@ LDFLAGS_vmlinux += --gc-sections
>  endif
>
>  ifdef CONFIG_SHADOW_CALL_STACK
> -CC_FLAGS_SCS   := -fsanitize=shadow-call-stack
> +CC_FLAGS_SCS   := $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)

This variable should contain whatever needs to be added to the
compiler comamand line
>  KBUILD_CFLAGS  += $(CC_FLAGS_SCS)
>  export CC_FLAGS_SCS
>  endif
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 98db634..81ff127 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>
>  config SHADOW_CALL_STACK
>         bool "Clang Shadow Call Stack"
> -       depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
> +       depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK

This logic needs to be defined in such a way that a builtin
implementation provided by GCC will take precedence once it becomes
available.

>         depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
>         help
>           This option enables Clang's Shadow Call Stack, which uses a
> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> index cb9217f..426c8e5 100644
> --- a/include/linux/compiler-gcc.h
> +++ b/include/linux/compiler-gcc.h
> @@ -50,6 +50,10 @@
>  #define __latent_entropy __attribute__((latent_entropy))
>  #endif
>
> +#if defined(SHADOW_CALL_STACK_PLUGIN) && !defined(__CHECKER__)
> +#define __noscs __attribute__((no_shadow_call_stack))
> +#endif
> +

>  /*
>   * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
>   * confuse the stack allocation in gcc, leading to overly large stack
> diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
> index 952e468..eeaf2c6 100644
> --- a/scripts/Makefile.gcc-plugins
> +++ b/scripts/Makefile.gcc-plugins
> @@ -46,6 +46,10 @@ ifdef CONFIG_GCC_PLUGIN_ARM_SSP_PER_TASK
>  endif
>  export DISABLE_ARM_SSP_PER_TASK_PLUGIN
>
> +gcc-plugin-$(CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK) += arm64_scs_plugin.so
> +gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK)       \
> +               += -DSHADOW_CALL_STACK_PLUGIN
> +
>  # All the plugin CFLAGS are collected here in case a build target needs to
>  # filter them out of the KBUILD_CFLAGS.
>  GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
> diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
> index ab9eb4c..2534195e 100644
> --- a/scripts/gcc-plugins/Kconfig
> +++ b/scripts/gcc-plugins/Kconfig
> @@ -19,6 +19,14 @@ menuconfig GCC_PLUGINS
>
>  if GCC_PLUGINS
>
> +config GCC_PLUGIN_SHADOW_CALL_STACK
> +       bool "GCC Shadow Call Stack plugin"
> +       select SHADOW_CALL_STACK

You shouldn't 'select' something like this if the symbol has its own
dependencies which may be unsatisfied, as this causes a Kconfig
warning. Also, en/disabling shadow call stacks for the architecture
should be done from the arch's 'kernel features' menu, it shouldn't be
buried in the GCC plugins menu.

> +       help
> +         This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
> +         compiled by gcc. Its principle is basically the same as that of CLANG.
> +         For more information, please refer to "config SHADOW_CALL_STACK"
> +
>  config GCC_PLUGIN_CYC_COMPLEXITY
>         bool "Compute the cyclomatic complexity of a function" if EXPERT
>         depends on !COMPILE_TEST        # too noisy
> diff --git a/scripts/gcc-plugins/arm64_scs_plugin.c b/scripts/gcc-plugins/arm64_scs_plugin.c
> new file mode 100644
> index 0000000..c5a66140
> --- /dev/null
> +++ b/scripts/gcc-plugins/arm64_scs_plugin.c
> @@ -0,0 +1,256 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include "gcc-common.h"
> +
> +#define v_info(fmt, ...)                                                       \
> +       do {                                                                    \
> +               if (verbose)                                                    \
> +                       fprintf(stderr, "[SCS]: " fmt,  ## __VA_ARGS__);        \
> +       } while (0)
> +
> +#define NOSCS_ATTR_STR  "no_shadow_call_stack"
> +#define SCS_ASM_PUSH_STR "str x30, [x18], #8\n\t"
> +#define SCS_ASM_POP_STR  "ldr x30, [x18, #-8]!\n\t"
> +
> +__visible int plugin_is_GPL_compatible;
> +
> +static struct plugin_info arm64_scs_plugin_info = {
> +       .version        = "20210926vanilla",

I will respond to this obvious invitation at bikeshedding by saying
that 'salted caramel' is clearly the superior flavor of ice cream.

> +       .help           = "disable\tdo not activate plugin\n"
> +                         "verbose\tprint all debug infos\n",
> +};
> +
> +static bool verbose;
> +
> +static rtx gen_scs_push(location_t loc)
> +{
> +       rtx insn = gen_rtx_ASM_INPUT_loc(VOIDmode, ggc_strdup(SCS_ASM_PUSH_STR), loc);
> +
> +       MEM_VOLATILE_P(insn) = 1;
> +       return insn;
> +}
> +
> +static rtx gen_scs_pop(location_t loc)
> +{
> +       rtx insn = gen_rtx_ASM_INPUT_loc(VOIDmode, ggc_strdup(SCS_ASM_POP_STR), loc);
> +
> +       MEM_VOLATILE_P(insn) = 1;
> +       return insn;
> +}
> +
> +static bool arm64_scs_gate(void)
> +{
> +       bool is_ignored;
> +
> +#if BUILDING_GCC_VERSION >= 8002
> +       is_ignored = !cfun->machine->frame.emit_frame_chain;
> +#else
> +       is_ignored = !frame_pointer_needed;
> +#endif
> +
> +       /* No need to insert protection code into functions that do not push LR into stack */
> +       if (is_ignored) {
> +               v_info("No protection code inserted into func:%s in file:%s\n",
> +                       get_name(current_function_decl), main_input_filename);
> +               return 0;
> +       }
> +
> +       gcc_assert(cfun->machine->frame.wb_candidate2 == R30_REGNUM);
> +
> +       /* Don't insert protection code into functions with NOSCS_ATTR_STR attribute */
> +       if (lookup_attribute(NOSCS_ATTR_STR, DECL_ATTRIBUTES(current_function_decl))) {
> +               v_info("No protection code inserted into %s func:%s in file:%s\n", NOSCS_ATTR_STR,
> +                               get_name(current_function_decl), main_input_filename);
> +               return 0;
> +       }
> +       return 1;
> +}
> +
> +enum scs_state {
> +       /* The first valid instruction has not been found in the current instruction sequence */
> +       SCS_SEARCHING_FIRST_INSN,
> +       /* Currently searching for the return rtx instruction in this function */
> +       SCS_SEARCHING_FUNC_RETURN,
> +       /* Found an EPILOGUE_BEGIN before the function return instruction */
> +       SCS_FOUND_ONE_EPILOGUE_NOTE,
> +};
> +
> +static unsigned int arm64_scs_execute(void)
> +{
> +       rtx_insn *insn;
> +       enum scs_state state = SCS_SEARCHING_FIRST_INSN;
> +
> +       for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) {
> +               rtx mark = NULL;
> +
> +               switch (GET_CODE(insn)) {
> +               case NOTE:
> +               case BARRIER:
> +               case CODE_LABEL:
> +               case INSN:
> +               case DEBUG_INSN:
> +               case JUMP_INSN:
> +               case JUMP_TABLE_DATA:
> +                       break;
> +               case CALL_INSN:
> +                       if (SIBLING_CALL_P(insn)) {
> +                               error(G_("Sibling call found in func:%s, file:%s\n"),
> +                                               get_name(current_function_decl),
> +                                               main_input_filename);
> +                               gcc_unreachable();
> +                       }

Sibling calls are an important optimization, not only for performance
but also for stack utilization, so this needs to be fixed. Can you
elaborate on the issue you are working around here?

> +                       break;
> +               default:
> +                       error(G_("Invalid rtx_insn seqs found with type:%s in func:%s, file:%s\n"),
> +                                       GET_RTX_NAME(GET_CODE(insn)),
> +                                       get_name(current_function_decl), main_input_filename);
> +                       gcc_unreachable();
> +                       break;
> +               }
> +
> +               if (state == SCS_SEARCHING_FIRST_INSN) {
> +                       /* A function that needs to be instrumented should not found epilogue
> +                        * before its first insn
> +                        */
> +                       gcc_assert(!(NOTE_P(insn) && (NOTE_KIND(insn) == NOTE_INSN_EPILOGUE_BEG)));
> +
> +                       if (NOTE_P(insn) || INSN_DELETED_P(insn))
> +                               continue;
> +
> +                       state = SCS_SEARCHING_FUNC_RETURN;
> +
> +                       /* Insert scs pop before the first instruction found */
> +                       mark = gen_scs_push(RESERVED_LOCATION_COUNT);
> +                       emit_insn_before(mark, insn);
> +               }
> +
> +               /* Find the corresponding epilogue before 'RETURN' instruction (if any) */
> +               if (state == SCS_SEARCHING_FUNC_RETURN) {
> +                       if (NOTE_P(insn) && (NOTE_KIND(insn) == NOTE_INSN_EPILOGUE_BEG)) {
> +                               state = SCS_FOUND_ONE_EPILOGUE_NOTE;
> +                               continue;
> +                       }
> +               }
> +
> +               if (!JUMP_P(insn))
> +                       continue;
> +
> +               /* A function return insn was found */
> +               if (ANY_RETURN_P(PATTERN(insn))) {
> +                       /* There should be an epilogue before 'RETURN' inst */
> +                       if (GET_CODE(PATTERN(insn)) == RETURN) {
> +                               gcc_assert(state == SCS_FOUND_ONE_EPILOGUE_NOTE);
> +                               state = SCS_SEARCHING_FUNC_RETURN;
> +                       }
> +
> +                       /* There is no epilogue before 'SIMPLE_RETURN' insn */
> +                       if (GET_CODE(PATTERN(insn)) == SIMPLE_RETURN)
> +                               gcc_assert(state == SCS_SEARCHING_FUNC_RETURN);

These assert()s will crash the compiler if the RTL doesn't have quite
the right structure, correct? Could we issue a warning instead, saying
function 'x' could not be handled, and back out gracefully (i.e.,
don't insert the push either)?

> +
> +                       /* Insert scs pop instruction(s) before return insn */
> +                       mark = gen_scs_pop(RESERVED_LOCATION_COUNT);
> +                       emit_insn_before(mark, insn);
> +               }
> +       }
> +       return 0;
> +}
> +
> +static tree handle_noscs_attribute(tree *node, tree name, tree args __unused, int flags,
> +               bool *no_add_attrs)
> +{
> +       *no_add_attrs = true;
> +
> +       gcc_assert(DECL_P(*node));
> +       switch (TREE_CODE(*node)) {
> +       default:
> +               error(G_("%qE attribute can be applies to function decl only (%qE)"), name, *node);
> +               gcc_unreachable();
> +
> +       case FUNCTION_DECL:     /* the attribute is only used for function declarations */
> +               break;
> +       }
> +
> +       *no_add_attrs = false;

I'm not familiar with this idiom: what is the purpose of setting this
to true initially and then to false again when the expected flow
through the function is to do nothing at all?

> +       return NULL_TREE;
> +}
> +
> +static struct attribute_spec noscs_attr = {};
> +
> +static void scs_register_attributes(void *event_data __unused, void *data __unused)
> +{
> +       noscs_attr.name = NOSCS_ATTR_STR;
> +       noscs_attr.decl_required = true;
> +       noscs_attr.handler = handle_noscs_attribute;
> +       register_attribute(&noscs_attr);
> +}
> +
> +static void (*old_override_options_after_change)(void);
> +
> +static void scs_override_options_after_change(void)
> +{
> +       if (old_override_options_after_change)
> +               old_override_options_after_change();
> +
> +       flag_optimize_sibling_calls = 0;
> +}
> +
> +static void callback_before_start_unit(void *gcc_data __unused, void *user_data __unused)
> +{
> +       /* Turn off sibling call to avoid inserting duplicate scs pop codes */

Sibling calls will restore x30 before the calk, right? So where do the
duplicate pops come from?

> +       old_override_options_after_change = targetm.override_options_after_change;
> +       targetm.override_options_after_change = scs_override_options_after_change;
> +
> +       flag_optimize_sibling_calls = 0;

Do we need this twice?

> +}
> +
> +#define PASS_NAME arm64_scs
> +#define TODO_FLAGS_FINISH (TODO_dump_func | TODO_verify_rtl_sharing)
> +#include "gcc-generate-rtl-pass.h"
> +
> +__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
> +{
> +       int i;
> +       const char * const plugin_name = plugin_info->base_name;
> +       const int argc = plugin_info->argc;
> +       const struct plugin_argument * const argv = plugin_info->argv;
> +       bool enable = true;
> +
> +       PASS_INFO(arm64_scs, "shorten", 1, PASS_POS_INSERT_BEFORE);
> +
> +       if (!plugin_default_version_check(version, &gcc_version)) {
> +               error(G_("Incompatible gcc/plugin versions"));
> +               return 1;
> +       }
> +
> +       if (strncmp(lang_hooks.name, "GNU C", 5) && !strncmp(lang_hooks.name, "GNU C+", 6)) {
> +               inform(UNKNOWN_LOCATION, G_("%s supports C only, not %s"), plugin_name,
> +                               lang_hooks.name);
> +               enable = false;
> +       }
> +

Do we need this check?

> +       for (i = 0; i < argc; ++i) {
> +               if (!strcmp(argv[i].key, "disable")) {
> +                       enable = false;
> +                       continue;
> +               }
> +               if (!strcmp(argv[i].key, "verbose")) {
> +                       verbose = true;
> +                       continue;
> +               }
> +               error(G_("unknown option '-fplugin-arg-%s-%s'"), plugin_name, argv[i].key);
> +       }
> +
> +       register_callback(plugin_name, PLUGIN_INFO, NULL, &arm64_scs_plugin_info);
> +
> +       register_callback(plugin_name, PLUGIN_ATTRIBUTES, scs_register_attributes, NULL);
> +
> +       if (!enable) {
> +               v_info("Plugin disabled for file:%s\n", main_input_filename);
> +               return 0;
> +       }
> +
> +       register_callback(plugin_name, PLUGIN_START_UNIT, callback_before_start_unit, NULL);
> +
> +       register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &arm64_scs_pass_info);
> +
> +       return 0;
> +}
> --
> 2.7.4
>
Dan Li Sept. 20, 2021, 6:07 p.m. UTC | #3
Hi Nathan,

Thanks for your comments.
I rewrite the configuration as follows:

1) Change the plugin to be enabled by default, and add this option to CC_FLAGS_SCS to keep its behavior consistent with clang
---
diff --git a/Makefile b/Makefile
@@ -923,12 +923,6 @@ KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections
-ifdef CONFIG_SHADOW_CALL_STACK
-CC_FLAGS_SCS	:= -fsanitize=shadow-call-stack
-KBUILD_CFLAGS	+= $(CC_FLAGS_SCS)
-export CC_FLAGS_SCS
-endif

@@ -1034,6 +1028,20 @@ include-$(CONFIG_GCC_PLUGINS)	+= scripts/Makefile.gcc-plugins
  include $(addprefix $(srctree)/, $(include-y))
+ifdef CONFIG_SHADOW_CALL_STACK
+
+ifdef CONFIG_CC_IS_CLANG
+CC_FLAGS_SCS	:= -fsanitize=shadow-call-stack
+endif
+
+ifdef CONFIG_CC_IS_GCC
+CC_FLAGS_SCS	:= $(ENABLE_SHADOW_CALL_STACK_PLUGIN)
+endif
+
+KBUILD_CFLAGS	+= $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif

diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
@@ -46,6 +46,13 @@ ifdef CONFIG_GCC_PLUGIN_ARM_SSP_PER_TASK
+gcc-plugin-$(CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK) += arm64_scs_plugin.so
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK)	\
+		+= -DSHADOW_CALL_STACK_PLUGIN
+ifdef CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK
+    ENABLE_SHADOW_CALL_STACK_PLUGIN += -fplugin-arg-arm64_scs_plugin-enable
+endif

2) Whether SCS is turned on or not is determined by CONFIG_SHADOW_CALL_STACK
    * GCC_PLUGIN_SHADOW_CALL_STACK is only used to indicate whether current platform needs the support of the gcc SCS plugin
      - It only enabled on ARM64 platform with gcc which does not support SCS(!CC_HAVE_SHADOW_CALL_STACK)
      - If one compiler supports SCS (clang or gcc), then CC_HAVE_SHADOW_CALL_STACK should be true at this time, and the plugin is automatically closed
    * As long as the current platform can support SCS(compiler or plugin), ARCH_SUPPORTS_SHADOW_CALL_STACK is always selected
    * CONFIG_SHADOW_CALL_STACK no longer depends on CC_IS_CLANG
---
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
@@ -19,6 +19,15 @@ menuconfig GCC_PLUGINS
+config GCC_PLUGIN_SHADOW_CALL_STACK
+	bool "GCC Shadow Call Stack plugin"
+	depends on (!CC_HAVE_SHADOW_CALL_STACK) && ARM64
+	default y
+	help	....

diff --git a/arch/Kconfig b/arch/Kconfig
@@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
  
  config SHADOW_CALL_STACK
  	bool "Clang Shadow Call Stack"
-	depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+	depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
  	depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
  	help
  	  This option enables Clang's Shadow Call Stack, which uses a
	
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
@@ -81,7 +81,7 @@ config ARM64
-	select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
+	select ARCH_SUPPORTS_SHADOW_CALL_STACK if (CC_HAVE_SHADOW_CALL_STACK || GCC_PLUGIN_SHADOW_CALL_STACK)
  	select ARCH_SUPPORTS_LTO_CLANG if CPU_LITTLE_ENDIAN
@@ -1060,9 +1060,13 @@ config HW_PERF_EVENTS
  # Supported by clang >= 7.0
  config CC_HAVE_SHADOW_CALL_STACK
-	def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+	def_bool (CC_IS_CLANG && $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18))


On 9/20/21 5:45 AM, Nathan Chancellor wrote:
> Hi Dan,
>> diff --git a/Makefile b/Makefile
>>   ifdef CONFIG_SHADOW_CALL_STACK
> 
> I would rather see this become
> 
> ifeq ($(CONFIG_SHADOW_CALL_STACK)$(CONFIG_CC_IS_CLANG), yy)
> ...
> endif
> 
> rather than just avoiding assigning to CC_FLAGS_SCS.
> 
> However, how does disabling the shadow call stack plugin work for a
> whole translation unit or directory? There are a few places where
> CC_FLAGS_SCS are filtered out and I am not sure I see where that happens
> here? It looks like the plugin has a disabled option but I do not see it
> hooked in anywhere.
   In the new code, translation unit can only enable SCS when CC_FLAGS_SCS is specified.
   This behavior will be consistent with clang.
   If there are other problems in the future, those two can be modified together.
> 
>> -CC_FLAGS_SCS	:= -fsanitize=shadow-call-stack
>> +CC_FLAGS_SCS	:= $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)

>>   KBUILD_CFLAGS	+= $(CC_FLAGS_SCS)
>>   export CC_FLAGS_SCS
>>   endif
>> diff --git a/arch/Kconfig b/arch/Kconfig
>> index 98db634..81ff127 100644
>> --- a/arch/Kconfig
>> +++ b/arch/Kconfig
>> @@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>>   
>>   config SHADOW_CALL_STACK
>>   	bool "Clang Shadow Call Stack"
>> -	depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
>> +	depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK
> 
> Is this logic right? SHADOW_CALL_STACK is only supported by arm64 (as
> they set ARCH_SUPPORTS_SHADOW_CALL_STACK) but now you are enabling it
> for any architecture, even though it seems like it still only works on
> arm64. I think this wants to be
> 
> depends on (CC_IS_CLANG || GCC_PLUGIN_SHADOW_CALL_STACK) && ARCH_SUPPORTS_SHADOW_CALL_STACK
> 
   It's modified to rely only on ARCH_SUPPORTS_SHADOW_CALL_STACK	
>> --- a/scripts/gcc-plugins/Kconfig
>> +++ b/scripts/gcc-plugins/Kconfig
>> @@ -19,6 +19,14 @@ menuconfig GCC_PLUGINS
>>   
>>   if GCC_PLUGINS
>>   
>> +config GCC_PLUGIN_SHADOW_CALL_STACK
>> +	bool "GCC Shadow Call Stack plugin"
> 
> This should also have a
> 
> depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> 
> if you are selecting SHADOW_CALL_STACK, as selecting does not account
> for dependencies.
   Select is removed from the code above
>> +	select SHADOW_CALL_STACK
>> +	help
>> +	  This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
>> +	  compiled by gcc. Its principle is basically the same as that of CLANG.
>> +	  For more information, please refer to "config SHADOW_CALL_STACK"
>> +
>>   config GCC_PLUGIN_CYC_COMPLEXITY
>>   	bool "Compute the cyclomatic complexity of a function" if EXPERT
>>   	depends on !COMPILE_TEST	# too noisy
> 
> Cheers,
> Nathan
>
Dan Li Sept. 20, 2021, 6:53 p.m. UTC | #4
Hi Ard,

Thanks for your comment.

I pasted a copy of the config code in my last email, could you please check it again?

On 9/20/21 3:18 PM, Ard Biesheuvel wrote:
> Hi Dan,
> 
> On Sun, 19 Sept 2021 at 18:37, Dan Li <ashimida@linux.alibaba.com> wrote:
>>
>> The Clang-based shadow call stack protection has been integrated into the
>> mainline, but kernel compiled by gcc cannot enable this feature for now.
>>
>> This Patch supports gcc-based SCS protection by adding a plugin.
>>
> 
> Thanks for working on this. I had a stab at this myself about 2 years
> ago and couldn't make it work.
> 
>> For each function that x30 will be pushed onto the stack during execution,
>> this plugin:
>> 1) insert "str x30, [x18], #8" at the entry of the function to save x30
>>     to current SCS
>> 2) insert "ldr x30, [x18, #-8]!"  before the exit of this function to
>>     restore x30
>>
> 
> This logic seems sound to me, but it would be nice if someone more
> familiar with Clang's implementation could confirm that it is really
> this simple.
> 
> Looking at your plugin, there is an issue with tail calls, and I don't
> think Clang simply disables those altogether as well, right?

I am not familiar with clang's code, the logic comes from clang's description and the
disassembled binary code for now, so it may be different from the actual situation.

The tail call could be handled (theoretically), and I will try to solve the issue in
the next version.
> 
>>   ifdef CONFIG_SHADOW_CALL_STACK
>> -CC_FLAGS_SCS   := -fsanitize=shadow-call-stack
>> +CC_FLAGS_SCS   := $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)
> 
> This variable should contain whatever needs to be added to the
> compiler comamand line
   In the new code, an 'enable' option is added here to enable the plugin
>>   KBUILD_CFLAGS  += $(CC_FLAGS_SCS)
>>   export CC_FLAGS_SCS
>>   endif
>> diff --git a/arch/Kconfig b/arch/Kconfig
>> index 98db634..81ff127 100644
>> --- a/arch/Kconfig
>> +++ b/arch/Kconfig
>> @@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>>
>>   config SHADOW_CALL_STACK
>>          bool "Clang Shadow Call Stack"
>> -       depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
>> +       depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK
> 
> This logic needs to be defined in such a way that a builtin
> implementation provided by GCC will take precedence once it becomes
> available.
> 
   In new code, if gcc supports SCS in the future, the plugin will be closed due to
   CC_HAVE_SHADOW_CALL_STACK is true.
>>          depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
>>          help
>>            This option enables Clang's Shadow Call Stack, which uses a
>> diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
>> index ab9eb4c..2534195e 100644
>> --- a/scripts/gcc-plugins/Kconfig
>> +++ b/scripts/gcc-plugins/Kconfig
>> @@ -19,6 +19,14 @@ menuconfig GCC_PLUGINS
>>
>>   if GCC_PLUGINS
>>
>> +config GCC_PLUGIN_SHADOW_CALL_STACK
>> +       bool "GCC Shadow Call Stack plugin"
>> +       select SHADOW_CALL_STACK
> 
> You shouldn't 'select' something like this if the symbol has its own
> dependencies which may be unsatisfied, as this causes a Kconfig
> warning. Also, en/disabling shadow call stacks for the architecture
> should be done from the arch's 'kernel features' menu, it shouldn't be
> buried in the GCC plugins menu.
    I removed 'select' in the new version.
    SCS's enable is changed to rely on CONFIG_SHADOW_CALL_STACK in arch/kernel,
    the GCC_PLUGIN_SHADOW_CALL_STACK config is just to add a usable platform to it.
>> +       help
>> +         This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
>> +         compiled by gcc. Its principle is basically the same as that of CLANG.
>> +         For more information, please refer to "config SHADOW_CALL_STACK"
>> +
>> +__visible int plugin_is_GPL_compatible;
>> +
>> +static struct plugin_info arm64_scs_plugin_info = {
>> +       .version        = "20210926vanilla",
> 
> I will respond to this obvious invitation at bikeshedding by saying
> that 'salted caramel' is clearly the superior flavor of ice cream.
   I'm sorry, as a non-native English speaker, I think I might not understand
   what you mean here. My intention is to say that this is the first/initial
   version, do I miss something?
>> +       .help           = "disable\tdo not activate plugin\n"
>> +                         "verbose\tprint all debug infos\n",
>> +};
>> +static unsigned int arm64_scs_execute(void)
>> +{
>> +       rtx_insn *insn;
>> +       enum scs_state state = SCS_SEARCHING_FIRST_INSN;
>> +
>> +       for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) {
>> +               rtx mark = NULL;
>> +
>> +               switch (GET_CODE(insn)) {
>> +               case NOTE:
>> +               case BARRIER:
>> +               case CODE_LABEL:
>> +               case INSN:
>> +               case DEBUG_INSN:
>> +               case JUMP_INSN:
>> +               case JUMP_TABLE_DATA:
>> +                       break;
>> +               case CALL_INSN:
>> +                       if (SIBLING_CALL_P(insn)) {
>> +                               error(G_("Sibling call found in func:%s, file:%s\n"),
>> +                                               get_name(current_function_decl),
>> +                                               main_input_filename);
>> +                               gcc_unreachable();
>> +                       }
> 
> Sibling calls are an important optimization, not only for performance
> but also for stack utilization, so this needs to be fixed. Can you
> elaborate on the issue you are working around here?
>
   Since the ARM64 has disabled sibling calls (-fno-optimize-sibling-calls) by default,
   there is almost no sibling call appear in the kernel I encountered.
   So I did not provide support for it, and I will fix this issue in the next version.
>> +                       break;
>> +               default:
>> +                       error(G_("Invalid rtx_insn seqs found with type:%s in func:%s, file:%s\n"),
>> +                                       GET_RTX_NAME(GET_CODE(insn)),
>> +                                       get_name(current_function_decl), main_input_filename);
>> +                       gcc_unreachable();
>> +                       break;
>> +               }
>> +               /* A function return insn was found */
>> +               if (ANY_RETURN_P(PATTERN(insn))) {
>> +                       /* There should be an epilogue before 'RETURN' inst */
>> +                       if (GET_CODE(PATTERN(insn)) == RETURN) {
>> +                               gcc_assert(state == SCS_FOUND_ONE_EPILOGUE_NOTE);
>> +                               state = SCS_SEARCHING_FUNC_RETURN;
>> +                       }
>> +
>> +                       /* There is no epilogue before 'SIMPLE_RETURN' insn */
>> +                       if (GET_CODE(PATTERN(insn)) == SIMPLE_RETURN)
>> +                               gcc_assert(state == SCS_SEARCHING_FUNC_RETURN);
> 
> These assert()s will crash the compiler if the RTL doesn't have quite
> the right structure, correct? Could we issue a warning instead, saying
> function 'x' could not be handled, and back out gracefully (i.e.,
> don't insert the push either)?
> 
    Sure, I think I need to dynamically mark all instrumented positions here,
    and then confirm that the instruction sequence is correct before inserting in batches.
>> +
>> +                       /* Insert scs pop instruction(s) before return insn */
>> +                       mark = gen_scs_pop(RESERVED_LOCATION_COUNT);
>> +                       emit_insn_before(mark, insn);
>> +               }
>> +       }
>> +       return 0;
>> +}
>> +
>> +static tree handle_noscs_attribute(tree *node, tree name, tree args __unused, int flags,
>> +               bool *no_add_attrs)
>> +{
>> +       *no_add_attrs = true;
>> +
>> +       gcc_assert(DECL_P(*node));
>> +       switch (TREE_CODE(*node)) {
>> +       default:
>> +               error(G_("%qE attribute can be applies to function decl only (%qE)"), name, *node);
>> +               gcc_unreachable();
>> +
>> +       case FUNCTION_DECL:     /* the attribute is only used for function declarations */
>> +               break;
>> +       }
>> +
>> +       *no_add_attrs = false;
> 
> I'm not familiar with this idiom: what is the purpose of setting this
> to true initially and then to false again when the expected flow
> through the function is to do nothing at all?
> 
    This is my mistake, at the beginning default case only return 0 directly after a warning;
    At that time, if *no_add_attrs is true, the corresponding attribute will not be added to 'node',
    and it means __noscs attribute can only be added for FUNCTION_DECL.
    For now, *no_add_attrs = true; is useless, it should be deleted.

    But if, as you said, try to back out gracefully, is it better to report warning in the default case?
>> +       return NULL_TREE;
>> +}
>> +
>> +static void (*old_override_options_after_change)(void);
>> +
>> +static void scs_override_options_after_change(void)
>> +{
>> +       if (old_override_options_after_change)
>> +               old_override_options_after_change();
>> +
>> +       flag_optimize_sibling_calls = 0;
>> +}
>> +
>> +static void callback_before_start_unit(void *gcc_data __unused, void *user_data __unused)
>> +{
>> +       /* Turn off sibling call to avoid inserting duplicate scs pop codes */
> 
> Sibling calls will restore x30 before the calk, right? So where do the
> duplicate pops come from?
    a sibling call could be like:
    stp     x29, x30, [sp, #-xx]!
    .......
    ldp     x29, x30, [sp], #xx
    ---> p1
    b       callee
    ldp     x29, x30, [sp], #xx
    ---> p2
    ret
    
    What i mean here is if we need to insert, the scs pop code should be insert in both p1/p2,
> 
>> +       old_override_options_after_change = targetm.override_options_after_change;
>> +       targetm.override_options_after_change = scs_override_options_after_change;
>> +
>> +       flag_optimize_sibling_calls = 0;
> 
> Do we need this twice?
   I think so, there are functions similar to push/pop in gcc (cl_optimization_restore/save)
   * callback_before_start_unit is used to set zero during initialization
   * scs_override_options_after_change is used to reset to 0 after a 'push' occurs
>> +}
>> +
>> +#define PASS_NAME arm64_scs
>> +#define TODO_FLAGS_FINISH (TODO_dump_func | TODO_verify_rtl_sharing)
>> +#include "gcc-generate-rtl-pass.h"
>> +
>> +__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
>> +{
>> +       int i;
>> +       const char * const plugin_name = plugin_info->base_name;
>> +       const int argc = plugin_info->argc;
>> +       const struct plugin_argument * const argv = plugin_info->argv;
>> +       bool enable = true;
>> +
>> +       PASS_INFO(arm64_scs, "shorten", 1, PASS_POS_INSERT_BEFORE);
>> +
>> +       if (!plugin_default_version_check(version, &gcc_version)) {
>> +               error(G_("Incompatible gcc/plugin versions"));
>> +               return 1;
>> +       }
>> +
>> +       if (strncmp(lang_hooks.name, "GNU C", 5) && !strncmp(lang_hooks.name, "GNU C+", 6)) {
>> +               inform(UNKNOWN_LOCATION, G_("%s supports C only, not %s"), plugin_name,
>> +                               lang_hooks.name);
>> +               enable = false;
>> +       }
>> +
> 
> Do we need this check?
   This code is copied from structleak_plugin.c, I misunderstood the meaning here, and I will delete it later
> 
>> +       for (i = 0; i < argc; ++i) {
>> +               if (!strcmp(argv[i].key, "disable")) {
>> +                       enable = false;
>> +                       continue;
>> +               }
>> +               if (!strcmp(argv[i].key, "verbose")) {
>> +                       verbose = true;
>> +                       continue;
>> +               }
>> +               error(G_("unknown option '-fplugin-arg-%s-%s'"), plugin_name, argv[i].key);
>> +       }
>> +
>> +       register_callback(plugin_name, PLUGIN_INFO, NULL, &arm64_scs_plugin_info);
>> +
>> +       register_callback(plugin_name, PLUGIN_ATTRIBUTES, scs_register_attributes, NULL);
>> +
>> +       if (!enable) {
>> +               v_info("Plugin disabled for file:%s\n", main_input_filename);
>> +               return 0;
>> +       }
>> +
>> +       register_callback(plugin_name, PLUGIN_START_UNIT, callback_before_start_unit, NULL);
>> +
>> +       register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &arm64_scs_pass_info);
>> +
>> +       return 0;
>> +}
>> --
>> 2.7.4
>>
Ard Biesheuvel Sept. 20, 2021, 9:22 p.m. UTC | #5
On Mon, 20 Sept 2021 at 20:53, Dan Li <ashimida@linux.alibaba.com> wrote:
>
> Hi Ard,
>
> Thanks for your comment.
>
> I pasted a copy of the config code in my last email, could you please check it again?
>
> On 9/20/21 3:18 PM, Ard Biesheuvel wrote:
> > Hi Dan,
> >
> > On Sun, 19 Sept 2021 at 18:37, Dan Li <ashimida@linux.alibaba.com> wrote:
> >>
> >> The Clang-based shadow call stack protection has been integrated into the
> >> mainline, but kernel compiled by gcc cannot enable this feature for now.
> >>
> >> This Patch supports gcc-based SCS protection by adding a plugin.
> >>
> >
> > Thanks for working on this. I had a stab at this myself about 2 years
> > ago and couldn't make it work.
> >
> >> For each function that x30 will be pushed onto the stack during execution,
> >> this plugin:
> >> 1) insert "str x30, [x18], #8" at the entry of the function to save x30
> >>     to current SCS
> >> 2) insert "ldr x30, [x18, #-8]!"  before the exit of this function to
> >>     restore x30
> >>
> >
> > This logic seems sound to me, but it would be nice if someone more
> > familiar with Clang's implementation could confirm that it is really
> > this simple.
> >
> > Looking at your plugin, there is an issue with tail calls, and I don't
> > think Clang simply disables those altogether as well, right?
>
> I am not familiar with clang's code, the logic comes from clang's description and the
> disassembled binary code for now, so it may be different from the actual situation.
>

OK

> The tail call could be handled (theoretically), and I will try to solve the issue in
> the next version.
> >
> >>   ifdef CONFIG_SHADOW_CALL_STACK
> >> -CC_FLAGS_SCS   := -fsanitize=shadow-call-stack
> >> +CC_FLAGS_SCS   := $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)
> >
> > This variable should contain whatever needs to be added to the
> > compiler comamand line
>    In the new code, an 'enable' option is added here to enable the plugin
> >>   KBUILD_CFLAGS  += $(CC_FLAGS_SCS)
> >>   export CC_FLAGS_SCS
> >>   endif
> >> diff --git a/arch/Kconfig b/arch/Kconfig
> >> index 98db634..81ff127 100644
> >> --- a/arch/Kconfig
> >> +++ b/arch/Kconfig
> >> @@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
> >>
> >>   config SHADOW_CALL_STACK
> >>          bool "Clang Shadow Call Stack"
> >> -       depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
> >> +       depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK
> >
> > This logic needs to be defined in such a way that a builtin
> > implementation provided by GCC will take precedence once it becomes
> > available.
> >
>    In new code, if gcc supports SCS in the future, the plugin will be closed due to
>    CC_HAVE_SHADOW_CALL_STACK is true.
> >>          depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
> >>          help
> >>            This option enables Clang's Shadow Call Stack, which uses a
> >> diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
> >> index ab9eb4c..2534195e 100644
> >> --- a/scripts/gcc-plugins/Kconfig
> >> +++ b/scripts/gcc-plugins/Kconfig
> >> @@ -19,6 +19,14 @@ menuconfig GCC_PLUGINS
> >>
> >>   if GCC_PLUGINS
> >>
> >> +config GCC_PLUGIN_SHADOW_CALL_STACK
> >> +       bool "GCC Shadow Call Stack plugin"
> >> +       select SHADOW_CALL_STACK
> >
> > You shouldn't 'select' something like this if the symbol has its own
> > dependencies which may be unsatisfied, as this causes a Kconfig
> > warning. Also, en/disabling shadow call stacks for the architecture
> > should be done from the arch's 'kernel features' menu, it shouldn't be
> > buried in the GCC plugins menu.
>     I removed 'select' in the new version.
>     SCS's enable is changed to rely on CONFIG_SHADOW_CALL_STACK in arch/kernel,
>     the GCC_PLUGIN_SHADOW_CALL_STACK config is just to add a usable platform to it.
> >> +       help
> >> +         This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
> >> +         compiled by gcc. Its principle is basically the same as that of CLANG.
> >> +         For more information, please refer to "config SHADOW_CALL_STACK"
> >> +
> >> +__visible int plugin_is_GPL_compatible;
> >> +
> >> +static struct plugin_info arm64_scs_plugin_info = {
> >> +       .version        = "20210926vanilla",
> >
> > I will respond to this obvious invitation at bikeshedding by saying
> > that 'salted caramel' is clearly the superior flavor of ice cream.
>    I'm sorry, as a non-native English speaker, I think I might not understand
>    what you mean here. My intention is to say that this is the first/initial
>    version, do I miss something?

It was a joke - don't worry about it.

> >> +       .help           = "disable\tdo not activate plugin\n"
> >> +                         "verbose\tprint all debug infos\n",
> >> +};
> >> +static unsigned int arm64_scs_execute(void)
> >> +{
> >> +       rtx_insn *insn;
> >> +       enum scs_state state = SCS_SEARCHING_FIRST_INSN;
> >> +
> >> +       for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) {
> >> +               rtx mark = NULL;
> >> +
> >> +               switch (GET_CODE(insn)) {
> >> +               case NOTE:
> >> +               case BARRIER:
> >> +               case CODE_LABEL:
> >> +               case INSN:
> >> +               case DEBUG_INSN:
> >> +               case JUMP_INSN:
> >> +               case JUMP_TABLE_DATA:
> >> +                       break;
> >> +               case CALL_INSN:
> >> +                       if (SIBLING_CALL_P(insn)) {
> >> +                               error(G_("Sibling call found in func:%s, file:%s\n"),
> >> +                                               get_name(current_function_decl),
> >> +                                               main_input_filename);
> >> +                               gcc_unreachable();
> >> +                       }
> >
> > Sibling calls are an important optimization, not only for performance
> > but also for stack utilization, so this needs to be fixed. Can you
> > elaborate on the issue you are working around here?
> >
>    Since the ARM64 has disabled sibling calls (-fno-optimize-sibling-calls) by default,
>    there is almost no sibling call appear in the kernel I encountered.

What do you mean this is disabled by default? Is that a compiler
setting or a Linux setting?




>    So I did not provide support for it, and I will fix this issue in the next version.
> >> +                       break;
> >> +               default:
> >> +                       error(G_("Invalid rtx_insn seqs found with type:%s in func:%s, file:%s\n"),
> >> +                                       GET_RTX_NAME(GET_CODE(insn)),
> >> +                                       get_name(current_function_decl), main_input_filename);
> >> +                       gcc_unreachable();
> >> +                       break;
> >> +               }
> >> +               /* A function return insn was found */
> >> +               if (ANY_RETURN_P(PATTERN(insn))) {
> >> +                       /* There should be an epilogue before 'RETURN' inst */
> >> +                       if (GET_CODE(PATTERN(insn)) == RETURN) {
> >> +                               gcc_assert(state == SCS_FOUND_ONE_EPILOGUE_NOTE);
> >> +                               state = SCS_SEARCHING_FUNC_RETURN;
> >> +                       }
> >> +
> >> +                       /* There is no epilogue before 'SIMPLE_RETURN' insn */
> >> +                       if (GET_CODE(PATTERN(insn)) == SIMPLE_RETURN)
> >> +                               gcc_assert(state == SCS_SEARCHING_FUNC_RETURN);
> >
> > These assert()s will crash the compiler if the RTL doesn't have quite
> > the right structure, correct? Could we issue a warning instead, saying
> > function 'x' could not be handled, and back out gracefully (i.e.,
> > don't insert the push either)?
> >
>     Sure, I think I need to dynamically mark all instrumented positions here,
>     and then confirm that the instruction sequence is correct before inserting in batches.

Yes, that sounds more suitable.

> >> +
> >> +                       /* Insert scs pop instruction(s) before return insn */
> >> +                       mark = gen_scs_pop(RESERVED_LOCATION_COUNT);
> >> +                       emit_insn_before(mark, insn);
> >> +               }
> >> +       }
> >> +       return 0;
> >> +}
> >> +
> >> +static tree handle_noscs_attribute(tree *node, tree name, tree args __unused, int flags,
> >> +               bool *no_add_attrs)
> >> +{
> >> +       *no_add_attrs = true;
> >> +
> >> +       gcc_assert(DECL_P(*node));
> >> +       switch (TREE_CODE(*node)) {
> >> +       default:
> >> +               error(G_("%qE attribute can be applies to function decl only (%qE)"), name, *node);
> >> +               gcc_unreachable();
> >> +
> >> +       case FUNCTION_DECL:     /* the attribute is only used for function declarations */
> >> +               break;
> >> +       }
> >> +
> >> +       *no_add_attrs = false;
> >
> > I'm not familiar with this idiom: what is the purpose of setting this
> > to true initially and then to false again when the expected flow
> > through the function is to do nothing at all?
> >
>     This is my mistake, at the beginning default case only return 0 directly after a warning;
>     At that time, if *no_add_attrs is true, the corresponding attribute will not be added to 'node',
>     and it means __noscs attribute can only be added for FUNCTION_DECL.
>     For now, *no_add_attrs = true; is useless, it should be deleted.
>
>     But if, as you said, try to back out gracefully, is it better to report warning in the default case?

error() just terminates the compile with an error, right? I think that is fine.


> >> +       return NULL_TREE;
> >> +}
> >> +
> >> +static void (*old_override_options_after_change)(void);
> >> +
> >> +static void scs_override_options_after_change(void)
> >> +{
> >> +       if (old_override_options_after_change)
> >> +               old_override_options_after_change();
> >> +
> >> +       flag_optimize_sibling_calls = 0;
> >> +}
> >> +
> >> +static void callback_before_start_unit(void *gcc_data __unused, void *user_data __unused)
> >> +{
> >> +       /* Turn off sibling call to avoid inserting duplicate scs pop codes */
> >
> > Sibling calls will restore x30 before the calk, right? So where do the
> > duplicate pops come from?
>     a sibling call could be like:
>     stp     x29, x30, [sp, #-xx]!
>     .......
>     ldp     x29, x30, [sp], #xx
>     ---> p1
>     b       callee
>     ldp     x29, x30, [sp], #xx
>     ---> p2
>     ret
>
>     What i mean here is if we need to insert, the scs pop code should be insert in both p1/p2,

Yes, so you have to identify the 'b' insn as a function return so it
is treated the same.

> >
> >> +       old_override_options_after_change = targetm.override_options_after_change;
> >> +       targetm.override_options_after_change = scs_override_options_after_change;
> >> +
> >> +       flag_optimize_sibling_calls = 0;
> >
> > Do we need this twice?
>    I think so, there are functions similar to push/pop in gcc (cl_optimization_restore/save)
>    * callback_before_start_unit is used to set zero during initialization
>    * scs_override_options_after_change is used to reset to 0 after a 'push' occurs

OK

> >> +}
> >> +
> >> +#define PASS_NAME arm64_scs
> >> +#define TODO_FLAGS_FINISH (TODO_dump_func | TODO_verify_rtl_sharing)
> >> +#include "gcc-generate-rtl-pass.h"
> >> +
> >> +__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
> >> +{
> >> +       int i;
> >> +       const char * const plugin_name = plugin_info->base_name;
> >> +       const int argc = plugin_info->argc;
> >> +       const struct plugin_argument * const argv = plugin_info->argv;
> >> +       bool enable = true;
> >> +
> >> +       PASS_INFO(arm64_scs, "shorten", 1, PASS_POS_INSERT_BEFORE);
> >> +
> >> +       if (!plugin_default_version_check(version, &gcc_version)) {
> >> +               error(G_("Incompatible gcc/plugin versions"));
> >> +               return 1;
> >> +       }
> >> +
> >> +       if (strncmp(lang_hooks.name, "GNU C", 5) && !strncmp(lang_hooks.name, "GNU C+", 6)) {
> >> +               inform(UNKNOWN_LOCATION, G_("%s supports C only, not %s"), plugin_name,
> >> +                               lang_hooks.name);
> >> +               enable = false;
> >> +       }
> >> +
> >
> > Do we need this check?
>    This code is copied from structleak_plugin.c, I misunderstood the meaning here, and I will delete it later

OK. Kees should correct me if I'm wrong, but we use GCC in the kernel
only to compile C files, so this check should be redundant.


> >
> >> +       for (i = 0; i < argc; ++i) {
> >> +               if (!strcmp(argv[i].key, "disable")) {
> >> +                       enable = false;
> >> +                       continue;
> >> +               }
> >> +               if (!strcmp(argv[i].key, "verbose")) {
> >> +                       verbose = true;
> >> +                       continue;
> >> +               }
> >> +               error(G_("unknown option '-fplugin-arg-%s-%s'"), plugin_name, argv[i].key);
> >> +       }
> >> +
> >> +       register_callback(plugin_name, PLUGIN_INFO, NULL, &arm64_scs_plugin_info);
> >> +
> >> +       register_callback(plugin_name, PLUGIN_ATTRIBUTES, scs_register_attributes, NULL);
> >> +
> >> +       if (!enable) {
> >> +               v_info("Plugin disabled for file:%s\n", main_input_filename);
> >> +               return 0;
> >> +       }
> >> +
> >> +       register_callback(plugin_name, PLUGIN_START_UNIT, callback_before_start_unit, NULL);
> >> +
> >> +       register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &arm64_scs_pass_info);
> >> +
> >> +       return 0;
> >> +}
> >> --
> >> 2.7.4
> >>
Dan Li Sept. 21, 2021, 6 a.m. UTC | #6
On 9/21/21 5:22 AM, Ard Biesheuvel wrote:
> On Mon, 20 Sept 2021 at 20:53, Dan Li <ashimida@linux.alibaba.com> wrote:
>>
>> Hi Ard,
>>
>> Thanks for your comment.
>>
>> I pasted a copy of the config code in my last email, could you please check it again?
>>
>> On 9/20/21 3:18 PM, Ard Biesheuvel wrote:
>>> Hi Dan,
>>>
>>> On Sun, 19 Sept 2021 at 18:37, Dan Li <ashimida@linux.alibaba.com> wrote:
>>>>
>>>> The Clang-based shadow call stack protection has been integrated into the
>>>> mainline, but kernel compiled by gcc cannot enable this feature for now.
>>>>
>>>> This Patch supports gcc-based SCS protection by adding a plugin.
>>>>
>>>
>>> Thanks for working on this. I had a stab at this myself about 2 years
>>> ago and couldn't make it work.
>>>
>>>> For each function that x30 will be pushed onto the stack during execution,
>>>> this plugin:
>>>> 1) insert "str x30, [x18], #8" at the entry of the function to save x30
>>>>      to current SCS
>>>> 2) insert "ldr x30, [x18, #-8]!"  before the exit of this function to
>>>>      restore x30
>>>>
>>>
>>> This logic seems sound to me, but it would be nice if someone more
>>> familiar with Clang's implementation could confirm that it is really
>>> this simple.
>>>
>>> Looking at your plugin, there is an issue with tail calls, and I don't
>>> think Clang simply disables those altogether as well, right?
>>
>> I am not familiar with clang's code, the logic comes from clang's description and the
>> disassembled binary code for now, so it may be different from the actual situation.
>>
> 
> OK
> 
>> The tail call could be handled (theoretically), and I will try to solve the issue in
>> the next version.
>>>
>>>>    ifdef CONFIG_SHADOW_CALL_STACK
>>>> -CC_FLAGS_SCS   := -fsanitize=shadow-call-stack
>>>> +CC_FLAGS_SCS   := $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)
>>>
>>> This variable should contain whatever needs to be added to the
>>> compiler comamand line
>>     In the new code, an 'enable' option is added here to enable the plugin
>>>>    KBUILD_CFLAGS  += $(CC_FLAGS_SCS)
>>>>    export CC_FLAGS_SCS
>>>>    endif
>>>> diff --git a/arch/Kconfig b/arch/Kconfig
>>>> index 98db634..81ff127 100644
>>>> --- a/arch/Kconfig
>>>> +++ b/arch/Kconfig
>>>> @@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>>>>
>>>>    config SHADOW_CALL_STACK
>>>>           bool "Clang Shadow Call Stack"
>>>> -       depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
>>>> +       depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK
>>>
>>> This logic needs to be defined in such a way that a builtin
>>> implementation provided by GCC will take precedence once it becomes
>>> available.
>>>
>>     In new code, if gcc supports SCS in the future, the plugin will be closed due to
>>     CC_HAVE_SHADOW_CALL_STACK is true.
>>>>           depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
>>>>           help
>>>>             This option enables Clang's Shadow Call Stack, which uses a
>>>> diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
>>>> index ab9eb4c..2534195e 100644
>>>> --- a/scripts/gcc-plugins/Kconfig
>>>> +++ b/scripts/gcc-plugins/Kconfig
>>>> @@ -19,6 +19,14 @@ menuconfig GCC_PLUGINS
>>>>
>>>>    if GCC_PLUGINS
>>>>
>>>> +config GCC_PLUGIN_SHADOW_CALL_STACK
>>>> +       bool "GCC Shadow Call Stack plugin"
>>>> +       select SHADOW_CALL_STACK
>>>
>>> You shouldn't 'select' something like this if the symbol has its own
>>> dependencies which may be unsatisfied, as this causes a Kconfig
>>> warning. Also, en/disabling shadow call stacks for the architecture
>>> should be done from the arch's 'kernel features' menu, it shouldn't be
>>> buried in the GCC plugins menu.
>>      I removed 'select' in the new version.
>>      SCS's enable is changed to rely on CONFIG_SHADOW_CALL_STACK in arch/kernel,
>>      the GCC_PLUGIN_SHADOW_CALL_STACK config is just to add a usable platform to it.
>>>> +       help
>>>> +         This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
>>>> +         compiled by gcc. Its principle is basically the same as that of CLANG.
>>>> +         For more information, please refer to "config SHADOW_CALL_STACK"
>>>> +
>>>> +__visible int plugin_is_GPL_compatible;
>>>> +
>>>> +static struct plugin_info arm64_scs_plugin_info = {
>>>> +       .version        = "20210926vanilla",
>>>
>>> I will respond to this obvious invitation at bikeshedding by saying
>>> that 'salted caramel' is clearly the superior flavor of ice cream.
>>     I'm sorry, as a non-native English speaker, I think I might not understand
>>     what you mean here. My intention is to say that this is the first/initial
>>     version, do I miss something?
> 
> It was a joke - don't worry about it.
> 
>>>> +       .help           = "disable\tdo not activate plugin\n"
>>>> +                         "verbose\tprint all debug infos\n",
>>>> +};
>>>> +static unsigned int arm64_scs_execute(void)
>>>> +{
>>>> +       rtx_insn *insn;
>>>> +       enum scs_state state = SCS_SEARCHING_FIRST_INSN;
>>>> +
>>>> +       for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) {
>>>> +               rtx mark = NULL;
>>>> +
>>>> +               switch (GET_CODE(insn)) {
>>>> +               case NOTE:
>>>> +               case BARRIER:
>>>> +               case CODE_LABEL:
>>>> +               case INSN:
>>>> +               case DEBUG_INSN:
>>>> +               case JUMP_INSN:
>>>> +               case JUMP_TABLE_DATA:
>>>> +                       break;
>>>> +               case CALL_INSN:
>>>> +                       if (SIBLING_CALL_P(insn)) {
>>>> +                               error(G_("Sibling call found in func:%s, file:%s\n"),
>>>> +                                               get_name(current_function_decl),
>>>> +                                               main_input_filename);
>>>> +                               gcc_unreachable();
>>>> +                       }
>>>
>>> Sibling calls are an important optimization, not only for performance
>>> but also for stack utilization, so this needs to be fixed. Can you
>>> elaborate on the issue you are working around here?
>>>
>>     Since the ARM64 has disabled sibling calls (-fno-optimize-sibling-calls) by default,
>>     there is almost no sibling call appear in the kernel I encountered.
> 
> What do you mean this is disabled by default? Is that a compiler
> setting or a Linux setting?
It's a linux setting in aarch64 kernel.

In aarch64, since CONFIG_FRAME_POINTER is always selected, -fno-optimize-sibling-calls is
usually enable by default, and I think sibling calls rarely appear (I only encountered
it once in my cases from bsp's code):

./arch/arm64/Kconfig
config ARM64
...
select FRAME_POINTER

./Makefile
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS   += -fno-omit-frame-pointer -fno-optimize-sibling-calls
...


>>     So I did not provide support for it, and I will fix this issue in the next version.
>>>> +                       break;
>>>> +               default:
>>>> +                       error(G_("Invalid rtx_insn seqs found with type:%s in func:%s, file:%s\n"),
>>>> +                                       GET_RTX_NAME(GET_CODE(insn)),
>>>> +                                       get_name(current_function_decl), main_input_filename);
>>>> +                       gcc_unreachable();
>>>> +                       break;
>>>> +               }
>>>> +               /* A function return insn was found */
>>>> +               if (ANY_RETURN_P(PATTERN(insn))) {
>>>> +                       /* There should be an epilogue before 'RETURN' inst */
>>>> +                       if (GET_CODE(PATTERN(insn)) == RETURN) {
>>>> +                               gcc_assert(state == SCS_FOUND_ONE_EPILOGUE_NOTE);
>>>> +                               state = SCS_SEARCHING_FUNC_RETURN;
>>>> +                       }
>>>> +
>>>> +                       /* There is no epilogue before 'SIMPLE_RETURN' insn */
>>>> +                       if (GET_CODE(PATTERN(insn)) == SIMPLE_RETURN)
>>>> +                               gcc_assert(state == SCS_SEARCHING_FUNC_RETURN);
>>>
>>> These assert()s will crash the compiler if the RTL doesn't have quite
>>> the right structure, correct? Could we issue a warning instead, saying
>>> function 'x' could not be handled, and back out gracefully (i.e.,
>>> don't insert the push either)?
>>>
>>      Sure, I think I need to dynamically mark all instrumented positions here,
>>      and then confirm that the instruction sequence is correct before inserting in batches.
> 
> Yes, that sounds more suitable.
> 
>>>> +
>>>> +                       /* Insert scs pop instruction(s) before return insn */
>>>> +                       mark = gen_scs_pop(RESERVED_LOCATION_COUNT);
>>>> +                       emit_insn_before(mark, insn);
>>>> +               }
>>>> +       }
>>>> +       return 0;
>>>> +}
>>>> +
>>>> +static tree handle_noscs_attribute(tree *node, tree name, tree args __unused, int flags,
>>>> +               bool *no_add_attrs)
>>>> +{
>>>> +       *no_add_attrs = true;
>>>> +
>>>> +       gcc_assert(DECL_P(*node));
>>>> +       switch (TREE_CODE(*node)) {
>>>> +       default:
>>>> +               error(G_("%qE attribute can be applies to function decl only (%qE)"), name, *node);
>>>> +               gcc_unreachable();
>>>> +
>>>> +       case FUNCTION_DECL:     /* the attribute is only used for function declarations */
>>>> +               break;
>>>> +       }
>>>> +
>>>> +       *no_add_attrs = false;
>>>
>>> I'm not familiar with this idiom: what is the purpose of setting this
>>> to true initially and then to false again when the expected flow
>>> through the function is to do nothing at all?
>>>
>>      This is my mistake, at the beginning default case only return 0 directly after a warning;
>>      At that time, if *no_add_attrs is true, the corresponding attribute will not be added to 'node',
>>      and it means __noscs attribute can only be added for FUNCTION_DECL.
>>      For now, *no_add_attrs = true; is useless, it should be deleted.
>>
>>      But if, as you said, try to back out gracefully, is it better to report warning in the default case?
> 
> error() just terminates the compile with an error, right? I think that is fine.
> 
   Yes. I got it.

>>>> +       return NULL_TREE;
>>>> +}
>>>> +
>>>> +static void (*old_override_options_after_change)(void);
>>>> +
>>>> +static void scs_override_options_after_change(void)
>>>> +{
>>>> +       if (old_override_options_after_change)
>>>> +               old_override_options_after_change();
>>>> +
>>>> +       flag_optimize_sibling_calls = 0;
>>>> +}
>>>> +
>>>> +static void callback_before_start_unit(void *gcc_data __unused, void *user_data __unused)
>>>> +{
>>>> +       /* Turn off sibling call to avoid inserting duplicate scs pop codes */
>>>
>>> Sibling calls will restore x30 before the calk, right? So where do the
>>> duplicate pops come from?
>>      a sibling call could be like:
>>      stp     x29, x30, [sp, #-xx]!
>>      .......
>>      ldp     x29, x30, [sp], #xx
>>      ---> p1
>>      b       callee
>>      ldp     x29, x30, [sp], #xx
>>      ---> p2
>>      ret
>>
>>      What i mean here is if we need to insert, the scs pop code should be insert in both p1/p2,
> 
> Yes, so you have to identify the 'b' insn as a function return so it
> is treated the same.
>
   Thanks, let me try.
>>>
>>>> +       old_override_options_after_change = targetm.override_options_after_change;
>>>> +       targetm.override_options_after_change = scs_override_options_after_change;
>>>> +
>>>> +       flag_optimize_sibling_calls = 0;
>>>
>>> Do we need this twice?
>>     I think so, there are functions similar to push/pop in gcc (cl_optimization_restore/save)
>>     * callback_before_start_unit is used to set zero during initialization
>>     * scs_override_options_after_change is used to reset to 0 after a 'push' occurs
> 
> OK
> 
>>>> +}
>>>> +
>>>> +#define PASS_NAME arm64_scs
>>>> +#define TODO_FLAGS_FINISH (TODO_dump_func | TODO_verify_rtl_sharing)
>>>> +#include "gcc-generate-rtl-pass.h"
>>>> +
>>>> +__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
>>>> +{
>>>> +       int i;
>>>> +       const char * const plugin_name = plugin_info->base_name;
>>>> +       const int argc = plugin_info->argc;
>>>> +       const struct plugin_argument * const argv = plugin_info->argv;
>>>> +       bool enable = true;
>>>> +
>>>> +       PASS_INFO(arm64_scs, "shorten", 1, PASS_POS_INSERT_BEFORE);
>>>> +
>>>> +       if (!plugin_default_version_check(version, &gcc_version)) {
>>>> +               error(G_("Incompatible gcc/plugin versions"));
>>>> +               return 1;
>>>> +       }
>>>> +
>>>> +       if (strncmp(lang_hooks.name, "GNU C", 5) && !strncmp(lang_hooks.name, "GNU C+", 6)) {
>>>> +               inform(UNKNOWN_LOCATION, G_("%s supports C only, not %s"), plugin_name,
>>>> +                               lang_hooks.name);
>>>> +               enable = false;
>>>> +       }
>>>> +
>>>
>>> Do we need this check?
>>     This code is copied from structleak_plugin.c, I misunderstood the meaning here, and I will delete it later
> 
> OK. Kees should correct me if I'm wrong, but we use GCC in the kernel
> only to compile C files, so this check should be redundant.
> 
> 
>>>
>>>> +       for (i = 0; i < argc; ++i) {
>>>> +               if (!strcmp(argv[i].key, "disable")) {
>>>> +                       enable = false;
>>>> +                       continue;
>>>> +               }
>>>> +               if (!strcmp(argv[i].key, "verbose")) {
>>>> +                       verbose = true;
>>>> +                       continue;
>>>> +               }
>>>> +               error(G_("unknown option '-fplugin-arg-%s-%s'"), plugin_name, argv[i].key);
>>>> +       }
>>>> +
>>>> +       register_callback(plugin_name, PLUGIN_INFO, NULL, &arm64_scs_plugin_info);
>>>> +
>>>> +       register_callback(plugin_name, PLUGIN_ATTRIBUTES, scs_register_attributes, NULL);
>>>> +
>>>> +       if (!enable) {
>>>> +               v_info("Plugin disabled for file:%s\n", main_input_filename);
>>>> +               return 0;
>>>> +       }
>>>> +
>>>> +       register_callback(plugin_name, PLUGIN_START_UNIT, callback_before_start_unit, NULL);
>>>> +
>>>> +       register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &arm64_scs_pass_info);
>>>> +
>>>> +       return 0;
>>>> +}
>>>> --
>>>> 2.7.4
>>>>
Ard Biesheuvel Sept. 22, 2021, 1:48 p.m. UTC | #7
On Tue, 21 Sept 2021 at 08:00, Dan Li <ashimida@linux.alibaba.com> wrote:
>
>
>
> On 9/21/21 5:22 AM, Ard Biesheuvel wrote:
> > On Mon, 20 Sept 2021 at 20:53, Dan Li <ashimida@linux.alibaba.com> wrote:
> >>
> >> Hi Ard,
> >>
> >> Thanks for your comment.
> >>
> >> I pasted a copy of the config code in my last email, could you please check it again?
> >>
> >> On 9/20/21 3:18 PM, Ard Biesheuvel wrote:
> >>> Hi Dan,
> >>>
> >>> On Sun, 19 Sept 2021 at 18:37, Dan Li <ashimida@linux.alibaba.com> wrote:
> >>>>
> >>>> The Clang-based shadow call stack protection has been integrated into the
> >>>> mainline, but kernel compiled by gcc cannot enable this feature for now.
> >>>>
> >>>> This Patch supports gcc-based SCS protection by adding a plugin.
> >>>>
> >>>
> >>> Thanks for working on this. I had a stab at this myself about 2 years
> >>> ago and couldn't make it work.
> >>>
> >>>> For each function that x30 will be pushed onto the stack during execution,
> >>>> this plugin:
> >>>> 1) insert "str x30, [x18], #8" at the entry of the function to save x30
> >>>>      to current SCS
> >>>> 2) insert "ldr x30, [x18, #-8]!"  before the exit of this function to
> >>>>      restore x30
> >>>>
> >>>
> >>> This logic seems sound to me, but it would be nice if someone more
> >>> familiar with Clang's implementation could confirm that it is really
> >>> this simple.
> >>>
> >>> Looking at your plugin, there is an issue with tail calls, and I don't
> >>> think Clang simply disables those altogether as well, right?
> >>
> >> I am not familiar with clang's code, the logic comes from clang's description and the
> >> disassembled binary code for now, so it may be different from the actual situation.
> >>
> >
> > OK
> >
> >> The tail call could be handled (theoretically), and I will try to solve the issue in
> >> the next version.
> >>>
> >>>>    ifdef CONFIG_SHADOW_CALL_STACK
> >>>> -CC_FLAGS_SCS   := -fsanitize=shadow-call-stack
> >>>> +CC_FLAGS_SCS   := $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)
> >>>
> >>> This variable should contain whatever needs to be added to the
> >>> compiler comamand line
> >>     In the new code, an 'enable' option is added here to enable the plugin
> >>>>    KBUILD_CFLAGS  += $(CC_FLAGS_SCS)
> >>>>    export CC_FLAGS_SCS
> >>>>    endif
> >>>> diff --git a/arch/Kconfig b/arch/Kconfig
> >>>> index 98db634..81ff127 100644
> >>>> --- a/arch/Kconfig
> >>>> +++ b/arch/Kconfig
> >>>> @@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
> >>>>
> >>>>    config SHADOW_CALL_STACK
> >>>>           bool "Clang Shadow Call Stack"
> >>>> -       depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
> >>>> +       depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK
> >>>
> >>> This logic needs to be defined in such a way that a builtin
> >>> implementation provided by GCC will take precedence once it becomes
> >>> available.
> >>>
> >>     In new code, if gcc supports SCS in the future, the plugin will be closed due to
> >>     CC_HAVE_SHADOW_CALL_STACK is true.
> >>>>           depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
> >>>>           help
> >>>>             This option enables Clang's Shadow Call Stack, which uses a
> >>>> diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
> >>>> index ab9eb4c..2534195e 100644
> >>>> --- a/scripts/gcc-plugins/Kconfig
> >>>> +++ b/scripts/gcc-plugins/Kconfig
> >>>> @@ -19,6 +19,14 @@ menuconfig GCC_PLUGINS
> >>>>
> >>>>    if GCC_PLUGINS
> >>>>
> >>>> +config GCC_PLUGIN_SHADOW_CALL_STACK
> >>>> +       bool "GCC Shadow Call Stack plugin"
> >>>> +       select SHADOW_CALL_STACK
> >>>
> >>> You shouldn't 'select' something like this if the symbol has its own
> >>> dependencies which may be unsatisfied, as this causes a Kconfig
> >>> warning. Also, en/disabling shadow call stacks for the architecture
> >>> should be done from the arch's 'kernel features' menu, it shouldn't be
> >>> buried in the GCC plugins menu.
> >>      I removed 'select' in the new version.
> >>      SCS's enable is changed to rely on CONFIG_SHADOW_CALL_STACK in arch/kernel,
> >>      the GCC_PLUGIN_SHADOW_CALL_STACK config is just to add a usable platform to it.
> >>>> +       help
> >>>> +         This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
> >>>> +         compiled by gcc. Its principle is basically the same as that of CLANG.
> >>>> +         For more information, please refer to "config SHADOW_CALL_STACK"
> >>>> +
> >>>> +__visible int plugin_is_GPL_compatible;
> >>>> +
> >>>> +static struct plugin_info arm64_scs_plugin_info = {
> >>>> +       .version        = "20210926vanilla",
> >>>
> >>> I will respond to this obvious invitation at bikeshedding by saying
> >>> that 'salted caramel' is clearly the superior flavor of ice cream.
> >>     I'm sorry, as a non-native English speaker, I think I might not understand
> >>     what you mean here. My intention is to say that this is the first/initial
> >>     version, do I miss something?
> >
> > It was a joke - don't worry about it.
> >
> >>>> +       .help           = "disable\tdo not activate plugin\n"
> >>>> +                         "verbose\tprint all debug infos\n",
> >>>> +};
> >>>> +static unsigned int arm64_scs_execute(void)
> >>>> +{
> >>>> +       rtx_insn *insn;
> >>>> +       enum scs_state state = SCS_SEARCHING_FIRST_INSN;
> >>>> +
> >>>> +       for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) {
> >>>> +               rtx mark = NULL;
> >>>> +
> >>>> +               switch (GET_CODE(insn)) {
> >>>> +               case NOTE:
> >>>> +               case BARRIER:
> >>>> +               case CODE_LABEL:
> >>>> +               case INSN:
> >>>> +               case DEBUG_INSN:
> >>>> +               case JUMP_INSN:
> >>>> +               case JUMP_TABLE_DATA:
> >>>> +                       break;
> >>>> +               case CALL_INSN:
> >>>> +                       if (SIBLING_CALL_P(insn)) {
> >>>> +                               error(G_("Sibling call found in func:%s, file:%s\n"),
> >>>> +                                               get_name(current_function_decl),
> >>>> +                                               main_input_filename);
> >>>> +                               gcc_unreachable();
> >>>> +                       }
> >>>
> >>> Sibling calls are an important optimization, not only for performance
> >>> but also for stack utilization, so this needs to be fixed. Can you
> >>> elaborate on the issue you are working around here?
> >>>
> >>     Since the ARM64 has disabled sibling calls (-fno-optimize-sibling-calls) by default,
> >>     there is almost no sibling call appear in the kernel I encountered.
> >
> > What do you mean this is disabled by default? Is that a compiler
> > setting or a Linux setting?
> It's a linux setting in aarch64 kernel.
>
> In aarch64, since CONFIG_FRAME_POINTER is always selected, -fno-optimize-sibling-calls is
> usually enable by default, and I think sibling calls rarely appear (I only encountered
> it once in my cases from bsp's code):
>
> ./arch/arm64/Kconfig
> config ARM64
> ...
> select FRAME_POINTER
>
> ./Makefile
> ifdef CONFIG_FRAME_POINTER
> KBUILD_CFLAGS   += -fno-omit-frame-pointer -fno-optimize-sibling-calls
> ...
>

Ah good to know. I don't think we should disable this optimization -
we need the frame pointer to unwind the call stack, but that doesn't
mean we should obsess about function calls disappearing from the call
stack because they end in a tail call.

Anyway, I spotted another issue with your code:

0000000000000080 <sysctl_net_exit>:
{
  80:   f800865e        str     x30, [x18], #8
  84:   d503245f        bti     c
  88:   d503233f        paciasp

You cannot put that str at the start of the function like that: the
BTI needs to come first, or you will trigger BTI faults if any of
these functions are called indirectly.

There are other reasons why adding it at the start is a bad idea: we
insert NOPs there for ftrace, for instance, which also should appear
at a fixed offset in the prologue.
Dan Li Sept. 23, 2021, 3:25 p.m. UTC | #8
On 9/22/21 9:48 PM, Ard Biesheuvel wrote:
> On Tue, 21 Sept 2021 at 08:00, Dan Li <ashimida@linux.alibaba.com> wrote:
>>
>>
>>
>> On 9/21/21 5:22 AM, Ard Biesheuvel wrote:
>>> On Mon, 20 Sept 2021 at 20:53, Dan Li <ashimida@linux.alibaba.com> wrote:
>>>>
>>>> Hi Ard,
>>>>
>>>> Thanks for your comment.
>>>>
>>>> I pasted a copy of the config code in my last email, could you please check it again?
>>>>
>>>> On 9/20/21 3:18 PM, Ard Biesheuvel wrote:
>>>>> Hi Dan,
>>>>>
>>>>> On Sun, 19 Sept 2021 at 18:37, Dan Li <ashimida@linux.alibaba.com> wrote:
>>>>>>
>>>>>> The Clang-based shadow call stack protection has been integrated into the
>>>>>> mainline, but kernel compiled by gcc cannot enable this feature for now.
>>>>>>
>>>>>> This Patch supports gcc-based SCS protection by adding a plugin.
>>>>>>
>>>>>
>>>>> Thanks for working on this. I had a stab at this myself about 2 years
>>>>> ago and couldn't make it work.
>>>>>
>>>>>> For each function that x30 will be pushed onto the stack during execution,
>>>>>> this plugin:
>>>>>> 1) insert "str x30, [x18], #8" at the entry of the function to save x30
>>>>>>       to current SCS
>>>>>> 2) insert "ldr x30, [x18, #-8]!"  before the exit of this function to
>>>>>>       restore x30
>>>>>>
>>>>>
>>>>> This logic seems sound to me, but it would be nice if someone more
>>>>> familiar with Clang's implementation could confirm that it is really
>>>>> this simple.
>>>>>
>>>>> Looking at your plugin, there is an issue with tail calls, and I don't
>>>>> think Clang simply disables those altogether as well, right?
>>>>
>>>> I am not familiar with clang's code, the logic comes from clang's description and the
>>>> disassembled binary code for now, so it may be different from the actual situation.
>>>>
>>>
>>> OK
>>>
>>>> The tail call could be handled (theoretically), and I will try to solve the issue in
>>>> the next version.
>>>>>
>>>>>>     ifdef CONFIG_SHADOW_CALL_STACK
>>>>>> -CC_FLAGS_SCS   := -fsanitize=shadow-call-stack
>>>>>> +CC_FLAGS_SCS   := $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)
>>>>>
>>>>> This variable should contain whatever needs to be added to the
>>>>> compiler comamand line
>>>>      In the new code, an 'enable' option is added here to enable the plugin
>>>>>>     KBUILD_CFLAGS  += $(CC_FLAGS_SCS)
>>>>>>     export CC_FLAGS_SCS
>>>>>>     endif
>>>>>> diff --git a/arch/Kconfig b/arch/Kconfig
>>>>>> index 98db634..81ff127 100644
>>>>>> --- a/arch/Kconfig
>>>>>> +++ b/arch/Kconfig
>>>>>> @@ -594,7 +594,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>>>>>>
>>>>>>     config SHADOW_CALL_STACK
>>>>>>            bool "Clang Shadow Call Stack"
>>>>>> -       depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
>>>>>> +       depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK
>>>>>
>>>>> This logic needs to be defined in such a way that a builtin
>>>>> implementation provided by GCC will take precedence once it becomes
>>>>> available.
>>>>>
>>>>      In new code, if gcc supports SCS in the future, the plugin will be closed due to
>>>>      CC_HAVE_SHADOW_CALL_STACK is true.
>>>>>>            depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
>>>>>>            help
>>>>>>              This option enables Clang's Shadow Call Stack, which uses a
>>>>>> diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
>>>>>> index ab9eb4c..2534195e 100644
>>>>>> --- a/scripts/gcc-plugins/Kconfig
>>>>>> +++ b/scripts/gcc-plugins/Kconfig
>>>>>> @@ -19,6 +19,14 @@ menuconfig GCC_PLUGINS
>>>>>>
>>>>>>     if GCC_PLUGINS
>>>>>>
>>>>>> +config GCC_PLUGIN_SHADOW_CALL_STACK
>>>>>> +       bool "GCC Shadow Call Stack plugin"
>>>>>> +       select SHADOW_CALL_STACK
>>>>>
>>>>> You shouldn't 'select' something like this if the symbol has its own
>>>>> dependencies which may be unsatisfied, as this causes a Kconfig
>>>>> warning. Also, en/disabling shadow call stacks for the architecture
>>>>> should be done from the arch's 'kernel features' menu, it shouldn't be
>>>>> buried in the GCC plugins menu.
>>>>       I removed 'select' in the new version.
>>>>       SCS's enable is changed to rely on CONFIG_SHADOW_CALL_STACK in arch/kernel,
>>>>       the GCC_PLUGIN_SHADOW_CALL_STACK config is just to add a usable platform to it.
>>>>>> +       help
>>>>>> +         This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
>>>>>> +         compiled by gcc. Its principle is basically the same as that of CLANG.
>>>>>> +         For more information, please refer to "config SHADOW_CALL_STACK"
>>>>>> +
>>>>>> +__visible int plugin_is_GPL_compatible;
>>>>>> +
>>>>>> +static struct plugin_info arm64_scs_plugin_info = {
>>>>>> +       .version        = "20210926vanilla",
>>>>>
>>>>> I will respond to this obvious invitation at bikeshedding by saying
>>>>> that 'salted caramel' is clearly the superior flavor of ice cream.
>>>>      I'm sorry, as a non-native English speaker, I think I might not understand
>>>>      what you mean here. My intention is to say that this is the first/initial
>>>>      version, do I miss something?
>>>
>>> It was a joke - don't worry about it.
>>>
>>>>>> +       .help           = "disable\tdo not activate plugin\n"
>>>>>> +                         "verbose\tprint all debug infos\n",
>>>>>> +};
>>>>>> +static unsigned int arm64_scs_execute(void)
>>>>>> +{
>>>>>> +       rtx_insn *insn;
>>>>>> +       enum scs_state state = SCS_SEARCHING_FIRST_INSN;
>>>>>> +
>>>>>> +       for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) {
>>>>>> +               rtx mark = NULL;
>>>>>> +
>>>>>> +               switch (GET_CODE(insn)) {
>>>>>> +               case NOTE:
>>>>>> +               case BARRIER:
>>>>>> +               case CODE_LABEL:
>>>>>> +               case INSN:
>>>>>> +               case DEBUG_INSN:
>>>>>> +               case JUMP_INSN:
>>>>>> +               case JUMP_TABLE_DATA:
>>>>>> +                       break;
>>>>>> +               case CALL_INSN:
>>>>>> +                       if (SIBLING_CALL_P(insn)) {
>>>>>> +                               error(G_("Sibling call found in func:%s, file:%s\n"),
>>>>>> +                                               get_name(current_function_decl),
>>>>>> +                                               main_input_filename);
>>>>>> +                               gcc_unreachable();
>>>>>> +                       }
>>>>>
>>>>> Sibling calls are an important optimization, not only for performance
>>>>> but also for stack utilization, so this needs to be fixed. Can you
>>>>> elaborate on the issue you are working around here?
>>>>>
>>>>      Since the ARM64 has disabled sibling calls (-fno-optimize-sibling-calls) by default,
>>>>      there is almost no sibling call appear in the kernel I encountered.
>>>
>>> What do you mean this is disabled by default? Is that a compiler
>>> setting or a Linux setting?
>> It's a linux setting in aarch64 kernel.
>>
>> In aarch64, since CONFIG_FRAME_POINTER is always selected, -fno-optimize-sibling-calls is
>> usually enable by default, and I think sibling calls rarely appear (I only encountered
>> it once in my cases from bsp's code):
>>
>> ./arch/arm64/Kconfig
>> config ARM64
>> ...
>> select FRAME_POINTER
>>
>> ./Makefile
>> ifdef CONFIG_FRAME_POINTER
>> KBUILD_CFLAGS   += -fno-omit-frame-pointer -fno-optimize-sibling-calls
>> ...
>>
> 
> Ah good to know. I don't think we should disable this optimization -
> we need the frame pointer to unwind the call stack, but that doesn't
> mean we should obsess about function calls disappearing from the call
> stack because they end in a tail call.
> 
> Anyway, I spotted another issue with your code:
> 
> 0000000000000080 <sysctl_net_exit>:
> {
>    80:   f800865e        str     x30, [x18], #8
>    84:   d503245f        bti     c
>    88:   d503233f        paciasp
> 
> You cannot put that str at the start of the function like that: the
> BTI needs to come first, or you will trigger BTI faults if any of
> these functions are called indirectly.
> 
> There are other reasons why adding it at the start is a bad idea: we
> insert NOPs there for ftrace, for instance, which also should appear
> at a fixed offset in the prologue.

Thanks for your help, Ard, I did not consider of BTI and NOP
instructions before.

It took me some time to view the source code. Currently, I think
there may be three problems here(please let me know if i miss
something):
1)NOP instruction insertion
2)Position of BTI instruction
3)PAC verification

1)NOP instruction insertion
As far as I know, the insertion of the kernel nop instruction is
caused by CONFIG_DYNAMIC_FTRACE_WITH_REGS, which sets
-fpatchable-function-entry=2 to leave positions at the beginning
of each function.

SCS push should not be affected, because NOP instructions is
generated during assembly code output stage(pass final =>
assemble_start_function),which occurs after the scs pass.

So in the final binary, the nop instruction is always inserted
before scs push, such as:
ffff800010010260 <gic_handle_irq>:
ffff800010010260:       d503201f        nop
ffff800010010264:       d503201f        nop
ffff800010010268:       f800865e        str     x30, [x18], #8
ffff80001001026c:       a9bb7bfd        stp     x29, x30, [sp, #-80]!
  
I enabled CONFIG_DYNAMIC_FTRACE_WITH_REGS + SCS_plugin on the 5.14
kernel compiled by gcc 10.3, the kernel can startup normally.

2)Position of BTI instruction
BTI instructions are mainly inserted in pass_insert_bti, and the pass
sequence is:
rtl-mach		:  OFF
......
rtl-bti			:  ON
rtl-arm64_scs		:  ON
rtl-shorten		:  ON
.......
rtl-final
  
Since arm64_scs is after rtl-bti, the scs push will inserted before
bti insns.

To solve this, I changed the insertion point of scs pass to before
'mach' (which is the insertion point I originally used), patch:

--- a/scripts/gcc-plugins/arm64_scs_plugin.c
+++ b/scripts/gcc-plugins/arm64_scs_plugin.c
@@ -214,7 +214,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gc
         const struct plugin_argument * const argv = plugin_info->argv;
         bool enable = true;
  
-       PASS_INFO(arm64_scs, "shorten", 1, PASS_POS_INSERT_BEFORE);
+       PASS_INFO(arm64_scs, "mach", 1, PASS_POS_INSERT_AFTER);

Then BTI will be inserted before scs push, as follows:
ffff800010010290 <gic_handle_irq>:
ffff800010010290:       d503245f        bti     c
ffff800010010294:       d503201f        nop
ffff800010010298:       d503201f        nop
ffff80001001029c:       f800865e        str     x30, [x18], #8
ffff8000100102a0:       d503233f        paciasp
ffff8000100102a4:       a9bb7bfd        stp     x29, x30, [sp, #-80]!
ffff8000100102a8:       910003fd        mov     x29, sp
   
At present, the system startup normally with
CONFIG_ARM64_PTR_AUTH_KERNEL + CONFIG_ARM64_BTI_KERNEL (but my qemu
does not seem to support BTI commands, and I'm still trying to build
a test environment)

3)PAC verification
PAC is processed in rtl-pro_and_epilogue, scs can't be put in front
of it.

In current patch, it will generate instructions like:
ffff800010010290 <gic_handle_irq>:
......
ffff80001001029c:       f800865e        str     x30, [x18], #8
ffff8000100102a0:       d503233f        paciasp
......
ffff800010010364:       d50323bf        autiasp
ffff800010010368:       f85f8e5e        ldr     x30, [x18, #-8]!

which means pac is invalid, the attacker only needs to modify the
x30 from shadow stack.

Modifying the insertion point of scs push/pop should solve this
problem(I will try it later).
   
But what puzzles me is that PAC should be an enhanced implementation
of SCS. Do we need to support PAC and SCS at the same time?
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index 61741e9..0f0121a 100644
--- a/Makefile
+++ b/Makefile
@@ -924,7 +924,7 @@  LDFLAGS_vmlinux += --gc-sections
 endif
 
 ifdef CONFIG_SHADOW_CALL_STACK
-CC_FLAGS_SCS	:= -fsanitize=shadow-call-stack
+CC_FLAGS_SCS	:= $(if $(CONFIG_CC_IS_CLANG),-fsanitize=shadow-call-stack,)
 KBUILD_CFLAGS	+= $(CC_FLAGS_SCS)
 export CC_FLAGS_SCS
 endif
diff --git a/arch/Kconfig b/arch/Kconfig
index 98db634..81ff127 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -594,7 +594,7 @@  config ARCH_SUPPORTS_SHADOW_CALL_STACK
 
 config SHADOW_CALL_STACK
 	bool "Clang Shadow Call Stack"
-	depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+	depends on (CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK) || GCC_PLUGIN_SHADOW_CALL_STACK
 	depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
 	help
 	  This option enables Clang's Shadow Call Stack, which uses a
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cb9217f..426c8e5 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -50,6 +50,10 @@ 
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
 
+#if defined(SHADOW_CALL_STACK_PLUGIN) && !defined(__CHECKER__)
+#define __noscs __attribute__((no_shadow_call_stack))
+#endif
+
 /*
  * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
  * confuse the stack allocation in gcc, leading to overly large stack
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 952e468..eeaf2c6 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -46,6 +46,10 @@  ifdef CONFIG_GCC_PLUGIN_ARM_SSP_PER_TASK
 endif
 export DISABLE_ARM_SSP_PER_TASK_PLUGIN
 
+gcc-plugin-$(CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK) += arm64_scs_plugin.so
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_SHADOW_CALL_STACK)	\
+		+= -DSHADOW_CALL_STACK_PLUGIN
+
 # All the plugin CFLAGS are collected here in case a build target needs to
 # filter them out of the KBUILD_CFLAGS.
 GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index ab9eb4c..2534195e 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -19,6 +19,14 @@  menuconfig GCC_PLUGINS
 
 if GCC_PLUGINS
 
+config GCC_PLUGIN_SHADOW_CALL_STACK
+	bool "GCC Shadow Call Stack plugin"
+	select SHADOW_CALL_STACK
+	help
+	  This plugin is used to support the kernel CONFIG_SHADOW_CALL_STACK
+	  compiled by gcc. Its principle is basically the same as that of CLANG.
+	  For more information, please refer to "config SHADOW_CALL_STACK"
+
 config GCC_PLUGIN_CYC_COMPLEXITY
 	bool "Compute the cyclomatic complexity of a function" if EXPERT
 	depends on !COMPILE_TEST	# too noisy
diff --git a/scripts/gcc-plugins/arm64_scs_plugin.c b/scripts/gcc-plugins/arm64_scs_plugin.c
new file mode 100644
index 0000000..c5a66140
--- /dev/null
+++ b/scripts/gcc-plugins/arm64_scs_plugin.c
@@ -0,0 +1,256 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include "gcc-common.h"
+
+#define v_info(fmt, ...)							\
+	do {									\
+		if (verbose)							\
+			fprintf(stderr, "[SCS]: " fmt,  ## __VA_ARGS__);	\
+	} while (0)
+
+#define NOSCS_ATTR_STR  "no_shadow_call_stack"
+#define SCS_ASM_PUSH_STR "str x30, [x18], #8\n\t"
+#define SCS_ASM_POP_STR  "ldr x30, [x18, #-8]!\n\t"
+
+__visible int plugin_is_GPL_compatible;
+
+static struct plugin_info arm64_scs_plugin_info = {
+	.version	= "20210926vanilla",
+	.help		= "disable\tdo not activate plugin\n"
+			  "verbose\tprint all debug infos\n",
+};
+
+static bool verbose;
+
+static rtx gen_scs_push(location_t loc)
+{
+	rtx insn = gen_rtx_ASM_INPUT_loc(VOIDmode, ggc_strdup(SCS_ASM_PUSH_STR), loc);
+
+	MEM_VOLATILE_P(insn) = 1;
+	return insn;
+}
+
+static rtx gen_scs_pop(location_t loc)
+{
+	rtx insn = gen_rtx_ASM_INPUT_loc(VOIDmode, ggc_strdup(SCS_ASM_POP_STR), loc);
+
+	MEM_VOLATILE_P(insn) = 1;
+	return insn;
+}
+
+static bool arm64_scs_gate(void)
+{
+	bool is_ignored;
+
+#if BUILDING_GCC_VERSION >= 8002
+	is_ignored = !cfun->machine->frame.emit_frame_chain;
+#else
+	is_ignored = !frame_pointer_needed;
+#endif
+
+	/* No need to insert protection code into functions that do not push LR into stack */
+	if (is_ignored) {
+		v_info("No protection code inserted into func:%s in file:%s\n",
+			get_name(current_function_decl), main_input_filename);
+		return 0;
+	}
+
+	gcc_assert(cfun->machine->frame.wb_candidate2 == R30_REGNUM);
+
+	/* Don't insert protection code into functions with NOSCS_ATTR_STR attribute */
+	if (lookup_attribute(NOSCS_ATTR_STR, DECL_ATTRIBUTES(current_function_decl))) {
+		v_info("No protection code inserted into %s func:%s in file:%s\n", NOSCS_ATTR_STR,
+				get_name(current_function_decl), main_input_filename);
+		return 0;
+	}
+	return 1;
+}
+
+enum scs_state {
+	/* The first valid instruction has not been found in the current instruction sequence */
+	SCS_SEARCHING_FIRST_INSN,
+	/* Currently searching for the return rtx instruction in this function */
+	SCS_SEARCHING_FUNC_RETURN,
+	/* Found an EPILOGUE_BEGIN before the function return instruction */
+	SCS_FOUND_ONE_EPILOGUE_NOTE,
+};
+
+static unsigned int arm64_scs_execute(void)
+{
+	rtx_insn *insn;
+	enum scs_state state = SCS_SEARCHING_FIRST_INSN;
+
+	for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) {
+		rtx mark = NULL;
+
+		switch (GET_CODE(insn)) {
+		case NOTE:
+		case BARRIER:
+		case CODE_LABEL:
+		case INSN:
+		case DEBUG_INSN:
+		case JUMP_INSN:
+		case JUMP_TABLE_DATA:
+			break;
+		case CALL_INSN:
+			if (SIBLING_CALL_P(insn)) {
+				error(G_("Sibling call found in func:%s, file:%s\n"),
+						get_name(current_function_decl),
+						main_input_filename);
+				gcc_unreachable();
+			}
+			break;
+		default:
+			error(G_("Invalid rtx_insn seqs found with type:%s in func:%s, file:%s\n"),
+					GET_RTX_NAME(GET_CODE(insn)),
+					get_name(current_function_decl), main_input_filename);
+			gcc_unreachable();
+			break;
+		}
+
+		if (state == SCS_SEARCHING_FIRST_INSN) {
+			/* A function that needs to be instrumented should not found epilogue
+			 * before its first insn
+			 */
+			gcc_assert(!(NOTE_P(insn) && (NOTE_KIND(insn) == NOTE_INSN_EPILOGUE_BEG)));
+
+			if (NOTE_P(insn) || INSN_DELETED_P(insn))
+				continue;
+
+			state = SCS_SEARCHING_FUNC_RETURN;
+
+			/* Insert scs pop before the first instruction found */
+			mark = gen_scs_push(RESERVED_LOCATION_COUNT);
+			emit_insn_before(mark, insn);
+		}
+
+		/* Find the corresponding epilogue before 'RETURN' instruction (if any) */
+		if (state == SCS_SEARCHING_FUNC_RETURN) {
+			if (NOTE_P(insn) && (NOTE_KIND(insn) == NOTE_INSN_EPILOGUE_BEG)) {
+				state = SCS_FOUND_ONE_EPILOGUE_NOTE;
+				continue;
+			}
+		}
+
+		if (!JUMP_P(insn))
+			continue;
+
+		/* A function return insn was found */
+		if (ANY_RETURN_P(PATTERN(insn))) {
+			/* There should be an epilogue before 'RETURN' inst */
+			if (GET_CODE(PATTERN(insn)) == RETURN) {
+				gcc_assert(state == SCS_FOUND_ONE_EPILOGUE_NOTE);
+				state = SCS_SEARCHING_FUNC_RETURN;
+			}
+
+			/* There is no epilogue before 'SIMPLE_RETURN' insn */
+			if (GET_CODE(PATTERN(insn)) == SIMPLE_RETURN)
+				gcc_assert(state == SCS_SEARCHING_FUNC_RETURN);
+
+			/* Insert scs pop instruction(s) before return insn */
+			mark = gen_scs_pop(RESERVED_LOCATION_COUNT);
+			emit_insn_before(mark, insn);
+		}
+	}
+	return 0;
+}
+
+static tree handle_noscs_attribute(tree *node, tree name, tree args __unused, int flags,
+		bool *no_add_attrs)
+{
+	*no_add_attrs = true;
+
+	gcc_assert(DECL_P(*node));
+	switch (TREE_CODE(*node)) {
+	default:
+		error(G_("%qE attribute can be applies to function decl only (%qE)"), name, *node);
+		gcc_unreachable();
+
+	case FUNCTION_DECL:	/* the attribute is only used for function declarations */
+		break;
+	}
+
+	*no_add_attrs = false;
+	return NULL_TREE;
+}
+
+static struct attribute_spec noscs_attr = {};
+
+static void scs_register_attributes(void *event_data __unused, void *data __unused)
+{
+	noscs_attr.name	= NOSCS_ATTR_STR;
+	noscs_attr.decl_required = true;
+	noscs_attr.handler = handle_noscs_attribute;
+	register_attribute(&noscs_attr);
+}
+
+static void (*old_override_options_after_change)(void);
+
+static void scs_override_options_after_change(void)
+{
+	if (old_override_options_after_change)
+		old_override_options_after_change();
+
+	flag_optimize_sibling_calls = 0;
+}
+
+static void callback_before_start_unit(void *gcc_data __unused, void *user_data __unused)
+{
+	/* Turn off sibling call to avoid inserting duplicate scs pop codes */
+	old_override_options_after_change = targetm.override_options_after_change;
+	targetm.override_options_after_change = scs_override_options_after_change;
+
+	flag_optimize_sibling_calls = 0;
+}
+
+#define PASS_NAME arm64_scs
+#define TODO_FLAGS_FINISH (TODO_dump_func | TODO_verify_rtl_sharing)
+#include "gcc-generate-rtl-pass.h"
+
+__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
+{
+	int i;
+	const char * const plugin_name = plugin_info->base_name;
+	const int argc = plugin_info->argc;
+	const struct plugin_argument * const argv = plugin_info->argv;
+	bool enable = true;
+
+	PASS_INFO(arm64_scs, "shorten", 1, PASS_POS_INSERT_BEFORE);
+
+	if (!plugin_default_version_check(version, &gcc_version)) {
+		error(G_("Incompatible gcc/plugin versions"));
+		return 1;
+	}
+
+	if (strncmp(lang_hooks.name, "GNU C", 5) && !strncmp(lang_hooks.name, "GNU C+", 6)) {
+		inform(UNKNOWN_LOCATION, G_("%s supports C only, not %s"), plugin_name,
+				lang_hooks.name);
+		enable = false;
+	}
+
+	for (i = 0; i < argc; ++i) {
+		if (!strcmp(argv[i].key, "disable")) {
+			enable = false;
+			continue;
+		}
+		if (!strcmp(argv[i].key, "verbose")) {
+			verbose = true;
+			continue;
+		}
+		error(G_("unknown option '-fplugin-arg-%s-%s'"), plugin_name, argv[i].key);
+	}
+
+	register_callback(plugin_name, PLUGIN_INFO, NULL, &arm64_scs_plugin_info);
+
+	register_callback(plugin_name, PLUGIN_ATTRIBUTES, scs_register_attributes, NULL);
+
+	if (!enable) {
+		v_info("Plugin disabled for file:%s\n", main_input_filename);
+		return 0;
+	}
+
+	register_callback(plugin_name, PLUGIN_START_UNIT, callback_before_start_unit, NULL);
+
+	register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &arm64_scs_pass_info);
+
+	return 0;
+}