diff mbox series

[v4,1/3] kallsyms: Avoid weak references for kallsyms symbols

Message ID 20240415162041.2491523-6-ardb+git@google.com (mailing list archive)
State New
Headers show
Series kbuild: Avoid weak external linkage where possible | expand

Commit Message

Ard Biesheuvel April 15, 2024, 4:20 p.m. UTC
From: Ard Biesheuvel <ardb@kernel.org>

kallsyms is a directory of all the symbols in the vmlinux binary, and so
creating it is somewhat of a chicken-and-egg problem, as its non-zero
size affects the layout of the binary, and therefore the values of the
symbols.

For this reason, the kernel is linked more than once, and the first pass
does not include any kallsyms data at all. For the linker to accept
this, the symbol declarations describing the kallsyms metadata are
emitted as having weak linkage, so they can remain unsatisfied. During
the subsequent passes, the weak references are satisfied by the kallsyms
metadata that was constructed based on information gathered from the
preceding passes.

Weak references lead to somewhat worse codegen, because taking their
address may need to produce NULL (if the reference was unsatisfied), and
this is not usually supported by RIP or PC relative symbol references.

Given that these references are ultimately always satisfied in the final
link, let's drop the weak annotation, and instead, provide fallback
definitions in the linker script that are only emitted if an unsatisfied
reference exists.

While at it, drop the FRV specific annotation that these symbols reside
in .rodata - FRV is long gone.

Tested-by: Nick Desaulniers <ndesaulniers@google.com> # Boot
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lkml.kernel.org/r/20230504174320.3930345-1-ardb%40kernel.org
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 19 +++++++++++++
 kernel/kallsyms.c                 |  6 ----
 kernel/kallsyms_internal.h        | 30 ++++++++------------
 3 files changed, 31 insertions(+), 24 deletions(-)

Comments

Masahiro Yamada April 23, 2024, 1:44 p.m. UTC | #1
On Tue, Apr 16, 2024 at 1:20 AM Ard Biesheuvel <ardb+git@google.com> wrote:
>
> From: Ard Biesheuvel <ardb@kernel.org>
>
> kallsyms is a directory of all the symbols in the vmlinux binary, and so
> creating it is somewhat of a chicken-and-egg problem, as its non-zero
> size affects the layout of the binary, and therefore the values of the
> symbols.
>
> For this reason, the kernel is linked more than once, and the first pass
> does not include any kallsyms data at all. For the linker to accept
> this, the symbol declarations describing the kallsyms metadata are
> emitted as having weak linkage, so they can remain unsatisfied. During
> the subsequent passes, the weak references are satisfied by the kallsyms
> metadata that was constructed based on information gathered from the
> preceding passes.
>
> Weak references lead to somewhat worse codegen, because taking their
> address may need to produce NULL (if the reference was unsatisfied), and
> this is not usually supported by RIP or PC relative symbol references.
>
> Given that these references are ultimately always satisfied in the final
> link, let's drop the weak annotation, and instead, provide fallback
> definitions in the linker script that are only emitted if an unsatisfied
> reference exists.
>
> While at it, drop the FRV specific annotation that these symbols reside
> in .rodata - FRV is long gone.
>
> Tested-by: Nick Desaulniers <ndesaulniers@google.com> # Boot
> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> Link: https://lkml.kernel.org/r/20230504174320.3930345-1-ardb%40kernel.org
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---


I dropped v5, and picked up this one.

Thanks.



>  include/asm-generic/vmlinux.lds.h | 19 +++++++++++++
>  kernel/kallsyms.c                 |  6 ----
>  kernel/kallsyms_internal.h        | 30 ++++++++------------
>  3 files changed, 31 insertions(+), 24 deletions(-)
>
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index f7749d0f2562..e8449be62058 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -448,11 +448,30 @@
>  #endif
>  #endif
>
> +/*
> + * Some symbol definitions will not exist yet during the first pass of the
> + * link, but are guaranteed to exist in the final link. Provide preliminary
> + * definitions that will be superseded in the final link to avoid having to
> + * rely on weak external linkage, which requires a GOT when used in position
> + * independent code.
> + */
> +#define PRELIMINARY_SYMBOL_DEFINITIONS                                 \
> +       PROVIDE(kallsyms_addresses = .);                                \
> +       PROVIDE(kallsyms_offsets = .);                                  \
> +       PROVIDE(kallsyms_names = .);                                    \
> +       PROVIDE(kallsyms_num_syms = .);                                 \
> +       PROVIDE(kallsyms_relative_base = .);                            \
> +       PROVIDE(kallsyms_token_table = .);                              \
> +       PROVIDE(kallsyms_token_index = .);                              \
> +       PROVIDE(kallsyms_markers = .);                                  \
> +       PROVIDE(kallsyms_seqs_of_names = .);
> +
>  /*
>   * Read only Data
>   */
>  #define RO_DATA(align)                                                 \
>         . = ALIGN((align));                                             \
> +       PRELIMINARY_SYMBOL_DEFINITIONS                                  \
>         .rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {           \
>                 __start_rodata = .;                                     \
>                 *(.rodata) *(.rodata.*)                                 \
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 18edd57b5fe8..22ea19a36e6e 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -325,12 +325,6 @@ static unsigned long get_symbol_pos(unsigned long addr,
>         unsigned long symbol_start = 0, symbol_end = 0;
>         unsigned long i, low, high, mid;
>
> -       /* This kernel should never had been booted. */
> -       if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE))
> -               BUG_ON(!kallsyms_addresses);
> -       else
> -               BUG_ON(!kallsyms_offsets);
> -
>         /* Do a binary search on the sorted kallsyms_addresses array. */
>         low = 0;
>         high = kallsyms_num_syms;
> diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h
> index 27fabdcc40f5..85480274fc8f 100644
> --- a/kernel/kallsyms_internal.h
> +++ b/kernel/kallsyms_internal.h
> @@ -5,27 +5,21 @@
>  #include <linux/types.h>
>
>  /*
> - * These will be re-linked against their real values
> - * during the second link stage.
> + * These will be re-linked against their real values during the second link
> + * stage. Preliminary values must be provided in the linker script using the
> + * PROVIDE() directive so that the first link stage can complete successfully.
>   */
> -extern const unsigned long kallsyms_addresses[] __weak;
> -extern const int kallsyms_offsets[] __weak;
> -extern const u8 kallsyms_names[] __weak;
> +extern const unsigned long kallsyms_addresses[];
> +extern const int kallsyms_offsets[];
> +extern const u8 kallsyms_names[];
>
> -/*
> - * Tell the compiler that the count isn't in the small data section if the arch
> - * has one (eg: FRV).
> - */
> -extern const unsigned int kallsyms_num_syms
> -__section(".rodata") __attribute__((weak));
> -
> -extern const unsigned long kallsyms_relative_base
> -__section(".rodata") __attribute__((weak));
> +extern const unsigned int kallsyms_num_syms;
> +extern const unsigned long kallsyms_relative_base;
>
> -extern const char kallsyms_token_table[] __weak;
> -extern const u16 kallsyms_token_index[] __weak;
> +extern const char kallsyms_token_table[];
> +extern const u16 kallsyms_token_index[];
>
> -extern const unsigned int kallsyms_markers[] __weak;
> -extern const u8 kallsyms_seqs_of_names[] __weak;
> +extern const unsigned int kallsyms_markers[];
> +extern const u8 kallsyms_seqs_of_names[];
>
>  #endif // LINUX_KALLSYMS_INTERNAL_H_
> --
> 2.44.0.683.g7961c838ac-goog
>
>
diff mbox series

Patch

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f7749d0f2562..e8449be62058 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -448,11 +448,30 @@ 
 #endif
 #endif
 
+/*
+ * Some symbol definitions will not exist yet during the first pass of the
+ * link, but are guaranteed to exist in the final link. Provide preliminary
+ * definitions that will be superseded in the final link to avoid having to
+ * rely on weak external linkage, which requires a GOT when used in position
+ * independent code.
+ */
+#define PRELIMINARY_SYMBOL_DEFINITIONS					\
+	PROVIDE(kallsyms_addresses = .);				\
+	PROVIDE(kallsyms_offsets = .);					\
+	PROVIDE(kallsyms_names = .);					\
+	PROVIDE(kallsyms_num_syms = .);					\
+	PROVIDE(kallsyms_relative_base = .);				\
+	PROVIDE(kallsyms_token_table = .);				\
+	PROVIDE(kallsyms_token_index = .);				\
+	PROVIDE(kallsyms_markers = .);					\
+	PROVIDE(kallsyms_seqs_of_names = .);
+
 /*
  * Read only Data
  */
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
+	PRELIMINARY_SYMBOL_DEFINITIONS					\
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
 		__start_rodata = .;					\
 		*(.rodata) *(.rodata.*)					\
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 18edd57b5fe8..22ea19a36e6e 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -325,12 +325,6 @@  static unsigned long get_symbol_pos(unsigned long addr,
 	unsigned long symbol_start = 0, symbol_end = 0;
 	unsigned long i, low, high, mid;
 
-	/* This kernel should never had been booted. */
-	if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE))
-		BUG_ON(!kallsyms_addresses);
-	else
-		BUG_ON(!kallsyms_offsets);
-
 	/* Do a binary search on the sorted kallsyms_addresses array. */
 	low = 0;
 	high = kallsyms_num_syms;
diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h
index 27fabdcc40f5..85480274fc8f 100644
--- a/kernel/kallsyms_internal.h
+++ b/kernel/kallsyms_internal.h
@@ -5,27 +5,21 @@ 
 #include <linux/types.h>
 
 /*
- * These will be re-linked against their real values
- * during the second link stage.
+ * These will be re-linked against their real values during the second link
+ * stage. Preliminary values must be provided in the linker script using the
+ * PROVIDE() directive so that the first link stage can complete successfully.
  */
-extern const unsigned long kallsyms_addresses[] __weak;
-extern const int kallsyms_offsets[] __weak;
-extern const u8 kallsyms_names[] __weak;
+extern const unsigned long kallsyms_addresses[];
+extern const int kallsyms_offsets[];
+extern const u8 kallsyms_names[];
 
-/*
- * Tell the compiler that the count isn't in the small data section if the arch
- * has one (eg: FRV).
- */
-extern const unsigned int kallsyms_num_syms
-__section(".rodata") __attribute__((weak));
-
-extern const unsigned long kallsyms_relative_base
-__section(".rodata") __attribute__((weak));
+extern const unsigned int kallsyms_num_syms;
+extern const unsigned long kallsyms_relative_base;
 
-extern const char kallsyms_token_table[] __weak;
-extern const u16 kallsyms_token_index[] __weak;
+extern const char kallsyms_token_table[];
+extern const u16 kallsyms_token_index[];
 
-extern const unsigned int kallsyms_markers[] __weak;
-extern const u8 kallsyms_seqs_of_names[] __weak;
+extern const unsigned int kallsyms_markers[];
+extern const u8 kallsyms_seqs_of_names[];
 
 #endif // LINUX_KALLSYMS_INTERNAL_H_