
[RFC,2/6] kvm: arm64: Fix up RELA relocations in hyp code/data

Message ID 20201119162543.78001-3-dbrazdil@google.com (mailing list archive)
State New, archived
Series kvm: arm64: Fix up hyp relocations

Commit Message

David Brazdil Nov. 19, 2020, 4:25 p.m. UTC
KVM nVHE code runs under a different VA mapping than the kernel, hence
so far it relied only on PC-relative addressing to avoid accidentally
using a relocated kernel VA from a constant pool (see hyp_symbol_addr).

To reduce the possibility of programmer error, fix up the
relocated addresses instead. Let the kernel relocate them to kernel VA
first, but then iterate over them again, filter those that point to hyp
code/data and convert the kernel VA to hyp VA.

This is done after kvm_compute_layout and before apply_alternatives.

Signed-off-by: David Brazdil <dbrazdil@google.com>
---
 arch/arm64/include/asm/kvm_mmu.h |  1 +
 arch/arm64/kernel/smp.c          |  4 +-
 arch/arm64/kvm/va_layout.c       | 76 ++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)

Comments

Marc Zyngier Nov. 24, 2020, 1:09 p.m. UTC | #1
On 2020-11-19 16:25, David Brazdil wrote:
> KVM nVHE code runs under a different VA mapping than the kernel, hence
> so far it relied only on PC-relative addressing to avoid accidentally
> using a relocated kernel VA from a constant pool (see hyp_symbol_addr).
> 
> So as to reduce the possibility of a programmer error, fixup the
> relocated addresses instead. Let the kernel relocate them to kernel VA
> first, but then iterate over them again, filter those that point to hyp
> code/data and convert the kernel VA to hyp VA.
> 
> This is done after kvm_compute_layout and before apply_alternatives.
> 
> Signed-off-by: David Brazdil <dbrazdil@google.com>
> ---
>  arch/arm64/include/asm/kvm_mmu.h |  1 +
>  arch/arm64/kernel/smp.c          |  4 +-
>  arch/arm64/kvm/va_layout.c       | 76 ++++++++++++++++++++++++++++++++
>  3 files changed, 80 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 5168a0c516ae..e5226f7e4732 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -105,6 +105,7 @@ alternative_cb_end
>  void kvm_update_va_mask(struct alt_instr *alt,
>  			__le32 *origptr, __le32 *updptr, int nr_inst);
>  void kvm_compute_layout(void);
> +void kvm_fixup_hyp_relocations(void);
> 
>  static __always_inline unsigned long __kern_hyp_va(unsigned long v)
>  {
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 18e9727d3f64..30241afc2c93 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
>  			   "CPU: CPUs started in inconsistent modes");
>  	else
>  		pr_info("CPU: All CPU(s) started at EL1\n");
> -	if (IS_ENABLED(CONFIG_KVM))
> +	if (IS_ENABLED(CONFIG_KVM)) {
>  		kvm_compute_layout();
> +		kvm_fixup_hyp_relocations();
> +	}
>  }
> 
>  void __init smp_cpus_done(unsigned int max_cpus)
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index d8cc51bd60bf..b80fab974896 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -10,6 +10,7 @@
>  #include <asm/alternative.h>
>  #include <asm/debug-monitors.h>
>  #include <asm/insn.h>
> +#include <asm/kvm_asm.h>
>  #include <asm/kvm_mmu.h>
>  #include <asm/memory.h>
> 
> @@ -82,6 +83,81 @@ __init void kvm_compute_layout(void)
>  	init_hyp_physvirt_offset();
>  }
> 
> +#define __load_elf_u64(s)					\
> +	({							\
> +		extern u64 s;					\
> +		u64 val;					\
> +								\
> +		asm ("ldr %0, =%1" : "=r"(val) : "S"(&s));	\
> +		val;						\
> +	})

I'm not sure I get the rationale behind the naming here. None of this
has much to do with ELF, but seems to just load a value from a
constant pool.
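
For context, the "ldr %0, =%1" form relies on the assembler's "ldr Xn, =sym"
pseudo-instruction, which places the address of 'sym' in a nearby literal pool
and loads it with a PC-relative ldr. A minimal standalone sketch (the symbol
and function names below are made up, not part of the patch):

	/* Illustrative only: the assembler emits the address of
	 * 'some_linker_symbol' into a literal pool and loads it with a
	 * single PC-relative ldr. */
	extern u64 some_linker_symbol;

	static inline u64 load_literal_u64(void)
	{
		u64 val;

		asm ("ldr %0, =%1" : "=r"(val) : "S"(&some_linker_symbol));
		return val;
	}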

> +
> +static bool __is_within_bounds(u64 addr, char *start, char *end)
> +{
> +	return start <= (char*)addr && (char*)addr < end;
> +}
> +
> +static bool __is_in_hyp_section(u64 addr)
> +{
> +	return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
> +	       __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end) ||
> +	       __is_within_bounds(addr,
> +				  CHOOSE_NVHE_SYM(__per_cpu_start),
> +				  CHOOSE_NVHE_SYM(__per_cpu_end));
> +}
> +
> +static void __fixup_hyp_rel(u64 addr)
> +{
> +	u64 *ptr, kern_va, hyp_va;
> +
> +	/* Adjust the relocation address taken from ELF for KASLR. */
> +	addr += kaslr_offset();
> +
> +	/* Skip addresses not in any of the hyp sections. */
> +	if (!__is_in_hyp_section(addr))
> +		return;
> +
> +	/* Get the LM alias of the relocation address. */
> +	ptr = (u64*)kvm_ksym_ref((void*)addr);

Why the casting? We should be perfectly fine without.

nit: we really need to change the name of this helper, it doesn't have
anything to do with symbols anymore. And actually, lm_alias() *is* the
right thing to use here (we don't relocate anything on VHE).
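
For illustration, a rough sketch of what the lm_alias() form might look like
(untested, not part of the patch):

	/* Sketch only: take the linear-map alias of the kimg address
	 * directly, instead of going through kvm_ksym_ref(). */
	ptr = lm_alias((void *)addr);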

> +
> +	/*
> +	 * Read the value at the relocation address. It has already been
> +	 * relocated to the actual kernel kimg VA.
> +	 */
> +	kern_va = (u64)kvm_ksym_ref((void*)*ptr);

Same comment.

> +
> +	/* Convert to hyp VA. */
> +	hyp_va = __early_kern_hyp_va(kern_va);
> +
> +	/* Store hyp VA at the relocation address. */
> +	*ptr = __early_kern_hyp_va(kern_va);
> +}
> +
> +static void __fixup_hyp_rela(void)
> +{
> +	Elf64_Rela *rel;
> +	size_t i, n;
> +
> +	rel = (Elf64_Rela*)(kimage_vaddr + __load_elf_u64(__rela_offset));
> +	n = __load_elf_u64(__rela_size) / sizeof(*rel);
> +
> +	for (i = 0; i < n; ++i)
> +		__fixup_hyp_rel(rel[i].r_offset);
> +}
> +
> +/*
> + * The kernel relocated pointers to kernel VA. Iterate over relocations in
> + * the hypervisor ELF sections and convert them to hyp VA. This avoids the
> + * need to only use PC-relative addressing in hyp.
> + */
> +__init void kvm_fixup_hyp_relocations(void)
> +{
> +	if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())

What do we do if CONFIG_RELOCATABLE is not selected? As far as I can tell,
bad things will happen...

I'm also worried that at this stage, the kernel is broken, until you
remove the other bits involved in runtime offsetting pointers.

Thanks,

         M.
Ard Biesheuvel Nov. 24, 2020, 1:45 p.m. UTC | #2
On Thu, 19 Nov 2020 at 17:25, David Brazdil <dbrazdil@google.com> wrote:
>
> KVM nVHE code runs under a different VA mapping than the kernel, hence
> so far it relied only on PC-relative addressing to avoid accidentally
> using a relocated kernel VA from a constant pool (see hyp_symbol_addr).
>
> So as to reduce the possibility of a programmer error, fixup the
> relocated addresses instead. Let the kernel relocate them to kernel VA
> first, but then iterate over them again, filter those that point to hyp
> code/data and convert the kernel VA to hyp VA.
>
> This is done after kvm_compute_layout and before apply_alternatives.
>

If this is significant enough to call out, please include the reason for it.

> Signed-off-by: David Brazdil <dbrazdil@google.com>
> ---
>  arch/arm64/include/asm/kvm_mmu.h |  1 +
>  arch/arm64/kernel/smp.c          |  4 +-
>  arch/arm64/kvm/va_layout.c       | 76 ++++++++++++++++++++++++++++++++
>  3 files changed, 80 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 5168a0c516ae..e5226f7e4732 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -105,6 +105,7 @@ alternative_cb_end
>  void kvm_update_va_mask(struct alt_instr *alt,
>                         __le32 *origptr, __le32 *updptr, int nr_inst);
>  void kvm_compute_layout(void);
> +void kvm_fixup_hyp_relocations(void);
>
>  static __always_inline unsigned long __kern_hyp_va(unsigned long v)
>  {
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 18e9727d3f64..30241afc2c93 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
>                            "CPU: CPUs started in inconsistent modes");
>         else
>                 pr_info("CPU: All CPU(s) started at EL1\n");
> -       if (IS_ENABLED(CONFIG_KVM))
> +       if (IS_ENABLED(CONFIG_KVM)) {
>                 kvm_compute_layout();
> +               kvm_fixup_hyp_relocations();
> +       }
>  }
>
>  void __init smp_cpus_done(unsigned int max_cpus)
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index d8cc51bd60bf..b80fab974896 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -10,6 +10,7 @@
>  #include <asm/alternative.h>
>  #include <asm/debug-monitors.h>
>  #include <asm/insn.h>
> +#include <asm/kvm_asm.h>
>  #include <asm/kvm_mmu.h>
>  #include <asm/memory.h>
>
> @@ -82,6 +83,81 @@ __init void kvm_compute_layout(void)
>         init_hyp_physvirt_offset();
>  }
>
> +#define __load_elf_u64(s)                                      \
> +       ({                                                      \
> +               extern u64 s;                                   \
> +               u64 val;                                        \
> +                                                               \
> +               asm ("ldr %0, =%1" : "=r"(val) : "S"(&s));      \
> +               val;                                            \
> +       })
> +

Do you need this to ensure that the reference is absolute? There may
be more elegant ways to achieve that, using weak references for
instance.

Also, in the relocation startup code, I deliberately used a 32-bit
quantity here, as it won't get confused for an absolute virtual
address that needs relocation.
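
For illustration, a 32-bit variant along those lines could look roughly like
this (the __load_elf_u32 name is made up; a sketch only, not part of the
patch):

	/* Sketch: a 32-bit quantity cannot be confused for an absolute
	 * 64-bit virtual address that would need relocation. */
	#define __load_elf_u32(s)					\
		({							\
			extern u32 s;					\
			u32 val;					\
									\
			asm ("ldr %w0, =%1" : "=r"(val) : "S"(&s));	\
			val;						\
		})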


> +static bool __is_within_bounds(u64 addr, char *start, char *end)
> +{
> +       return start <= (char*)addr && (char*)addr < end;
> +}
> +
> +static bool __is_in_hyp_section(u64 addr)
> +{
> +       return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
> +              __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end) ||
> +              __is_within_bounds(addr,
> +                                 CHOOSE_NVHE_SYM(__per_cpu_start),
> +                                 CHOOSE_NVHE_SYM(__per_cpu_end));
> +}
> +

It is slightly disappointing that we need to filter these one by one
like this, but I don't think there are any guarantees about the order
in which the R_AARCH64_RELATIVE entries appear.

> +static void __fixup_hyp_rel(u64 addr)

__init ?

> +{
> +       u64 *ptr, kern_va, hyp_va;
> +
> +       /* Adjust the relocation address taken from ELF for KASLR. */
> +       addr += kaslr_offset();
> +
> +       /* Skip addresses not in any of the hyp sections. */
> +       if (!__is_in_hyp_section(addr))
> +               return;
> +
> +       /* Get the LM alias of the relocation address. */
> +       ptr = (u64*)kvm_ksym_ref((void*)addr);
> +
> +       /*
> +        * Read the value at the relocation address. It has already been
> +        * relocated to the actual kernel kimg VA.
> +        */
> +       kern_va = (u64)kvm_ksym_ref((void*)*ptr);
> +
> +       /* Convert to hyp VA. */
> +       hyp_va = __early_kern_hyp_va(kern_va);
> +
> +       /* Store hyp VA at the relocation address. */
> +       *ptr = __early_kern_hyp_va(kern_va);
> +}
> +
> +static void __fixup_hyp_rela(void)

__init ?

> +{
> +       Elf64_Rela *rel;
> +       size_t i, n;
> +
> +       rel = (Elf64_Rela*)(kimage_vaddr + __load_elf_u64(__rela_offset));
> +       n = __load_elf_u64(__rela_size) / sizeof(*rel);
> +
> +       for (i = 0; i < n; ++i)
> +               __fixup_hyp_rel(rel[i].r_offset);
> +}
> +
> +/*
> + * The kernel relocated pointers to kernel VA. Iterate over relocations in
> + * the hypervisor ELF sections and convert them to hyp VA. This avoids the
> + * need to only use PC-relative addressing in hyp.
> + */
> +__init void kvm_fixup_hyp_relocations(void)

It is more idiomatic to put the __init after the 'void', and someone
is undoubtedly going to send a patch to 'fix' that if we merge it like
this.
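
That is, the conventional spelling would be:

	void __init kvm_fixup_hyp_relocations(void)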

> +{
> +       if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())
> +               return;
> +
> +       __fixup_hyp_rela();
> +}
> +
>  static u32 compute_instruction(int n, u32 rd, u32 rn)
>  {
>         u32 insn = AARCH64_BREAK_FAULT;
> --
> 2.29.2.299.gdc1121823c-goog
>

Patch

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 5168a0c516ae..e5226f7e4732 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -105,6 +105,7 @@  alternative_cb_end
 void kvm_update_va_mask(struct alt_instr *alt,
 			__le32 *origptr, __le32 *updptr, int nr_inst);
 void kvm_compute_layout(void);
+void kvm_fixup_hyp_relocations(void);
 
 static __always_inline unsigned long __kern_hyp_va(unsigned long v)
 {
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 18e9727d3f64..30241afc2c93 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -434,8 +434,10 @@  static void __init hyp_mode_check(void)
 			   "CPU: CPUs started in inconsistent modes");
 	else
 		pr_info("CPU: All CPU(s) started at EL1\n");
-	if (IS_ENABLED(CONFIG_KVM))
+	if (IS_ENABLED(CONFIG_KVM)) {
 		kvm_compute_layout();
+		kvm_fixup_hyp_relocations();
+	}
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index d8cc51bd60bf..b80fab974896 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -10,6 +10,7 @@ 
 #include <asm/alternative.h>
 #include <asm/debug-monitors.h>
 #include <asm/insn.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/memory.h>
 
@@ -82,6 +83,81 @@  __init void kvm_compute_layout(void)
 	init_hyp_physvirt_offset();
 }
 
+#define __load_elf_u64(s)					\
+	({							\
+		extern u64 s;					\
+		u64 val;					\
+								\
+		asm ("ldr %0, =%1" : "=r"(val) : "S"(&s));	\
+		val;						\
+	})
+
+static bool __is_within_bounds(u64 addr, char *start, char *end)
+{
+	return start <= (char*)addr && (char*)addr < end;
+}
+
+static bool __is_in_hyp_section(u64 addr)
+{
+	return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
+	       __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end) ||
+	       __is_within_bounds(addr,
+				  CHOOSE_NVHE_SYM(__per_cpu_start),
+				  CHOOSE_NVHE_SYM(__per_cpu_end));
+}
+
+static void __fixup_hyp_rel(u64 addr)
+{
+	u64 *ptr, kern_va, hyp_va;
+
+	/* Adjust the relocation address taken from ELF for KASLR. */
+	addr += kaslr_offset();
+
+	/* Skip addresses not in any of the hyp sections. */
+	if (!__is_in_hyp_section(addr))
+		return;
+
+	/* Get the LM alias of the relocation address. */
+	ptr = (u64*)kvm_ksym_ref((void*)addr);
+
+	/*
+	 * Read the value at the relocation address. It has already been
+	 * relocated to the actual kernel kimg VA.
+	 */
+	kern_va = (u64)kvm_ksym_ref((void*)*ptr);
+
+	/* Convert to hyp VA. */
+	hyp_va = __early_kern_hyp_va(kern_va);
+
+	/* Store hyp VA at the relocation address. */
+	*ptr = hyp_va;
+}
+
+static void __fixup_hyp_rela(void)
+{
+	Elf64_Rela *rel;
+	size_t i, n;
+
+	rel = (Elf64_Rela*)(kimage_vaddr + __load_elf_u64(__rela_offset));
+	n = __load_elf_u64(__rela_size) / sizeof(*rel);
+
+	for (i = 0; i < n; ++i)
+		__fixup_hyp_rel(rel[i].r_offset);
+}
+
+/*
+ * The kernel relocated pointers to kernel VA. Iterate over relocations in
+ * the hypervisor ELF sections and convert them to hyp VA. This avoids the
+ * need to only use PC-relative addressing in hyp.
+ */
+__init void kvm_fixup_hyp_relocations(void)
+{
+	if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())
+		return;
+
+	__fixup_hyp_rela();
+}
+
 static u32 compute_instruction(int n, u32 rd, u32 rn)
 {
 	u32 insn = AARCH64_BREAK_FAULT;