diff mbox

[RFC,06/22] kvm: Adapt assembly for PIE support

Message ID 20170718223333.110371-7-thgarnie@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thomas Garnier July 18, 2017, 10:33 p.m. UTC
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible. The new __ASM_GET_PTR_PRE macro is used to
get the address of a symbol on both 32 and 64-bit with PIE support.

Position Independent Executable (PIE) support will allow to extended the
KASLR randomization range below the -2G memory limit.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
 arch/x86/include/asm/kvm_host.h | 6 ++++--
 arch/x86/kernel/kvm.c           | 6 ++++--
 arch/x86/kvm/svm.c              | 4 ++--
 3 files changed, 10 insertions(+), 6 deletions(-)

Comments

Brian Gerst July 19, 2017, 2:49 a.m. UTC | #1
On Tue, Jul 18, 2017 at 6:33 PM, Thomas Garnier <thgarnie@google.com> wrote:
> Change the assembly code to use only relative references of symbols for the
> kernel to be PIE compatible. The new __ASM_GET_PTR_PRE macro is used to
> get the address of a symbol on both 32 and 64-bit with PIE support.
>
> Position Independent Executable (PIE) support will allow to extended the
> KASLR randomization range below the -2G memory limit.
>
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 6 ++++--
>  arch/x86/kernel/kvm.c           | 6 ++++--
>  arch/x86/kvm/svm.c              | 4 ++--
>  3 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 87ac4fba6d8e..3041201a3aeb 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1352,9 +1352,11 @@ asmlinkage void kvm_spurious_fault(void);
>         ".pushsection .fixup, \"ax\" \n" \
>         "667: \n\t" \
>         cleanup_insn "\n\t"                   \
> -       "cmpb $0, kvm_rebooting \n\t"         \
> +       "cmpb $0, kvm_rebooting" __ASM_SEL(,(%%rip)) " \n\t" \
>         "jne 668b \n\t"                       \
> -       __ASM_SIZE(push) " $666b \n\t"        \
> +       __ASM_SIZE(push) "%%" _ASM_AX " \n\t"           \
> +       __ASM_GET_PTR_PRE(666b) "%%" _ASM_AX "\n\t"     \
> +       "xchg %%" _ASM_AX ", (%%" _ASM_SP ") \n\t"      \
>         "call kvm_spurious_fault \n\t"        \
>         ".popsection \n\t" \
>         _ASM_EXTABLE(666b, 667b)
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 71c17a5be983..53b8ad162589 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -618,8 +618,10 @@ asm(
>  ".global __raw_callee_save___kvm_vcpu_is_preempted;"
>  ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
>  "__raw_callee_save___kvm_vcpu_is_preempted:"
> -"movq  __per_cpu_offset(,%rdi,8), %rax;"
> -"cmpb  $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
> +"leaq  __per_cpu_offset(%rip), %rax;"
> +"movq  (%rax,%rdi,8), %rax;"
> +"addq  " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rax;"

This doesn't look right.  It's accessing a per-cpu variable.  The
per-cpu section is an absolute, zero-based section and not subject to
relocation.

> +"cmpb  $0, (%rax);
>  "setne %al;"
>  "ret;"
>  ".popsection");

--
Brian Gerst
Thomas Garnier July 19, 2017, 3:40 p.m. UTC | #2
On Tue, Jul 18, 2017 at 7:49 PM, Brian Gerst <brgerst@gmail.com> wrote:
> On Tue, Jul 18, 2017 at 6:33 PM, Thomas Garnier <thgarnie@google.com> wrote:
>> Change the assembly code to use only relative references of symbols for the
>> kernel to be PIE compatible. The new __ASM_GET_PTR_PRE macro is used to
>> get the address of a symbol on both 32 and 64-bit with PIE support.
>>
>> Position Independent Executable (PIE) support will allow to extended the
>> KASLR randomization range below the -2G memory limit.
>>
>> Signed-off-by: Thomas Garnier <thgarnie@google.com>
>> ---
>>  arch/x86/include/asm/kvm_host.h | 6 ++++--
>>  arch/x86/kernel/kvm.c           | 6 ++++--
>>  arch/x86/kvm/svm.c              | 4 ++--
>>  3 files changed, 10 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 87ac4fba6d8e..3041201a3aeb 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1352,9 +1352,11 @@ asmlinkage void kvm_spurious_fault(void);
>>         ".pushsection .fixup, \"ax\" \n" \
>>         "667: \n\t" \
>>         cleanup_insn "\n\t"                   \
>> -       "cmpb $0, kvm_rebooting \n\t"         \
>> +       "cmpb $0, kvm_rebooting" __ASM_SEL(,(%%rip)) " \n\t" \
>>         "jne 668b \n\t"                       \
>> -       __ASM_SIZE(push) " $666b \n\t"        \
>> +       __ASM_SIZE(push) "%%" _ASM_AX " \n\t"           \
>> +       __ASM_GET_PTR_PRE(666b) "%%" _ASM_AX "\n\t"     \
>> +       "xchg %%" _ASM_AX ", (%%" _ASM_SP ") \n\t"      \
>>         "call kvm_spurious_fault \n\t"        \
>>         ".popsection \n\t" \
>>         _ASM_EXTABLE(666b, 667b)
>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> index 71c17a5be983..53b8ad162589 100644
>> --- a/arch/x86/kernel/kvm.c
>> +++ b/arch/x86/kernel/kvm.c
>> @@ -618,8 +618,10 @@ asm(
>>  ".global __raw_callee_save___kvm_vcpu_is_preempted;"
>>  ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
>>  "__raw_callee_save___kvm_vcpu_is_preempted:"
>> -"movq  __per_cpu_offset(,%rdi,8), %rax;"
>> -"cmpb  $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
>> +"leaq  __per_cpu_offset(%rip), %rax;"
>> +"movq  (%rax,%rdi,8), %rax;"
>> +"addq  " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rax;"
>
> This doesn't look right.  It's accessing a per-cpu variable.  The
> per-cpu section is an absolute, zero-based section and not subject to
> relocation.
>

PIE does not respect the zero-based section, it tries to have
everything relative. Patch 16/22 also adapt per-cpu to work with PIE
(while keeping the zero absolute design by default).

>> +"cmpb  $0, (%rax);
>>  "setne %al;"
>>  "ret;"
>>  ".popsection");
>
> --
> Brian Gerst
H. Peter Anvin July 19, 2017, 10:27 p.m. UTC | #3
On 07/19/17 08:40, Thomas Garnier wrote:
>>
>> This doesn't look right.  It's accessing a per-cpu variable.  The
>> per-cpu section is an absolute, zero-based section and not subject to
>> relocation.
> 
> PIE does not respect the zero-based section, it tries to have
> everything relative. Patch 16/22 also adapt per-cpu to work with PIE
> (while keeping the zero absolute design by default).
> 

This is silly.  The right thing is for PIE is to be explicitly absolute,
without (%rip).  The use of (%rip) memory references for percpu is just
an optimization.

	-hpa
Thomas Garnier July 19, 2017, 10:44 p.m. UTC | #4
On Wed, Jul 19, 2017 at 3:27 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 07/19/17 08:40, Thomas Garnier wrote:
>>>
>>> This doesn't look right.  It's accessing a per-cpu variable.  The
>>> per-cpu section is an absolute, zero-based section and not subject to
>>> relocation.
>>
>> PIE does not respect the zero-based section, it tries to have
>> everything relative. Patch 16/22 also adapt per-cpu to work with PIE
>> (while keeping the zero absolute design by default).
>>
>
> This is silly.  The right thing is for PIE is to be explicitly absolute,
> without (%rip).  The use of (%rip) memory references for percpu is just
> an optimization.

I agree that it is odd but that's how the compiler generates code. I
will re-explore PIC options with mcmodel=small or medium, as mentioned
on other threads.

>
>         -hpa
>
Ard Biesheuvel July 19, 2017, 10:58 p.m. UTC | #5
On 19 July 2017 at 23:27, H. Peter Anvin <hpa@zytor.com> wrote:
> On 07/19/17 08:40, Thomas Garnier wrote:
>>>
>>> This doesn't look right.  It's accessing a per-cpu variable.  The
>>> per-cpu section is an absolute, zero-based section and not subject to
>>> relocation.
>>
>> PIE does not respect the zero-based section, it tries to have
>> everything relative. Patch 16/22 also adapt per-cpu to work with PIE
>> (while keeping the zero absolute design by default).
>>
>
> This is silly.  The right thing is for PIE is to be explicitly absolute,
> without (%rip).  The use of (%rip) memory references for percpu is just
> an optimization.
>

Sadly, there is an issue in binutils that may prevent us from doing
this as cleanly as we would want.

For historical reasons, bfd.ld emits special symbols like
__GLOBAL_OFFSET_TABLE__ as absolute symbols with a section index of
SHN_ABS, even though it is quite obvious that they are relative like
any other symbol that points into the image. Unfortunately, this means
that binutils needs to emit R_X86_64_RELATIVE relocations even for
SHN_ABS symbols, which means we lose the ability to use both absolute
and relocatable symbols in the same PIE image (unless the reloc tool
can filter them out)

More info here:
https://sourceware.org/bugzilla/show_bug.cgi?id=19818
H. Peter Anvin July 19, 2017, 11:47 p.m. UTC | #6
<paul.gortmaker@windriver.com>,Chris Metcalf <cmetcalf@mellanox.com>,"Paul E . McKenney" <paulmck@linux.vnet.ibm.com>,Andrew Morton <akpm@linux-foundation.org>,Christopher Li <sparse@chrisli.org>,Dou Liyang <douly.fnst@cn.fujitsu.com>,Masahiro Yamada <yamada.masahiro@socionext.com>,Daniel Borkmann <daniel@iogearbox.net>,Markus Trippelsdorf <markus@trippelsdorf.de>,Peter Foley <pefoley2@pefoley.com>,Steven Rostedt <rostedt@goodmis.org>,Tim Chen <tim.c.chen@linux.intel.com>,Catalin Marinas <catalin.marinas@arm.com>,Matthew Wilcox <mawilcox@microsoft.com>,Michal Hocko <mhocko@suse.com>,Rob Landley <rob@landley.net>,Jiri Kosina <jkosina@suse.cz>,"H . J . Lu" <hjl.tools@gmail.com>,Paul Bolle <pebolle@tiscali.nl>,Baoquan He <bhe@redhat.com>,Daniel Micay <danielmicay@gmail.com>,the arch/x86 maintainers <x86@kernel.org>,"linux-crypto@vger.kernel.org" <linux-crypto@vger.kernel.org>,Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,xen-devel@lists.xenproject.org,kvm list
<kvm@vger.kernel.org>,linux-pm <linux-pm@vger.kernel.org>,linux-arch <linux-arch@vger.kernel.org>,Linux-Sparse <linux-sparse@vger.kernel.org>,Kernel Hardening <kernel-hardening@lists.openwall.com>
From: hpa@zytor.com
Message-ID: <83BA7600-BC8D-4C91-812C-DD2A0BF4474B@zytor.com>

On July 19, 2017 3:58:07 PM PDT, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>On 19 July 2017 at 23:27, H. Peter Anvin <hpa@zytor.com> wrote:
>> On 07/19/17 08:40, Thomas Garnier wrote:
>>>>
>>>> This doesn't look right.  It's accessing a per-cpu variable.  The
>>>> per-cpu section is an absolute, zero-based section and not subject
>to
>>>> relocation.
>>>
>>> PIE does not respect the zero-based section, it tries to have
>>> everything relative. Patch 16/22 also adapt per-cpu to work with PIE
>>> (while keeping the zero absolute design by default).
>>>
>>
>> This is silly.  The right thing is for PIE is to be explicitly
>absolute,
>> without (%rip).  The use of (%rip) memory references for percpu is
>just
>> an optimization.
>>
>
>Sadly, there is an issue in binutils that may prevent us from doing
>this as cleanly as we would want.
>
>For historical reasons, bfd.ld emits special symbols like
>__GLOBAL_OFFSET_TABLE__ as absolute symbols with a section index of
>SHN_ABS, even though it is quite obvious that they are relative like
>any other symbol that points into the image. Unfortunately, this means
>that binutils needs to emit R_X86_64_RELATIVE relocations even for
>SHN_ABS symbols, which means we lose the ability to use both absolute
>and relocatable symbols in the same PIE image (unless the reloc tool
>can filter them out)
>
>More info here:
>https://sourceware.org/bugzilla/show_bug.cgi?id=19818

The reloc tool already has the ability to filter symbols.
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87ac4fba6d8e..3041201a3aeb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1352,9 +1352,11 @@  asmlinkage void kvm_spurious_fault(void);
 	".pushsection .fixup, \"ax\" \n" \
 	"667: \n\t" \
 	cleanup_insn "\n\t"		      \
-	"cmpb $0, kvm_rebooting \n\t"	      \
+	"cmpb $0, kvm_rebooting" __ASM_SEL(,(%%rip)) " \n\t" \
 	"jne 668b \n\t"      		      \
-	__ASM_SIZE(push) " $666b \n\t"	      \
+	__ASM_SIZE(push) "%%" _ASM_AX " \n\t"		\
+	__ASM_GET_PTR_PRE(666b) "%%" _ASM_AX "\n\t"	\
+	"xchg %%" _ASM_AX ", (%%" _ASM_SP ") \n\t"	\
 	"call kvm_spurious_fault \n\t"	      \
 	".popsection \n\t" \
 	_ASM_EXTABLE(666b, 667b)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 71c17a5be983..53b8ad162589 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -618,8 +618,10 @@  asm(
 ".global __raw_callee_save___kvm_vcpu_is_preempted;"
 ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
 "__raw_callee_save___kvm_vcpu_is_preempted:"
-"movq	__per_cpu_offset(,%rdi,8), %rax;"
-"cmpb	$0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+"leaq	__per_cpu_offset(%rip), %rax;"
+"movq	(%rax,%rdi,8), %rax;"
+"addq	" __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rax;"
+"cmpb	$0, (%rax);"
 "setne	%al;"
 "ret;"
 ".popsection");
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4d8141e533c3..8b718c6d6729 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -554,12 +554,12 @@  static u32 svm_msrpm_offset(u32 msr)
 
 static inline void clgi(void)
 {
-	asm volatile (__ex(SVM_CLGI));
+	asm volatile (__ex(SVM_CLGI) : :);
 }
 
 static inline void stgi(void)
 {
-	asm volatile (__ex(SVM_STGI));
+	asm volatile (__ex(SVM_STGI) : :);
 }
 
 static inline void invlpga(unsigned long addr, u32 asid)