diff mbox series

[kvm-unit-tests,01/14] x86/cstart: Don't use MSR_GS_BASE in 32-bit boot code

Message ID 20210422030504.3488253-2-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series x86: MSR_GS_BASE and friends | expand

Commit Message

Sean Christopherson April 22, 2021, 3:04 a.m. UTC
Load the per-cpu GS.base for 32-bit builds by building a temporary GDT
and loading a "real" segment.  Using MSR_GS_BASE is wrong and broken:
it's a 64-bit-only MSR and does not exist on 32-bit CPUs.  The current
code works only because 32-bit KVM VMX incorrectly disables interception
of MSR_GS_BASE, and no one runs KVM on an actual 32-bit physical CPU,
i.e. the MSR exists in hardware and so everything "works".

32-bit KVM SVM is not buggy and correctly injects #GP on the WRMSR, i.e.
the tests have never worked on 32-bit SVM.

Fixes: dfe6cb6 ("Add 32 bit smp initialization code")
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 x86/cstart.S | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

Comments

Paolo Bonzini April 22, 2021, 9:44 a.m. UTC | #1
On 22/04/21 05:04, Sean Christopherson wrote:
> Load the per-cpu GS.base for 32-bit build by building a temporary GDT
> and loading a "real" segment.  Using MSR_GS_BASE is wrong and broken,
> it's a 64-bit only MSR and does not exist on 32-bit CPUs.  The current
> code works only because 32-bit KVM VMX incorrectly disables interception
> of MSR_GS_BASE, and no one runs KVM on an actual 32-bit physical CPU,
> i.e. the MSR exists in hardware and so everything "works".
> 
> 32-bit KVM SVM is not buggy and correctly injects #GP on the WRMSR, i.e.
> the tests have never worked on 32-bit SVM.
> 
> Fixes: dfe6cb6 ("Add 32 bit smp initialization code")
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Relying on the descriptor cache is quite ugly, but the only alternatives
are setting up extra segments in the GDT or having per-CPU GDTs (which
I'd rather avoid).

Paolo

> ---
>   x86/cstart.S | 28 +++++++++++++++++++++++-----
>   1 file changed, 23 insertions(+), 5 deletions(-)
> 
> diff --git a/x86/cstart.S b/x86/cstart.S
> index 489c561..91970a2 100644
> --- a/x86/cstart.S
> +++ b/x86/cstart.S
> @@ -89,13 +89,31 @@ mb_flags = 0x0
>   	.long mb_magic, mb_flags, 0 - (mb_magic + mb_flags)
>   mb_cmdline = 16
>   
> -MSR_GS_BASE = 0xc0000101
> -
>   .macro setup_percpu_area
>   	lea -4096(%esp), %eax
> -	mov $0, %edx
> -	mov $MSR_GS_BASE, %ecx
> -	wrmsr
> +
> +	mov %eax, %edx
> +	shl $16, %edx
> +	or  $0xffff, %edx
> +	mov %edx, 0x10(%eax)
> +
> +	mov %eax, %edx
> +	and $0xff000000, %edx
> +	mov %eax, %ecx
> +	shr $16, %ecx
> +	and $0xff, %ecx
> +	or  %ecx, %edx
> +	or  $0x00cf9300, %edx
> +	mov %edx, 0x14(%eax)
> +
> +	movw $0x17, 0(%eax)
> +	mov %eax, 2(%eax)
> +	lgdtl (%eax)
> +
> +	mov $0x10, %ax
> +	mov %ax, %gs
> +
> +	lgdtl gdt32_descr
>   .endm
>   
>   .macro setup_segments
>
Paolo Bonzini April 22, 2021, 10:02 a.m. UTC | #2
On 22/04/21 05:04, Sean Christopherson wrote:
> Load the per-cpu GS.base for 32-bit build by building a temporary GDT
> and loading a "real" segment.  Using MSR_GS_BASE is wrong and broken,
> it's a 64-bit only MSR and does not exist on 32-bit CPUs.  The current
> code works only because 32-bit KVM VMX incorrectly disables interception
> of MSR_GS_BASE, and no one runs KVM on an actual 32-bit physical CPU,
> i.e. the MSR exists in hardware and so everything "works".
> 
> 32-bit KVM SVM is not buggy and correctly injects #GP on the WRMSR, i.e.
> the tests have never worked on 32-bit SVM.

Hmm, this breaks task switch.  But setting up separate descriptors is
not hard:

diff --git a/x86/cstart.S b/x86/cstart.S
index 489c561..7d9ed96 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -58,6 +58,10 @@ tss_descr:
          .rept max_cpus
          .quad 0x000089000000ffff // 32-bit avail tss
          .endr
+percpu_descr:
+        .rept max_cpus
+        .quad 0x00cf93000000ffff // 32-bit data segment for perCPU area
+        .endr
  gdt32_end:

  i = 0
@@ -89,13 +93,23 @@ mb_flags = 0x0
  	.long mb_magic, mb_flags, 0 - (mb_magic + mb_flags)
  mb_cmdline = 16

-MSR_GS_BASE = 0xc0000101
-
  .macro setup_percpu_area
  	lea -4096(%esp), %eax
-	mov $0, %edx
-	mov $MSR_GS_BASE, %ecx
-	wrmsr
+
+	/* fill GS_BASE in the GDT */
+	mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %ebx
+	mov (%ebx), %ebx
+	shr $24, %ebx
+	or %ax, percpu_descr+2(,%ebx,8)
+
+	shr $16, %eax
+	or %al, percpu_descr+4(,%ebx,8)
+	or %ah, percpu_descr+7(,%ebx,8)
+
+	lgdtl gdt32_descr
+	lea percpu_descr-gdt32(,%ebx,8), %eax
+	mov %ax, %gs
+
  .endm

  .macro setup_segments
@@ -188,16 +202,14 @@ load_tss:
  	mov (%eax), %eax
  	shr $24, %eax
  	mov %eax, %ebx
-	shl $3, %ebx
  	mov $((tss_end - tss) / max_cpus), %edx
  	imul %edx
  	add $tss, %eax
-	mov %ax, tss_descr+2(%ebx)
+	mov %ax, tss_descr+2(,%ebx,8)
  	shr $16, %eax
-	mov %al, tss_descr+4(%ebx)
-	shr $8, %eax
-	mov %al, tss_descr+7(%ebx)
-	lea tss_descr-gdt32(%ebx), %eax
+	mov %al, tss_descr+4(,%ebx,8)
+	mov %ah, tss_descr+7(,%ebx,8)
+	lea tss_descr-gdt32(,%ebx,8), %eax
  	ltr %ax
  	ret


Paolo
Sean Christopherson April 22, 2021, 5:57 p.m. UTC | #3
On Thu, Apr 22, 2021, Paolo Bonzini wrote:
> On 22/04/21 05:04, Sean Christopherson wrote:
> > Load the per-cpu GS.base for 32-bit build by building a temporary GDT
> > and loading a "real" segment.  Using MSR_GS_BASE is wrong and broken,
> > it's a 64-bit only MSR and does not exist on 32-bit CPUs.  The current
> > code works only because 32-bit KVM VMX incorrectly disables interception
> > of MSR_GS_BASE, and no one runs KVM on an actual 32-bit physical CPU,
> > i.e. the MSR exists in hardware and so everything "works".
> > 
> > 32-bit KVM SVM is not buggy and correctly injects #GP on the WRMSR, i.e.
> > the tests have never worked on 32-bit SVM.
> 
> Hmm, this breaks task switch.  But setting up separate descriptors is
> not hard:

Much better.

> diff --git a/x86/cstart.S b/x86/cstart.S
> index 489c561..7d9ed96 100644
> --- a/x86/cstart.S
> +++ b/x86/cstart.S
> @@ -58,6 +58,10 @@ tss_descr:
>          .rept max_cpus
>          .quad 0x000089000000ffff // 32-bit avail tss
>          .endr
> +percpu_descr:
> +        .rept max_cpus
> +        .quad 0x00cf93000000ffff // 32-bit data segment for perCPU area
> +        .endr
>  gdt32_end:
> 
>  i = 0
> @@ -89,13 +93,23 @@ mb_flags = 0x0
>  	.long mb_magic, mb_flags, 0 - (mb_magic + mb_flags)
>  mb_cmdline = 16
> 
> -MSR_GS_BASE = 0xc0000101
> -
>  .macro setup_percpu_area
>  	lea -4096(%esp), %eax
> -	mov $0, %edx
> -	mov $MSR_GS_BASE, %ecx
> -	wrmsr
> +
> +	/* fill GS_BASE in the GDT */
> +	mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %ebx

Using %ebx clobbers the mbi_bootinfo pointer.  The easiest fix is to use %edx or
%ecx.

> +	mov (%ebx), %ebx

No need to load the address into a reg, just drop the "$" above and encode
"mov [imm32], <reg>".

Want to fold this into your patch?

diff --git a/x86/cstart.S b/x86/cstart.S
index 7d9ed96..fb6eda5 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -97,17 +97,16 @@ mb_cmdline = 16
        lea -4096(%esp), %eax

        /* fill GS_BASE in the GDT */
-       mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %ebx
-       mov (%ebx), %ebx
-       shr $24, %ebx
-       or %ax, percpu_descr+2(,%ebx,8)
+       mov (APIC_DEFAULT_PHYS_BASE + APIC_ID), %edx
+       shr $24, %edx
+       or %ax, percpu_descr+2(,%edx,8)

        shr $16, %eax
-       or %al, percpu_descr+4(,%ebx,8)
-       or %ah, percpu_descr+7(,%ebx,8)
+       or %al, percpu_descr+4(,%edx,8)
+       or %ah, percpu_descr+7(,%edx,8)

        lgdtl gdt32_descr
-       lea percpu_descr-gdt32(,%ebx,8), %eax
+       lea percpu_descr-gdt32(,%edx,8), %eax
        mov %ax, %gs

 .endm

> +	shr $24, %ebx
> +	or %ax, percpu_descr+2(,%ebx,8)
> +
> +	shr $16, %eax
> +	or %al, percpu_descr+4(,%ebx,8)
> +	or %ah, percpu_descr+7(,%ebx,8)
> +
> +	lgdtl gdt32_descr
> +	lea percpu_descr-gdt32(,%ebx,8), %eax
> +	mov %ax, %gs
> +
>  .endm
> 
>  .macro setup_segments
> @@ -188,16 +202,14 @@ load_tss:
>  	mov (%eax), %eax
>  	shr $24, %eax
>  	mov %eax, %ebx
> -	shl $3, %ebx
>  	mov $((tss_end - tss) / max_cpus), %edx
>  	imul %edx
>  	add $tss, %eax
> -	mov %ax, tss_descr+2(%ebx)
> +	mov %ax, tss_descr+2(,%ebx,8)
>  	shr $16, %eax
> -	mov %al, tss_descr+4(%ebx)
> -	shr $8, %eax
> -	mov %al, tss_descr+7(%ebx)
> -	lea tss_descr-gdt32(%ebx), %eax
> +	mov %al, tss_descr+4(,%ebx,8)
> +	mov %ah, tss_descr+7(,%ebx,8)

Is there a functional change here?  If not, can you throw this into a separate
patch?

Thanks!

> +	lea tss_descr-gdt32(,%ebx,8), %eax
>  	ltr %ax
>  	ret
> 
> 
> Paolo
>
Paolo Bonzini April 23, 2021, 6:57 a.m. UTC | #4
On 22/04/21 19:57, Sean Christopherson wrote:
> On Thu, Apr 22, 2021, Paolo Bonzini wrote:
>> On 22/04/21 05:04, Sean Christopherson wrote:
>>> Load the per-cpu GS.base for 32-bit build by building a temporary GDT
>>> and loading a "real" segment.  Using MSR_GS_BASE is wrong and broken,
>>> it's a 64-bit only MSR and does not exist on 32-bit CPUs.  The current
>>> code works only because 32-bit KVM VMX incorrectly disables interception
>>> of MSR_GS_BASE, and no one runs KVM on an actual 32-bit physical CPU,
>>> i.e. the MSR exists in hardware and so everything "works".
>>>
>>> 32-bit KVM SVM is not buggy and correctly injects #GP on the WRMSR, i.e.
>>> the tests have never worked on 32-bit SVM.
>>
>> Hmm, this breaks task switch.  But setting up separate descriptors is
>> not hard:
> 
> Much better.
> 
> Using %ebx crushes the mbi_bootinfo pointer.  The easiest fix is to use %edx or
> %ecx.
> 
>> +	mov (%ebx), %ebx
> 
> No need to load the address into a reg, just drop the "$" above and encode
> "mov [imm32], <reg>".

Yep, I had already fixed more or less the same things (plus the task 
gate TSS setup, which must not hardcode GS to 0x10; no idea how it 
worked before) before seeing your mail.  I sent the result to the 
mailing list.

Paolo
diff mbox series

Patch

diff --git a/x86/cstart.S b/x86/cstart.S
index 489c561..91970a2 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -89,13 +89,31 @@  mb_flags = 0x0
 	.long mb_magic, mb_flags, 0 - (mb_magic + mb_flags)
 mb_cmdline = 16
 
-MSR_GS_BASE = 0xc0000101
-
 .macro setup_percpu_area
 	lea -4096(%esp), %eax
-	mov $0, %edx
-	mov $MSR_GS_BASE, %ecx
-	wrmsr
+
+	mov %eax, %edx
+	shl $16, %edx
+	or  $0xffff, %edx
+	mov %edx, 0x10(%eax)
+
+	mov %eax, %edx
+	and $0xff000000, %edx
+	mov %eax, %ecx
+	shr $16, %ecx
+	and $0xff, %ecx
+	or  %ecx, %edx
+	or  $0x00cf9300, %edx
+	mov %edx, 0x14(%eax)
+
+	movw $0x17, 0(%eax)
+	mov %eax, 2(%eax)
+	lgdtl (%eax)
+
+	mov $0x10, %ax
+	mov %ax, %gs
+
+	lgdtl gdt32_descr
 .endm
 
 .macro setup_segments