diff mbox series

[v2,7/9] x86/svm: VMEntry/Exit logic for MSR_SPEC_CTRL

Message ID 20220128132927.14997-8-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series x86: MSR_SPEC_CTRL support for SVM guests | expand

Commit Message

Andrew Cooper Jan. 28, 2022, 1:29 p.m. UTC
Hardware maintains both host and guest versions of MSR_SPEC_CTRL, but guests
run with the logical OR of both values.  Therefore, in principle we want to
clear Xen's value before entering the guest.  However, for migration
compatibility, and for performance reasons with SEV-SNP guests, we want the
ability to use a nonzero value behind the guest's back.  Use vcpu_msrs to hold
this value, with the guest value in the VMCB.

On the VMEntry path, adjusting MSR_SPEC_CTRL must be done after CLGI so as to
be atomic with respect to NMIs/etc.  The loading of spec_ctrl_raw into %eax
was also stale from the unused old code, so can be dropped too.

Implement both pieces of logic as small pieces of C, and use alternatives to
patch in the call to each of them based on X86_FEATURE_SC_MSR_HVM.  The use of
double alternative blocks is due to a quirk of the current infrastructure,
where call displacements only get fixed up for the first replacement
instruction.  While adjusting the clobber lists, drop the stale requirements
on the VMExit side.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>

The RAS[:32] flushing side effect is under reconsideration.  It is actually a
very awkward side effect in practice, and not applicable to any
implementations (that I'm aware of), but for now, it's the documented safe
action to take.  Furthermore, it avoids complicating the logic with an lfence
in the else case for Spectre v1 safety.

v2:
 * Split last_spec_ctrl introduction into earlier patch.
 * Use STR() rather than __stringify() for brevity.
 * Use double alt blocks in order to pass function parameters.
---
 xen/arch/x86/hvm/svm/entry.S             | 12 +++++++-----
 xen/arch/x86/hvm/svm/svm.c               | 27 +++++++++++++++++++++++++++
 xen/arch/x86/include/asm/msr.h           |  9 +++++++++
 xen/arch/x86/include/asm/spec_ctrl_asm.h |  3 +++
 4 files changed, 46 insertions(+), 5 deletions(-)

Comments

Jan Beulich Jan. 31, 2022, 10:33 a.m. UTC | #1
On 28.01.2022 14:29, Andrew Cooper wrote:
> Hardware maintains both host and guest versions of MSR_SPEC_CTRL, but guests
> run with the logical OR of both values.  Therefore, in principle we want to
> clear Xen's value before entering the guest.  However, for migration
> compatibility, and for performance reasons with SEV-SNP guests, we want the
> ability to use a nonzero value behind the guest's back.  Use vcpu_msrs to hold
> this value, with the guest value in the VMCB.
> 
> On the VMEntry path, adjusting MSR_SPEC_CTRL must be done after CLGI so as to
> be atomic with respect to NMIs/etc.  The loading of spec_ctrl_raw into %eax
> was also stale from the unused old code, so can be dropped too.
> 
> Implement both pieces of logic as small pieces of C, and alternative the call
> to get there based on X86_FEATURE_SC_MSR_HVM.  The use of double alternative
> blocks is due to a quirk of the current infrastructure, where call
> displacements only get fixed up for the first replacement instruction.  While
> adjusting the clobber lists, drop the stale requirements on the VMExit side.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Again technically:
Reviewed-by: Jan Beulich <jbeulich@suse.com>
But ...

> --- a/xen/arch/x86/hvm/svm/entry.S
> +++ b/xen/arch/x86/hvm/svm/entry.S
> @@ -55,11 +55,12 @@ __UNLIKELY_END(nsvm_hap)
>          mov  %rsp, %rdi
>          call svm_vmenter_helper
>  
> -        mov VCPU_arch_msrs(%rbx), %rax
> -        mov VCPUMSR_spec_ctrl_raw(%rax), %eax
> +        clgi
>  
>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
> -        /* SPEC_CTRL_EXIT_TO_SVM   (nothing currently) */
> +        /* SPEC_CTRL_EXIT_TO_SVM       Req:                           Clob: C   */
> +        ALTERNATIVE "", STR(mov %rbx, %rdi; mov %rsp, %rsi), X86_FEATURE_SC_MSR_HVM
> +        ALTERNATIVE "", STR(call vmentry_spec_ctrl), X86_FEATURE_SC_MSR_HVM

Is there a reason to use a macro for converting to a string here at
all? There are no "inner" macros here which might need expanding. And
"brevity" (as you have in the rev log) would call for

        ALTERNATIVE "", "mov %rbx, %rdi; mov %rsp, %rsi", X86_FEATURE_SC_MSR_HVM
        ALTERNATIVE "", "call vmentry_spec_ctrl", X86_FEATURE_SC_MSR_HVM

.

> @@ -86,8 +86,10 @@ __UNLIKELY_END(nsvm_hap)
>  
>          GET_CURRENT(bx)
>  
> -        /* SPEC_CTRL_ENTRY_FROM_SVM    Req: b=curr %rsp=regs/cpuinfo, Clob: ac  */
> +        /* SPEC_CTRL_ENTRY_FROM_SVM    Req:                           Clob: C   */
>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
> +        ALTERNATIVE "", STR(mov %rsp, %rdi), X86_FEATURE_SC_MSR_HVM
> +        ALTERNATIVE "", STR(call vmexit_spec_ctrl), X86_FEATURE_SC_MSR_HVM

Same here then, obviously.

> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -3086,6 +3086,33 @@ void svm_vmexit_handler(struct cpu_user_regs *regs)
>      vmcb_set_vintr(vmcb, intr);
>  }
>  
> +/* Called with GIF=0. */
> +void vmexit_spec_ctrl(struct cpu_info *info)
> +{
> +    unsigned int val = info->xen_spec_ctrl;
> +
> +    /*
> +     * Write to MSR_SPEC_CTRL unconditionally, for the RAS[:32] flushing side
> +     * effect.
> +     */
> +    wrmsr(MSR_SPEC_CTRL, val, 0);
> +    info->last_spec_ctrl = val;
> +}
> +
> +/* Called with GIF=0. */
> +void vmentry_spec_ctrl(const struct vcpu *curr, struct cpu_info *info)
> +{
> +    unsigned int val = curr->arch.msrs->spec_ctrl.raw;
> +
> +    if ( val != info->last_spec_ctrl )
> +    {
> +        wrmsr(MSR_SPEC_CTRL, val, 0);
> +        info->last_spec_ctrl = val;
> +    }
> +
> +    /* No Spectre v1 concerns.  Execution is going to hit VMRUN imminently. */
> +}

Since these live in SVM code, I think their names should avoid suggesting
that they could also be used for VMX (irrespective of us not meaning to use
them there). Otherwise they should move to common code, with the comments
slightly adjusted.

Jan
Andrew Cooper Jan. 31, 2022, 11:47 a.m. UTC | #2
On 31/01/2022 10:33, Jan Beulich wrote:
> On 28.01.2022 14:29, Andrew Cooper wrote:
>> Hardware maintains both host and guest versions of MSR_SPEC_CTRL, but guests
>> run with the logical OR of both values.  Therefore, in principle we want to
>> clear Xen's value before entering the guest.  However, for migration
>> compatibility, and for performance reasons with SEV-SNP guests, we want the
>> ability to use a nonzero value behind the guest's back.  Use vcpu_msrs to hold
>> this value, with the guest value in the VMCB.
>>
>> On the VMEntry path, adjusting MSR_SPEC_CTRL must be done after CLGI so as to
>> be atomic with respect to NMIs/etc.  The loading of spec_ctrl_raw into %eax
>> was also stale from the unused old code, so can be dropped too.
>>
>> Implement both pieces of logic as small pieces of C, and alternative the call
>> to get there based on X86_FEATURE_SC_MSR_HVM.  The use of double alternative
>> blocks is due to a quirk of the current infrastructure, where call
>> displacements only get fixed up for the first replacement instruction.  While
>> adjusting the clobber lists, drop the stale requirements on the VMExit side.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Again technically:
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

Thanks.

> But ...
>
>> --- a/xen/arch/x86/hvm/svm/entry.S
>> +++ b/xen/arch/x86/hvm/svm/entry.S
>> @@ -55,11 +55,12 @@ __UNLIKELY_END(nsvm_hap)
>>          mov  %rsp, %rdi
>>          call svm_vmenter_helper
>>  
>> -        mov VCPU_arch_msrs(%rbx), %rax
>> -        mov VCPUMSR_spec_ctrl_raw(%rax), %eax
>> +        clgi
>>  
>>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
>> -        /* SPEC_CTRL_EXIT_TO_SVM   (nothing currently) */
>> +        /* SPEC_CTRL_EXIT_TO_SVM       Req:                           Clob: C   */
>> +        ALTERNATIVE "", STR(mov %rbx, %rdi; mov %rsp, %rsi), X86_FEATURE_SC_MSR_HVM
>> +        ALTERNATIVE "", STR(call vmentry_spec_ctrl), X86_FEATURE_SC_MSR_HVM
> Is there a reason to use a macro for converting to a string here at
> all? There are no "inner" macros here which might need expanding. And
> "brevity" (as you have in the rev log) would call for
>
>         ALTERNATIVE "", "mov %rbx, %rdi; mov %rsp, %rsi", X86_FEATURE_SC_MSR_HVM
>         ALTERNATIVE "", "call vmentry_spec_ctrl", X86_FEATURE_SC_MSR_HVM

Good point.  I'll switch to plain strings.

>
>
>> @@ -86,8 +86,10 @@ __UNLIKELY_END(nsvm_hap)
>>  
>>          GET_CURRENT(bx)
>>  
>> -        /* SPEC_CTRL_ENTRY_FROM_SVM    Req: b=curr %rsp=regs/cpuinfo, Clob: ac  */
>> +        /* SPEC_CTRL_ENTRY_FROM_SVM    Req:                           Clob: C   */
>>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
>> +        ALTERNATIVE "", STR(mov %rsp, %rdi), X86_FEATURE_SC_MSR_HVM
>> +        ALTERNATIVE "", STR(call vmexit_spec_ctrl), X86_FEATURE_SC_MSR_HVM
> Same here then, obviously.
>
>> --- a/xen/arch/x86/hvm/svm/svm.c
>> +++ b/xen/arch/x86/hvm/svm/svm.c
>> @@ -3086,6 +3086,33 @@ void svm_vmexit_handler(struct cpu_user_regs *regs)
>>      vmcb_set_vintr(vmcb, intr);
>>  }
>>  
>> +/* Called with GIF=0. */
>> +void vmexit_spec_ctrl(struct cpu_info *info)
>> +{
>> +    unsigned int val = info->xen_spec_ctrl;
>> +
>> +    /*
>> +     * Write to MSR_SPEC_CTRL unconditionally, for the RAS[:32] flushing side
>> +     * effect.
>> +     */
>> +    wrmsr(MSR_SPEC_CTRL, val, 0);
>> +    info->last_spec_ctrl = val;
>> +}
>> +
>> +/* Called with GIF=0. */
>> +void vmentry_spec_ctrl(const struct vcpu *curr, struct cpu_info *info)
>> +{
>> +    unsigned int val = curr->arch.msrs->spec_ctrl.raw;
>> +
>> +    if ( val != info->last_spec_ctrl )
>> +    {
>> +        wrmsr(MSR_SPEC_CTRL, val, 0);
>> +        info->last_spec_ctrl = val;
>> +    }
>> +
>> +    /* No Spectre v1 concerns.  Execution is going to hit VMRUN imminently. */
>> +}
> These living in SVM code I think their names want to avoid suggesting
> they could also be used for VMX (irrespective of us not meaning to use
> them there). Or else they want to move to common code, with comments
> slightly adjusted.

I'll add svm_ prefixes.  I can't see these being useful elsewhere.

~Andrew
Jan Beulich Jan. 31, 2022, 12:55 p.m. UTC | #3
On 28.01.2022 14:29, Andrew Cooper wrote:
> --- a/xen/arch/x86/hvm/svm/entry.S
> +++ b/xen/arch/x86/hvm/svm/entry.S
> @@ -55,11 +55,12 @@ __UNLIKELY_END(nsvm_hap)
>          mov  %rsp, %rdi
>          call svm_vmenter_helper
>  
> -        mov VCPU_arch_msrs(%rbx), %rax
> -        mov VCPUMSR_spec_ctrl_raw(%rax), %eax
> +        clgi
>  
>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
> -        /* SPEC_CTRL_EXIT_TO_SVM   (nothing currently) */
> +        /* SPEC_CTRL_EXIT_TO_SVM       Req:                           Clob: C   */
> +        ALTERNATIVE "", STR(mov %rbx, %rdi; mov %rsp, %rsi), X86_FEATURE_SC_MSR_HVM
> +        ALTERNATIVE "", STR(call vmentry_spec_ctrl), X86_FEATURE_SC_MSR_HVM

Both this and ...

> @@ -86,8 +86,10 @@ __UNLIKELY_END(nsvm_hap)
>  
>          GET_CURRENT(bx)
>  
> -        /* SPEC_CTRL_ENTRY_FROM_SVM    Req: b=curr %rsp=regs/cpuinfo, Clob: ac  */
> +        /* SPEC_CTRL_ENTRY_FROM_SVM    Req:                           Clob: C   */
>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
> +        ALTERNATIVE "", STR(mov %rsp, %rdi), X86_FEATURE_SC_MSR_HVM
> +        ALTERNATIVE "", STR(call vmexit_spec_ctrl), X86_FEATURE_SC_MSR_HVM
>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */

... this now effectively violate what the warning comment says, as there
is a RET involved in the C call. If this is not a problem for some reason,
I'd like to ask that the comments be updated accordingly.

Jan
Andrew Cooper Jan. 31, 2022, 2:04 p.m. UTC | #4
On 31/01/2022 12:55, Jan Beulich wrote:
> On 28.01.2022 14:29, Andrew Cooper wrote:
>> --- a/xen/arch/x86/hvm/svm/entry.S
>> +++ b/xen/arch/x86/hvm/svm/entry.S
>> @@ -55,11 +55,12 @@ __UNLIKELY_END(nsvm_hap)
>>          mov  %rsp, %rdi
>>          call svm_vmenter_helper
>>  
>> -        mov VCPU_arch_msrs(%rbx), %rax
>> -        mov VCPUMSR_spec_ctrl_raw(%rax), %eax
>> +        clgi
>>  
>>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
>> -        /* SPEC_CTRL_EXIT_TO_SVM   (nothing currently) */
>> +        /* SPEC_CTRL_EXIT_TO_SVM       Req:                           Clob: C   */
>> +        ALTERNATIVE "", STR(mov %rbx, %rdi; mov %rsp, %rsi), X86_FEATURE_SC_MSR_HVM
>> +        ALTERNATIVE "", STR(call vmentry_spec_ctrl), X86_FEATURE_SC_MSR_HVM
> Both this and ...
>
>> @@ -86,8 +86,10 @@ __UNLIKELY_END(nsvm_hap)
>>  
>>          GET_CURRENT(bx)
>>  
>> -        /* SPEC_CTRL_ENTRY_FROM_SVM    Req: b=curr %rsp=regs/cpuinfo, Clob: ac  */
>> +        /* SPEC_CTRL_ENTRY_FROM_SVM    Req:                           Clob: C   */
>>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
>> +        ALTERNATIVE "", STR(mov %rsp, %rdi), X86_FEATURE_SC_MSR_HVM
>> +        ALTERNATIVE "", STR(call vmexit_spec_ctrl), X86_FEATURE_SC_MSR_HVM
>>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
> ... this now effectively violate what the warning comment says, as there
> is a RET involved in the C call. If this is not a problem for some reason,
> I'd like to ask that the comments be updated accordingly.

The `ret` note pertains to two things:
1) RSB underflows falling back to indirect predictions
2) SpectreRSB executing more rets than calls

Aspect 2 is largely theoretical, but can happen with an out of bounds
write which hits the return address on the stack in an otherwise regular
call tree.

Once DO_OVERWRITE_RSB is complete, there are no user RSB entries to
consume.  I know this gets complicated with the RAS[:32] flushing which
is part of why the behaviour is up for consideration, but even the
current code completes the full flush before a ret is executed.

Aspect 1 is a feature seemingly unique to Intel CPUs, and we have to set
MSR_SPEC_CTRL.IBRS to 1 before indirect predictions are "safe".


That said, I stand by the comments as they are.  They're there for other
code to remember to be careful.  I think it is entirely reasonable to
expect the internals of the speculative safety logic to know how to stay
safe.

I'll see how it looks with the helpers inlined.  That's the easiest way
of fixing this issue.

~Andrew
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
index 276215d36aff..190f7095c65c 100644
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -55,11 +55,12 @@  __UNLIKELY_END(nsvm_hap)
         mov  %rsp, %rdi
         call svm_vmenter_helper
 
-        mov VCPU_arch_msrs(%rbx), %rax
-        mov VCPUMSR_spec_ctrl_raw(%rax), %eax
+        clgi
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
-        /* SPEC_CTRL_EXIT_TO_SVM   (nothing currently) */
+        /* SPEC_CTRL_EXIT_TO_SVM       Req:                           Clob: C   */
+        ALTERNATIVE "", STR(mov %rbx, %rdi; mov %rsp, %rsi), X86_FEATURE_SC_MSR_HVM
+        ALTERNATIVE "", STR(call vmentry_spec_ctrl), X86_FEATURE_SC_MSR_HVM
 
         pop  %r15
         pop  %r14
@@ -78,7 +79,6 @@  __UNLIKELY_END(nsvm_hap)
         pop  %rsi
         pop  %rdi
 
-        clgi
         sti
         vmrun
 
@@ -86,8 +86,10 @@  __UNLIKELY_END(nsvm_hap)
 
         GET_CURRENT(bx)
 
-        /* SPEC_CTRL_ENTRY_FROM_SVM    Req: b=curr %rsp=regs/cpuinfo, Clob: ac  */
+        /* SPEC_CTRL_ENTRY_FROM_SVM    Req:                           Clob: C   */
         ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
+        ALTERNATIVE "", STR(mov %rsp, %rdi), X86_FEATURE_SC_MSR_HVM
+        ALTERNATIVE "", STR(call vmexit_spec_ctrl), X86_FEATURE_SC_MSR_HVM
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         stgi
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index bb6b8e560a9f..f753bf48c252 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -3086,6 +3086,33 @@  void svm_vmexit_handler(struct cpu_user_regs *regs)
     vmcb_set_vintr(vmcb, intr);
 }
 
+/* Called with GIF=0. */
+void vmexit_spec_ctrl(struct cpu_info *info)
+{
+    unsigned int val = info->xen_spec_ctrl;
+
+    /*
+     * Write to MSR_SPEC_CTRL unconditionally, for the RAS[:32] flushing side
+     * effect.
+     */
+    wrmsr(MSR_SPEC_CTRL, val, 0);
+    info->last_spec_ctrl = val;
+}
+
+/* Called with GIF=0. */
+void vmentry_spec_ctrl(const struct vcpu *curr, struct cpu_info *info)
+{
+    unsigned int val = curr->arch.msrs->spec_ctrl.raw;
+
+    if ( val != info->last_spec_ctrl )
+    {
+        wrmsr(MSR_SPEC_CTRL, val, 0);
+        info->last_spec_ctrl = val;
+    }
+
+    /* No Spectre v1 concerns.  Execution is going to hit VMRUN imminently. */
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
index 657a3295613d..ce4fe51afe54 100644
--- a/xen/arch/x86/include/asm/msr.h
+++ b/xen/arch/x86/include/asm/msr.h
@@ -297,6 +297,15 @@  struct vcpu_msrs
      *
      * For VT-x guests, the guest value is held in the MSR guest load/save
      * list.
+     *
+     * For SVM, the guest value lives in the VMCB, and hardware saves/restores
+     * the host value automatically.  However, guests run with the OR of the
+     * host and guest value, which allows Xen to set protections behind the
+     * guest's back.
+     *
+     * We must clear/restore Xen's value before/after VMRUN to avoid unduly
+     * influencing the guest.  In order to support "behind the guest's back"
+     * protections, we load this value (commonly 0) before VMRUN.
      */
     struct {
         uint32_t raw;
diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
index 9c0c7622c41f..02b3b18ce69f 100644
--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
@@ -46,6 +46,9 @@ 
  *   - On VMX by using MSR load/save lists to have vmentry/exit atomically
  *     load/save the guest value.  Xen's value is loaded in regular code, and
  *     there is no need to use the shadow logic (below).
+ *   - On SVM by altering MSR_SPEC_CTRL inside the CLGI/STGI region.  This
+ *     makes the changes atomic with respect to NMIs/etc, so no need for
+ *     shadowing logic.
  *
  * Factor 2 is harder.  We maintain a shadow_spec_ctrl value, and a use_shadow
  * boolean in the per cpu spec_ctrl_flags.  The synchronous use is: