[v2,4/4] x86/spec-ctrl: Fix NMI race condition with VT-x MSR_SPEC_CTRL handling

Message ID 20220117183415.11150-5-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Series x86/spec-ctrl: Fix NMI race condition

Commit Message

Andrew Cooper Jan. 17, 2022, 6:34 p.m. UTC
The logic was based on a mistaken understanding of how NMI blocking on vmexit
works.  NMIs are only blocked for EXIT_REASON_NMI, and not for general exits.
Therefore, an NMI can in general hit early in the vmx_asm_vmexit_handler path,
and the guest's value will be clobbered before it is saved.
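
As an illustrative sketch of the window being described (an assumed
sequence, not text from the patch), the race looks roughly like:

    /*
     * 1. Guest writes MSR_SPEC_CTRL directly (the intercept is disabled).
     * 2. A vmexit occurs for a reason other than EXIT_REASON_NMI, so NMIs
     *    are not blocked.
     * 3. An NMI hits before the vmexit path's rdmsr saves the guest value.
     * 4. The NMI's own SPEC_CTRL entry/exit logic rewrites MSR_SPEC_CTRL,
     *    restoring at best a stale shadow copy on its way out.
     * 5. The vmexit path's rdmsr then saves the wrong value as the guest's.
     */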

Switch to using MSR load/save lists.  This causes the guest value to be saved
atomically with respect to NMIs/MCEs/etc.
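
For context (a sketch based on the Intel SDM's VM-exit MSR-store / VM-entry
MSR-load area format, which Xen models as struct vmx_msr_entry; treat the
comments here as illustrative):

    struct vmx_msr_entry {
        uint32_t index;    /* MSR index, e.g. MSR_SPEC_CTRL (0x48) */
        uint32_t mbz;      /* Reserved; must be zero */
        uint64_t data;     /* Saved on vmexit / loaded on vmentry */
    };

With MSR_SPEC_CTRL on the guest list (via vmx_add_guest_msr(), as done
below), hardware saves the guest value into the entry as part of the vmexit
transition itself, leaving no window for an NMI to intervene.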

First, update vmx_cpuid_policy_changed() to configure the load/save lists at
the same time as configuring the intercepts.  This function is always used in
remote context, so extend the vmx_vmcs_{enter,exit}() block to cover the whole
function, rather than having multiple remote acquisitions of the same VMCS.

Both of vmx_{add,del}_guest_msr() can fail.  The -ESRCH delete case is fine,
but all others are fatal to the running of the VM, so handle them using
domain_crash() - this path is only used during domain construction anyway.

Second, update vmx_{get,set}_reg() to use the MSR load/save lists rather than
vcpu_msrs, and update the vcpu_msrs comment to describe the new state
location.

Finally, adjust the entry/exit asm.

Because the guest value is saved and loaded atomically, we do not need to
manually load the guest value, nor do we need to enable SCF_use_shadow.  This
lets us remove the use of DO_SPEC_CTRL_EXIT_TO_GUEST.  Additionally,
SPEC_CTRL_ENTRY_FROM_PV gets removed too, because on an early entry failure,
we're no longer in the guest MSR_SPEC_CTRL context needing to switch back to
Xen's context.

The only action remaining is to load Xen's MSR_SPEC_CTRL value on vmexit.  We
could in principle use the host MSR list, but this is expected to complicate
future work.  Delete DO_SPEC_CTRL_ENTRY_FROM_HVM entirely, and use a shorter
code sequence to simply reload Xen's setting from the top-of-stack block.

Adjust the comment at the top of spec_ctrl_asm.h in light of this bugfix.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>

Needs backporting as far as people can tolerate.

If the entry/exit logic were in C, I'd ASSERT() that shadow tracking is off,
but this is awkward to arrange in asm.

v2:
 * Rework on top of {get,set}_reg() infrastructure.
 * Future-proof against other vmx_del_guest_msr() failures.
 * Rewrite the commit message to explain things better.
---
 xen/arch/x86/hvm/vmx/entry.S             | 21 +++++++++------
 xen/arch/x86/hvm/vmx/vmx.c               | 44 +++++++++++++++++++++++++++++---
 xen/arch/x86/include/asm/msr.h           | 10 +++++++-
 xen/arch/x86/include/asm/spec_ctrl_asm.h | 32 +++--------------------
 4 files changed, 67 insertions(+), 40 deletions(-)

Comments

Jan Beulich Jan. 19, 2022, 1:42 p.m. UTC | #1
On 17.01.2022 19:34, Andrew Cooper wrote:
> --- a/xen/arch/x86/hvm/vmx/entry.S
> +++ b/xen/arch/x86/hvm/vmx/entry.S
> @@ -35,7 +35,14 @@ ENTRY(vmx_asm_vmexit_handler)
>  
>          /* SPEC_CTRL_ENTRY_FROM_VMX    Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
> -        ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM
> +
> +        .macro restore_spec_ctrl
> +            mov    $MSR_SPEC_CTRL, %ecx
> +            movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
> +            xor    %edx, %edx
> +            wrmsr
> +        .endm
> +        ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
>  
>          /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
> @@ -82,8 +89,7 @@ UNLIKELY_END(realmode)
>          mov VCPUMSR_spec_ctrl_raw(%rax), %eax
>  
>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
> -        /* SPEC_CTRL_EXIT_TO_VMX   Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
> -        ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
> +        /* SPEC_CTRL_EXIT_TO_VMX   Req: %rsp=regs/cpuinfo              Clob:    */
>          ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM

I notice you did update this clobber remark, but what about the one further
up in context?

> --- a/xen/arch/x86/include/asm/msr.h
> +++ b/xen/arch/x86/include/asm/msr.h
> @@ -287,7 +287,15 @@ extern struct msr_policy     raw_msr_policy,
>  /* Container object for per-vCPU MSRs */
>  struct vcpu_msrs
>  {
> -    /* 0x00000048 - MSR_SPEC_CTRL */
> +    /*
> +     * 0x00000048 - MSR_SPEC_CTRL
> +     *
> +     * For PV guests, this holds the guest kernel value.  It is accessed on
> +     * every entry/exit path.
> +     *
> +     * For VT-x guests, the guest value is held in the MSR guest load/save
> +     * list.
> +     */
>      struct {
>          uint32_t raw;
>      } spec_ctrl;

To stand a chance of noticing bad use of this field for VT-x guests, would
it make sense to store some sentinel value into this field for all involved
vCPU-s?

Jan
Andrew Cooper Jan. 19, 2022, 5 p.m. UTC | #2
On 19/01/2022 13:42, Jan Beulich wrote:
> On 17.01.2022 19:34, Andrew Cooper wrote:
>> --- a/xen/arch/x86/hvm/vmx/entry.S
>> +++ b/xen/arch/x86/hvm/vmx/entry.S
>> @@ -35,7 +35,14 @@ ENTRY(vmx_asm_vmexit_handler)
>>  
>>          /* SPEC_CTRL_ENTRY_FROM_VMX    Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
>>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
>> -        ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM
>> +
>> +        .macro restore_spec_ctrl
>> +            mov    $MSR_SPEC_CTRL, %ecx
>> +            movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
>> +            xor    %edx, %edx
>> +            wrmsr
>> +        .endm
>> +        ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
>>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
>>  
>>          /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
>> @@ -82,8 +89,7 @@ UNLIKELY_END(realmode)
>>          mov VCPUMSR_spec_ctrl_raw(%rax), %eax
>>  
>>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
>> -        /* SPEC_CTRL_EXIT_TO_VMX   Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
>> -        ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
>> +        /* SPEC_CTRL_EXIT_TO_VMX   Req: %rsp=regs/cpuinfo              Clob:    */
>>          ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM
> I notice you did update this clobber remark, but what about the one further
> up in context?

What about it?  It still clobbers %eax, %ecx and %edx.

>> --- a/xen/arch/x86/include/asm/msr.h
>> +++ b/xen/arch/x86/include/asm/msr.h
>> @@ -287,7 +287,15 @@ extern struct msr_policy     raw_msr_policy,
>>  /* Container object for per-vCPU MSRs */
>>  struct vcpu_msrs
>>  {
>> -    /* 0x00000048 - MSR_SPEC_CTRL */
>> +    /*
>> +     * 0x00000048 - MSR_SPEC_CTRL
>> +     *
>> +     * For PV guests, this holds the guest kernel value.  It is accessed on
>> +     * every entry/exit path.
>> +     *
>> +     * For VT-x guests, the guest value is held in the MSR guest load/save
>> +     * list.
>> +     */
>>      struct {
>>          uint32_t raw;
>>      } spec_ctrl;
> To stand a chance of noticing bad use of this field for VT-x guests, would
> it make sense to store some sentinel value into this field for all involved
> vCPU-s?

The usage is going to get more complicated before we're done.  I'd like
to wait until the churn reduces before applying invariants like that.

~Andrew
Jan Beulich Jan. 20, 2022, 8:14 a.m. UTC | #3
On 19.01.2022 18:00, Andrew Cooper wrote:
> On 19/01/2022 13:42, Jan Beulich wrote:
>> On 17.01.2022 19:34, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/hvm/vmx/entry.S
>>> +++ b/xen/arch/x86/hvm/vmx/entry.S
>>> @@ -35,7 +35,14 @@ ENTRY(vmx_asm_vmexit_handler)
>>>  
>>>          /* SPEC_CTRL_ENTRY_FROM_VMX    Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
>>>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
>>> -        ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM
>>> +
>>> +        .macro restore_spec_ctrl
>>> +            mov    $MSR_SPEC_CTRL, %ecx
>>> +            movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
>>> +            xor    %edx, %edx
>>> +            wrmsr
>>> +        .endm
>>> +        ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
>>>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
>>>  
>>>          /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
>>> @@ -82,8 +89,7 @@ UNLIKELY_END(realmode)
>>>          mov VCPUMSR_spec_ctrl_raw(%rax), %eax
>>>  
>>>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
>>> -        /* SPEC_CTRL_EXIT_TO_VMX   Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
>>> -        ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
>>> +        /* SPEC_CTRL_EXIT_TO_VMX   Req: %rsp=regs/cpuinfo              Clob:    */
>>>          ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM
>> I notice you did update this clobber remark, but what about the one further
>> up in context?
> 
> What about it?  It still clobbers %eax, %ecx and %edx.

Oh, sorry - I did look at DO_OVERWRITE_RSB only, not paying attention
to the now open-coded 2nd part, which - due to the blank line - doesn't
appear connected to the comment anymore.

Jan
Andrew Cooper Jan. 20, 2022, 11:55 a.m. UTC | #4
On 20/01/2022 08:14, Jan Beulich wrote:
> On 19.01.2022 18:00, Andrew Cooper wrote:
>> On 19/01/2022 13:42, Jan Beulich wrote:
>>> On 17.01.2022 19:34, Andrew Cooper wrote:
>>>> --- a/xen/arch/x86/hvm/vmx/entry.S
>>>> +++ b/xen/arch/x86/hvm/vmx/entry.S
>>>> @@ -35,7 +35,14 @@ ENTRY(vmx_asm_vmexit_handler)
>>>>  
>>>>          /* SPEC_CTRL_ENTRY_FROM_VMX    Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
>>>>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
>>>> -        ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM
>>>> +
>>>> +        .macro restore_spec_ctrl
>>>> +            mov    $MSR_SPEC_CTRL, %ecx
>>>> +            movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
>>>> +            xor    %edx, %edx
>>>> +            wrmsr
>>>> +        .endm
>>>> +        ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
>>>>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
>>>>  
>>>>          /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
>>>> @@ -82,8 +89,7 @@ UNLIKELY_END(realmode)
>>>>          mov VCPUMSR_spec_ctrl_raw(%rax), %eax
>>>>  
>>>>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
>>>> -        /* SPEC_CTRL_EXIT_TO_VMX   Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
>>>> -        ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
>>>> +        /* SPEC_CTRL_EXIT_TO_VMX   Req: %rsp=regs/cpuinfo              Clob:    */
>>>>          ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM
>>> I notice you did update this clobber remark, but what about the one further
>>> up in context?
>> What about it?  It still clobbers %eax, %ecx and %edx.
> Oh, sorry - I did look at DO_OVERWRITE_RSB only, not paying attention
> to the now open-coded 2nd part, which - due to the blank line - doesn't
> appear connected to the comment anymore.

Yeah - it's a little harder now that it isn't a single line.  The
req/clob information really is most useful at the start of the block,
because that's where it is important to get the context correct.

Can I take this as an R-by then?

~Andrew
Jan Beulich Jan. 20, 2022, 12:11 p.m. UTC | #5
On 20.01.2022 12:55, Andrew Cooper wrote:
> On 20/01/2022 08:14, Jan Beulich wrote:
>> On 19.01.2022 18:00, Andrew Cooper wrote:
>>> On 19/01/2022 13:42, Jan Beulich wrote:
>>>> On 17.01.2022 19:34, Andrew Cooper wrote:
>>>>> --- a/xen/arch/x86/hvm/vmx/entry.S
>>>>> +++ b/xen/arch/x86/hvm/vmx/entry.S
>>>>> @@ -35,7 +35,14 @@ ENTRY(vmx_asm_vmexit_handler)
>>>>>  
>>>>>          /* SPEC_CTRL_ENTRY_FROM_VMX    Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
>>>>>          ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
>>>>> -        ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM
>>>>> +
>>>>> +        .macro restore_spec_ctrl
>>>>> +            mov    $MSR_SPEC_CTRL, %ecx
>>>>> +            movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
>>>>> +            xor    %edx, %edx
>>>>> +            wrmsr
>>>>> +        .endm
>>>>> +        ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
>>>>>          /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
>>>>>  
>>>>>          /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
>>>>> @@ -82,8 +89,7 @@ UNLIKELY_END(realmode)
>>>>>          mov VCPUMSR_spec_ctrl_raw(%rax), %eax
>>>>>  
>>>>>          /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
>>>>> -        /* SPEC_CTRL_EXIT_TO_VMX   Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
>>>>> -        ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
>>>>> +        /* SPEC_CTRL_EXIT_TO_VMX   Req: %rsp=regs/cpuinfo              Clob:    */
>>>>>          ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM
>>>> I notice you did update this clobber remark, but what about the one further
>>>> up in context?
>>> What about it?  It still clobbers %eax, %ecx and %edx.
>> Oh, sorry - I did look at DO_OVERWRITE_RSB only, not paying attention
>> to the now open-coded 2nd part, which - due to the blank line - doesn't
>> appear connected to the comment anymore.
> 
> Yeah - it's a little harder now that it isn't a single line.  The
> req/clob information really is most useful at the start of the block,
> because that's where it is important to get the context correct.
> 
> Can I take this as an R-by then?

Please do; I should have said so earlier on.

Jan
Jan Beulich Jan. 21, 2022, 7:56 a.m. UTC | #6
On 17.01.2022 19:34, Andrew Cooper wrote:
> The logic was based on a mistaken understanding of how NMI blocking on vmexit
> works.  NMIs are only blocked for EXIT_REASON_NMI, and not for general exits.
> Therefore, an NMI can in general hit early in the vmx_asm_vmexit_handler path,
> and the guest's value will be clobbered before it is saved.
> 
> Switch to using MSR load/save lists.  This causes the guest value to be saved
> atomically with respect to NMIs/MCEs/etc.
> 
> First, update vmx_cpuid_policy_changed() to configure the load/save lists at
> the same time as configuring the intercepts.  This function is always used in
> remote context, so extend the vmx_vmcs_{enter,exit}() block to cover the whole
> function, rather than having multiple remote acquisitions of the same VMCS.
> 
> Both of vmx_{add,del}_guest_msr() can fail.  The -ESRCH delete case is fine,
> but all others are fatal to the running of the VM, so handle them using
> domain_crash() - this path is only used during domain construction anyway.
> 
> Second, update vmx_{get,set}_reg() to use the MSR load/save lists rather than
> vcpu_msrs, and update the vcpu_msrs comment to describe the new state
> location.
> 
> Finally, adjust the entry/exit asm.
> 
> Because the guest value is saved and loaded atomically, we do not need to
> manually load the guest value, nor do we need to enable SCF_use_shadow.  This
> lets us remove the use of DO_SPEC_CTRL_EXIT_TO_GUEST.  Additionally,
> SPEC_CTRL_ENTRY_FROM_PV gets removed too, because on an early entry failure,
> we're no longer in the guest MSR_SPEC_CTRL context needing to switch back to
> Xen's context.
> 
> The only action remaining is to load Xen's MSR_SPEC_CTRL value on vmexit.  We
> could in principle use the host msr list, but is expected to complicated
> future work.  Delete DO_SPEC_CTRL_ENTRY_FROM_HVM entirely, and use a shorter
> code sequence to simply reload Xen's setting from the top-of-stack block.
> 
> Adjust the comment at the top of spec_ctrl_asm.h in light of this bugfix.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Wei Liu <wl@xen.org>
> CC: Jun Nakajima <jun.nakajima@intel.com>
> CC: Kevin Tian <kevin.tian@intel.com>
> 
> Needs backporting as far as people can tolerate.

Besides the earlier patches in this series, are there any other prereqs
that you're aware of and which aren't there yet in the stable trees?

Jan

Patch

diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index 30139ae58e9d..ce7b48558ee1 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -35,7 +35,14 @@  ENTRY(vmx_asm_vmexit_handler)
 
         /* SPEC_CTRL_ENTRY_FROM_VMX    Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
         ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
-        ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM
+
+        .macro restore_spec_ctrl
+            mov    $MSR_SPEC_CTRL, %ecx
+            movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
+            xor    %edx, %edx
+            wrmsr
+        .endm
+        ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
@@ -82,8 +89,7 @@  UNLIKELY_END(realmode)
         mov VCPUMSR_spec_ctrl_raw(%rax), %eax
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
-        /* SPEC_CTRL_EXIT_TO_VMX   Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
-        ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
+        /* SPEC_CTRL_EXIT_TO_VMX   Req: %rsp=regs/cpuinfo              Clob:    */
         ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM
 
         mov  VCPU_hvm_guest_cr2(%rbx),%rax
@@ -119,12 +125,11 @@  UNLIKELY_END(realmode)
         SAVE_ALL
 
         /*
-         * PV variant needed here as no guest code has executed (so
-         * MSR_SPEC_CTRL can't have changed value), and NMIs/MCEs are liable
-         * to hit (in which case the HVM variant might corrupt things).
+         * SPEC_CTRL_ENTRY notes
+         *
+         * If we end up here, no guest code has executed.  We still have Xen's
+         * choice of MSR_SPEC_CTRL in context, and the RSB is safe.
          */
-        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo Clob: acd */
-        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         call vmx_vmentry_failure
         jmp  .Lvmx_process_softirqs
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c32967f190ff..69e38d0fa8f9 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -592,6 +592,7 @@  void vmx_update_exception_bitmap(struct vcpu *v)
 static void vmx_cpuid_policy_changed(struct vcpu *v)
 {
     const struct cpuid_policy *cp = v->domain->arch.cpuid;
+    int rc = 0;
 
     if ( opt_hvm_fep ||
          (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) )
@@ -601,17 +602,29 @@  static void vmx_cpuid_policy_changed(struct vcpu *v)
 
     vmx_vmcs_enter(v);
     vmx_update_exception_bitmap(v);
-    vmx_vmcs_exit(v);
 
     /*
      * We can safely pass MSR_SPEC_CTRL through to the guest, even if STIBP
      * isn't enumerated in hardware, as SPEC_CTRL_STIBP is ignored.
      */
     if ( cp->feat.ibrsb )
+    {
         vmx_clear_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW);
+
+        rc = vmx_add_guest_msr(v, MSR_SPEC_CTRL, 0);
+        if ( rc )
+            goto out;
+    }
     else
+    {
         vmx_set_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW);
 
+        rc = vmx_del_msr(v, MSR_SPEC_CTRL, VMX_MSR_GUEST);
+        if ( rc && rc != -ESRCH )
+            goto out;
+        rc = 0; /* Tolerate -ESRCH */
+    }
+
     /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */
     if ( cp->feat.ibrsb || cp->extd.ibpb )
         vmx_clear_msr_intercept(v, MSR_PRED_CMD,  VMX_MSR_RW);
@@ -623,6 +636,15 @@  static void vmx_cpuid_policy_changed(struct vcpu *v)
         vmx_clear_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
     else
         vmx_set_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
+
+ out:
+    vmx_vmcs_exit(v);
+
+    if ( rc )
+    {
+        printk(XENLOG_G_ERR "%pv MSR list error: %d", v, rc);
+        domain_crash(v->domain);
+    }
 }
 
 int vmx_guest_x86_mode(struct vcpu *v)
@@ -2407,11 +2429,20 @@  static int vmtrace_reset(struct vcpu *v)
 static uint64_t vmx_get_reg(struct vcpu *v, unsigned int reg)
 {
     struct domain *d = v->domain;
+    uint64_t val = 0;
+    int rc;
 
     switch ( reg )
     {
     case MSR_SPEC_CTRL:
-        return v->arch.msrs->spec_ctrl.raw;
+        rc = vmx_read_guest_msr(v, reg, &val);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR "%s(%pv, 0x%08x) MSR list error: %d\n",
+                   __func__, v, reg, rc);
+            domain_crash(d);
+        }
+        return val;
 
     default:
         printk(XENLOG_G_ERR "%s(%pv, 0x%08x) Bad register\n",
@@ -2424,11 +2455,18 @@  static uint64_t vmx_get_reg(struct vcpu *v, unsigned int reg)
 static void vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
 {
     struct domain *d = v->domain;
+    int rc;
 
     switch ( reg )
     {
     case MSR_SPEC_CTRL:
-        v->arch.msrs->spec_ctrl.raw = val;
+        rc = vmx_write_guest_msr(v, reg, val);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR "%s(%pv, 0x%08x) MSR list error: %d\n",
+                   __func__, v, reg, rc);
+            domain_crash(d);
+        }
         break;
 
     default:
diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
index 1d3eca9063a2..10039c2d227b 100644
--- a/xen/arch/x86/include/asm/msr.h
+++ b/xen/arch/x86/include/asm/msr.h
@@ -287,7 +287,15 @@  extern struct msr_policy     raw_msr_policy,
 /* Container object for per-vCPU MSRs */
 struct vcpu_msrs
 {
-    /* 0x00000048 - MSR_SPEC_CTRL */
+    /*
+     * 0x00000048 - MSR_SPEC_CTRL
+     *
+     * For PV guests, this holds the guest kernel value.  It is accessed on
+     * every entry/exit path.
+     *
+     * For VT-x guests, the guest value is held in the MSR guest load/save
+     * list.
+     */
     struct {
         uint32_t raw;
     } spec_ctrl;
diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
index 2b3f123cb501..bf82528a12ae 100644
--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
@@ -42,9 +42,10 @@ 
  *     path, or late in the exit path after restoring the guest value.  This
  *     will corrupt the guest value.
  *
- * Factor 1 is dealt with by relying on NMIs/MCEs being blocked immediately
- * after VMEXIT.  The VMEXIT-specific code reads MSR_SPEC_CTRL and updates
- * current before loading Xen's MSR_SPEC_CTRL setting.
+ * Factor 1 is dealt with:
+ *   - On VMX by using MSR load/save lists to have vmentry/exit atomically
+ *     load/save the guest value.  Xen's value is loaded in regular code, and
+ *     there is no need to use the shadow logic (below).
  *
  * Factor 2 is harder.  We maintain a shadow_spec_ctrl value, and a use_shadow
  * boolean in the per cpu spec_ctrl_flags.  The synchronous use is:
@@ -128,31 +129,6 @@ 
 #endif
 .endm
 
-.macro DO_SPEC_CTRL_ENTRY_FROM_HVM
-/*
- * Requires %rbx=current, %rsp=regs/cpuinfo
- * Clobbers %rax, %rcx, %rdx
- *
- * The common case is that a guest has direct access to MSR_SPEC_CTRL, at
- * which point we need to save the guest value before setting IBRS for Xen.
- * Unilaterally saving the guest value is shorter and faster than checking.
- */
-    mov $MSR_SPEC_CTRL, %ecx
-    rdmsr
-
-    /* Stash the value from hardware. */
-    mov VCPU_arch_msrs(%rbx), %rdx
-    mov %eax, VCPUMSR_spec_ctrl_raw(%rdx)
-    xor %edx, %edx
-
-    /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */
-    andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp)
-
-    /* Load Xen's intended value. */
-    movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
-    wrmsr
-.endm
-
 .macro DO_SPEC_CTRL_ENTRY maybexen:req
 /*
  * Requires %rsp=regs (also cpuinfo if !maybexen)