diff mbox series

[4/5] KVM: vmx: implement MSR_IA32_TSX_CTRL disable RTM functionality

Message ID 1574101067-5638-5-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: vmx: implement MSR_IA32_TSX_CTRL for guests | expand

Commit Message

Paolo Bonzini Nov. 18, 2019, 6:17 p.m. UTC
The current guest mitigation of TAA is both too heavy and not really
sufficient.  It is too heavy because it will cause some affected CPUs
(those that have MDS_NO but lack TAA_NO) to fall back to VERW and
get the corresponding slowdown.  It is not really sufficient because
it will cause the MDS_NO bit to disappear upon microcode update, so
that VMs started before the microcode update will not be runnable
anymore afterwards, even with tsx=on.

Instead, if tsx=on on the host, we can emulate MSR_IA32_TSX_CTRL for
the guest and let it run without the VERW mitigation.  Even though
MSR_IA32_TSX_CTRL is quite heavyweight, and we do not want to write
it on every vmentry, we can use the shared MSR functionality because
the host kernel need not protect itself from TSX-based side-channels.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.c     | 23 +++++------------------
 2 files changed, 36 insertions(+), 21 deletions(-)

Comments

Jim Mattson Nov. 19, 2019, 9:06 p.m. UTC | #1
On Mon, Nov 18, 2019 at 10:17 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> The current guest mitigation of TAA is both too heavy and not really
> sufficient.  It is too heavy because it will cause some affected CPUs
> (those that have MDS_NO but lack TAA_NO) to fall back to VERW and
> get the corresponding slowdown.  It is not really sufficient because
> it will cause the MDS_NO bit to disappear upon microcode update, so
> that VMs started before the microcode update will not be runnable
> anymore afterwards, even with tsx=on.
>
> Instead, if tsx=on on the host, we can emulate MSR_IA32_TSX_CTRL for
> the guest and let it run without the VERW mitigation.  Even though
> MSR_IA32_TSX_CTRL is quite heavyweight, and we do not want to write
> it on every vmentry, we can use the shared MSR functionality because
> the host kernel need not protect itself from TSX-based side-channels.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++++++++++++---
>  arch/x86/kvm/x86.c     | 23 +++++------------------
>  2 files changed, 36 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 04a8212704c1..ed25fe7d5234 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -450,6 +450,7 @@ noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
>         MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
>  #endif
>         MSR_EFER, MSR_TSC_AUX, MSR_STAR,
> +       MSR_IA32_TSX_CTRL,
>  };
>
>  #if IS_ENABLED(CONFIG_HYPERV)
> @@ -1683,6 +1684,9 @@ static void setup_msrs(struct vcpu_vmx *vmx)
>         index = __find_msr_index(vmx, MSR_TSC_AUX);
>         if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
>                 move_msr_up(vmx, index, save_nmsrs++);
> +       index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
> +       if (index >= 0)
> +               move_msr_up(vmx, index, save_nmsrs++);
>
>         vmx->save_nmsrs = save_nmsrs;
>         vmx->guest_msrs_ready = false;
> @@ -1782,6 +1786,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  #endif
>         case MSR_EFER:
>                 return kvm_get_msr_common(vcpu, msr_info);
> +       case MSR_IA32_TSX_CTRL:
> +               if (!msr_info->host_initiated &&
> +                   !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
> +                       return 1;
> +               goto find_shared_msr;
>         case MSR_IA32_UMWAIT_CONTROL:
>                 if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
>                         return 1;
> @@ -1884,8 +1893,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                 if (!msr_info->host_initiated &&
>                     !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
>                         return 1;
> -               /* Else, falls through */
> +               goto find_shared_msr;
>         default:
> +       find_shared_msr:
>                 msr = find_msr_entry(vmx, msr_info->index);
>                 if (msr) {
>                         msr_info->data = msr->data;
> @@ -2001,6 +2011,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                                               MSR_IA32_SPEC_CTRL,
>                                               MSR_TYPE_RW);
>                 break;
> +       case MSR_IA32_TSX_CTRL:
> +               if (!msr_info->host_initiated &&
> +                   !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
> +                       return 1;
> +               if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
> +                       return 1;
> +               goto find_shared_msr;
>         case MSR_IA32_PRED_CMD:
>                 if (!msr_info->host_initiated &&
>                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
> @@ -2152,8 +2169,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                 /* Check reserved bit, higher 32 bits should be zero */
>                 if ((data >> 32) != 0)
>                         return 1;
> -               /* Else, falls through */
> +               goto find_shared_msr;
> +
>         default:
> +       find_shared_msr:
>                 msr = find_msr_entry(vmx, msr_index);
>                 if (msr) {
>                         u64 old_msr_data = msr->data;
> @@ -4234,7 +4253,16 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>                         continue;
>                 vmx->guest_msrs[j].index = i;
>                 vmx->guest_msrs[j].data = 0;
> -               vmx->guest_msrs[j].mask = -1ull;
> +
> +               switch (index) {
> +               case MSR_IA32_TSX_CTRL:
> +                       /* No need to pass TSX_CTRL_CPUID_CLEAR through.  */
> +                       vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
> +                       break;

Why even bother with the special case here? Does this make the wrmsr faster?

> +               default:
> +                       vmx->guest_msrs[j].mask = -1ull;
> +                       break;
> +               }
>                 ++vmx->nmsrs;
>         }
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 648e84e728fc..fc54e3905fe3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1314,29 +1314,16 @@ static u64 kvm_get_arch_capabilities(void)
>                 data |= ARCH_CAP_MDS_NO;
>
>         /*
> -        * On TAA affected systems, export MDS_NO=0 when:
> -        *      - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
> -        *      - Updated microcode is present. This is detected by
> -        *        the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
> -        *        that VERW clears CPU buffers.
> -        *
> -        * When MDS_NO=0 is exported, guests deploy clear CPU buffer
> -        * mitigation and don't complain:
> -        *
> -        *      "Vulnerable: Clear CPU buffers attempted, no microcode"
> -        *
> -        * If TSX is disabled on the system, guests are also mitigated against
> -        * TAA and clear CPU buffer mitigation is not required for guests.
> +        * On TAA affected systems:
> +        *      - nothing to do if TSX is disabled on the host.
> +        *      - we emulate TSX_CTRL if present on the host.
> +        *        This lets the guest use VERW to clear CPU buffers.
>          */
>         if (!boot_cpu_has(X86_FEATURE_RTM))
> -               data &= ~ARCH_CAP_TAA_NO;
> +               data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
>         else if (!boot_cpu_has_bug(X86_BUG_TAA))
>                 data |= ARCH_CAP_TAA_NO;
> -       else if (data & ARCH_CAP_TSX_CTRL_MSR)
> -               data &= ~ARCH_CAP_MDS_NO;
>
> -       /* KVM does not emulate MSR_IA32_TSX_CTRL.  */
> -       data &= ~ARCH_CAP_TSX_CTRL_MSR;
>         return data;
>  }
>  EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
> --
> 1.8.3.1
>
>
Paolo Bonzini Nov. 20, 2019, 12:21 p.m. UTC | #2
On 19/11/19 22:06, Jim Mattson wrote:
>> +               switch (index) {
>> +               case MSR_IA32_TSX_CTRL:
>> +                       /* No need to pass TSX_CTRL_CPUID_CLEAR through.  */
>> +                       vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
>> +                       break;
> Why even bother with the special case here? Does this make the wrmsr faster?
> 

No, but it can avoid the wrmsr altogether if the guest uses the same
DISABLE_RTM setting but a different value for CPUID_CLEAR.

More important, while I am confident re-enabling TSX while in the kernel
and only restoring MSR_IA32_TSX_CTRL on return to userspace, I'm more
wary of changing CPUID bits while the kernel is running.  I will update
the comment.

Paolo
Jim Mattson Dec. 4, 2019, 11:49 p.m. UTC | #3
On Mon, Nov 18, 2019 at 10:17 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> The current guest mitigation of TAA is both too heavy and not really
> sufficient.  It is too heavy because it will cause some affected CPUs
> (those that have MDS_NO but lack TAA_NO) to fall back to VERW and
> get the corresponding slowdown.  It is not really sufficient because
> it will cause the MDS_NO bit to disappear upon microcode update, so
> that VMs started before the microcode update will not be runnable
> anymore afterwards, even with tsx=on.
>
> Instead, if tsx=on on the host, we can emulate MSR_IA32_TSX_CTRL for
> the guest and let it run without the VERW mitigation.  Even though
> MSR_IA32_TSX_CTRL is quite heavyweight, and we do not want to write
> it on every vmentry, we can use the shared MSR functionality because
> the host kernel need not protect itself from TSX-based side-channels.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++++++++++++---
>  arch/x86/kvm/x86.c     | 23 +++++------------------
>  2 files changed, 36 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 04a8212704c1..ed25fe7d5234 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -450,6 +450,7 @@ noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
>         MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
>  #endif
>         MSR_EFER, MSR_TSC_AUX, MSR_STAR,
> +       MSR_IA32_TSX_CTRL,
>  };
>
>  #if IS_ENABLED(CONFIG_HYPERV)
> @@ -1683,6 +1684,9 @@ static void setup_msrs(struct vcpu_vmx *vmx)
>         index = __find_msr_index(vmx, MSR_TSC_AUX);
>         if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
>                 move_msr_up(vmx, index, save_nmsrs++);
> +       index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
> +       if (index >= 0)
> +               move_msr_up(vmx, index, save_nmsrs++);
>
>         vmx->save_nmsrs = save_nmsrs;
>         vmx->guest_msrs_ready = false;
> @@ -1782,6 +1786,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  #endif
>         case MSR_EFER:
>                 return kvm_get_msr_common(vcpu, msr_info);
> +       case MSR_IA32_TSX_CTRL:
> +               if (!msr_info->host_initiated &&
> +                   !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
> +                       return 1;
> +               goto find_shared_msr;
>         case MSR_IA32_UMWAIT_CONTROL:
>                 if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
>                         return 1;
> @@ -1884,8 +1893,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                 if (!msr_info->host_initiated &&
>                     !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
>                         return 1;
> -               /* Else, falls through */
> +               goto find_shared_msr;
>         default:
> +       find_shared_msr:
>                 msr = find_msr_entry(vmx, msr_info->index);
>                 if (msr) {
>                         msr_info->data = msr->data;
> @@ -2001,6 +2011,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                                               MSR_IA32_SPEC_CTRL,
>                                               MSR_TYPE_RW);
>                 break;
> +       case MSR_IA32_TSX_CTRL:
> +               if (!msr_info->host_initiated &&
> +                   !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
> +                       return 1;
> +               if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
> +                       return 1;
> +               goto find_shared_msr;
>         case MSR_IA32_PRED_CMD:
>                 if (!msr_info->host_initiated &&
>                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
> @@ -2152,8 +2169,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                 /* Check reserved bit, higher 32 bits should be zero */
>                 if ((data >> 32) != 0)
>                         return 1;
> -               /* Else, falls through */
> +               goto find_shared_msr;
> +
>         default:
> +       find_shared_msr:
>                 msr = find_msr_entry(vmx, msr_index);
>                 if (msr) {
>                         u64 old_msr_data = msr->data;
> @@ -4234,7 +4253,16 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>                         continue;
>                 vmx->guest_msrs[j].index = i;
>                 vmx->guest_msrs[j].data = 0;
> -               vmx->guest_msrs[j].mask = -1ull;
> +
> +               switch (index) {
> +               case MSR_IA32_TSX_CTRL:
> +                       /* No need to pass TSX_CTRL_CPUID_CLEAR through.  */
> +                       vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
> +                       break;
> +               default:
> +                       vmx->guest_msrs[j].mask = -1ull;
> +                       break;
> +               }
>                 ++vmx->nmsrs;
>         }
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 648e84e728fc..fc54e3905fe3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1314,29 +1314,16 @@ static u64 kvm_get_arch_capabilities(void)
>                 data |= ARCH_CAP_MDS_NO;
>
>         /*
> -        * On TAA affected systems, export MDS_NO=0 when:
> -        *      - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
> -        *      - Updated microcode is present. This is detected by
> -        *        the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
> -        *        that VERW clears CPU buffers.
> -        *
> -        * When MDS_NO=0 is exported, guests deploy clear CPU buffer
> -        * mitigation and don't complain:
> -        *
> -        *      "Vulnerable: Clear CPU buffers attempted, no microcode"
> -        *
> -        * If TSX is disabled on the system, guests are also mitigated against
> -        * TAA and clear CPU buffer mitigation is not required for guests.
> +        * On TAA affected systems:
> +        *      - nothing to do if TSX is disabled on the host.
> +        *      - we emulate TSX_CTRL if present on the host.
> +        *        This lets the guest use VERW to clear CPU buffers.
>          */
>         if (!boot_cpu_has(X86_FEATURE_RTM))
> -               data &= ~ARCH_CAP_TAA_NO;
> +               data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
>         else if (!boot_cpu_has_bug(X86_BUG_TAA))
>                 data |= ARCH_CAP_TAA_NO;
> -       else if (data & ARCH_CAP_TSX_CTRL_MSR)
> -               data &= ~ARCH_CAP_MDS_NO;
>
> -       /* KVM does not emulate MSR_IA32_TSX_CTRL.  */
> -       data &= ~ARCH_CAP_TSX_CTRL_MSR;

Shouldn't kvm be masking off any bits that it doesn't know about here?
Who knows what future features we may claim to support?

>         return data;
>  }
>  EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
> --
> 1.8.3.1
>
>
Paolo Bonzini Dec. 5, 2019, 10:16 a.m. UTC | #4
On 05/12/19 00:49, Jim Mattson wrote:
>>         if (!boot_cpu_has(X86_FEATURE_RTM))
>> -               data &= ~ARCH_CAP_TAA_NO;
>> +               data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
>>         else if (!boot_cpu_has_bug(X86_BUG_TAA))
>>                 data |= ARCH_CAP_TAA_NO;
>> -       else if (data & ARCH_CAP_TSX_CTRL_MSR)
>> -               data &= ~ARCH_CAP_MDS_NO;
>>
>> -       /* KVM does not emulate MSR_IA32_TSX_CTRL.  */
>> -       data &= ~ARCH_CAP_TSX_CTRL_MSR;
> Shouldn't kvm be masking off any bits that it doesn't know about here?
> Who knows what future features we may claim to support?

Good question, in the past the ARCH_CAPABILITIES were just "we don't
have this bug" so it made sense to pass everything through.  Now we have
TSX_CTRL that is of a different kind and arguably should have been a
CPUID bit, so we should indeed mask unknown capabilties.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 04a8212704c1..ed25fe7d5234 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -450,6 +450,7 @@  noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
 	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
 	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+	MSR_IA32_TSX_CTRL,
 };
 
 #if IS_ENABLED(CONFIG_HYPERV)
@@ -1683,6 +1684,9 @@  static void setup_msrs(struct vcpu_vmx *vmx)
 	index = __find_msr_index(vmx, MSR_TSC_AUX);
 	if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
 		move_msr_up(vmx, index, save_nmsrs++);
+	index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
+	if (index >= 0)
+		move_msr_up(vmx, index, save_nmsrs++);
 
 	vmx->save_nmsrs = save_nmsrs;
 	vmx->guest_msrs_ready = false;
@@ -1782,6 +1786,11 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 #endif
 	case MSR_EFER:
 		return kvm_get_msr_common(vcpu, msr_info);
+	case MSR_IA32_TSX_CTRL:
+		if (!msr_info->host_initiated &&
+		    !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
+			return 1;
+		goto find_shared_msr;
 	case MSR_IA32_UMWAIT_CONTROL:
 		if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
 			return 1;
@@ -1884,8 +1893,9 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
 			return 1;
-		/* Else, falls through */
+		goto find_shared_msr;
 	default:
+	find_shared_msr:
 		msr = find_msr_entry(vmx, msr_info->index);
 		if (msr) {
 			msr_info->data = msr->data;
@@ -2001,6 +2011,13 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 					      MSR_IA32_SPEC_CTRL,
 					      MSR_TYPE_RW);
 		break;
+	case MSR_IA32_TSX_CTRL:
+		if (!msr_info->host_initiated &&
+		    !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
+			return 1;
+		if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
+			return 1;
+		goto find_shared_msr;
 	case MSR_IA32_PRED_CMD:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -2152,8 +2169,10 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
 			return 1;
-		/* Else, falls through */
+		goto find_shared_msr;
+
 	default:
+	find_shared_msr:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
 			u64 old_msr_data = msr->data;
@@ -4234,7 +4253,16 @@  static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 			continue;
 		vmx->guest_msrs[j].index = i;
 		vmx->guest_msrs[j].data = 0;
-		vmx->guest_msrs[j].mask = -1ull;
+
+		switch (index) {
+		case MSR_IA32_TSX_CTRL:
+			/* No need to pass TSX_CTRL_CPUID_CLEAR through.  */
+			vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+			break;
+		default:
+			vmx->guest_msrs[j].mask = -1ull;
+			break;
+		}
 		++vmx->nmsrs;
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 648e84e728fc..fc54e3905fe3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1314,29 +1314,16 @@  static u64 kvm_get_arch_capabilities(void)
 		data |= ARCH_CAP_MDS_NO;
 
 	/*
-	 * On TAA affected systems, export MDS_NO=0 when:
-	 *	- TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
-	 *	- Updated microcode is present. This is detected by
-	 *	  the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
-	 *	  that VERW clears CPU buffers.
-	 *
-	 * When MDS_NO=0 is exported, guests deploy clear CPU buffer
-	 * mitigation and don't complain:
-	 *
-	 *	"Vulnerable: Clear CPU buffers attempted, no microcode"
-	 *
-	 * If TSX is disabled on the system, guests are also mitigated against
-	 * TAA and clear CPU buffer mitigation is not required for guests.
+	 * On TAA affected systems:
+	 *      - nothing to do if TSX is disabled on the host.
+	 *      - we emulate TSX_CTRL if present on the host.
+	 *	  This lets the guest use VERW to clear CPU buffers.
 	 */
 	if (!boot_cpu_has(X86_FEATURE_RTM))
-		data &= ~ARCH_CAP_TAA_NO;
+		data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
 	else if (!boot_cpu_has_bug(X86_BUG_TAA))
 		data |= ARCH_CAP_TAA_NO;
-	else if (data & ARCH_CAP_TSX_CTRL_MSR)
-		data &= ~ARCH_CAP_MDS_NO;
 
-	/* KVM does not emulate MSR_IA32_TSX_CTRL.  */
-	data &= ~ARCH_CAP_TSX_CTRL_MSR;
 	return data;
 }
 EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);