diff mbox series

[v3,3/4] KVM: x86: Tie Intel and AMD behavior for MSR_TSC_AUX to guest CPU model

Message ID 20210423223404.3860547-4-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: MSR_TSC_AUX fixes and improvements | expand

Commit Message

Sean Christopherson April 23, 2021, 10:34 p.m. UTC
Squish the Intel and AMD emulation of MSR_TSC_AUX together and tie it to
the guest CPU model instead of the host CPU behavior.  While not strictly
necessary to avoid guest breakage, emulating cross-vendor "architecture"
will provide consistent behavior for the guest, e.g. WRMSR fault behavior
won't change if the vCPU is migrated to a host with divergent behavior.

Note, this also adds a kvm_cpu_has() check on RDTSCP for VMX.  For all
practical purposes, the extra check is a nop as VMX's use of user return
MSRs indirectly performs the same check by checking the result of WRMSR.
Technically RDTSCP support can exist in bare metal but not in the VMCS,
but no known Intel CPUs behave this way.  In practice, the only scenario
where adding the kvm_cpu_has() check isn't a nop is nested virtualization
scenario where L0 hides the VMCS control from L1 (KVM in this case), and
the L1 userspace VMM has decided to advertise RDTSCP _against_ the
recommendations of KVM_GET_SUPPORTED_CPUID.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/svm.c | 22 +---------------------
 arch/x86/kvm/vmx/vmx.c | 13 -------------
 arch/x86/kvm/x86.c     | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 34 deletions(-)

Comments

Reiji Watanabe April 24, 2021, 7:19 a.m. UTC | #1
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1610,6 +1610,29 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>                  * invokes 64-bit SYSENTER.
>                  */
>                 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
> +               break;
> +       case MSR_TSC_AUX:
> +               if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
> +                       return 1;
> +
> +               if (!host_initiated &&
> +                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> +                       return 1;
> +
> +               /*
> +                * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
> +                * incomplete and conflicting architectural behavior.  Current
> +                * AMD CPUs completely ignore bits 63:32, i.e. they aren't
> +                * reserved and always read as zeros.  Enforce Intel's reserved
> +                * bits check if and only if the guest CPU is Intel, and clear
> +                * the bits in all other cases.  This ensures cross-vendor
> +                * migration will provide consistent behavior for the guest.
> +                */
> +               if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
> +                       return 1;
> +
> +               data = (u32)data;
> +               break;
>         }
>
>         msr.data = data;
> @@ -1646,6 +1669,17 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
>         if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
>                 return KVM_MSR_RET_FILTERED;
>
> +       switch (index) {
> +       case MSR_TSC_AUX:
> +               if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
> +                       return 1;
> +
> +               if (!host_initiated &&
> +                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> +                       return 1;


It looks Table 2-2 of the Intel SDM Vol4 (April 2021) says
TSC_AUX is supported:

   If CPUID.80000001H:EDX[27] = 1 or CPUID.(EAX=7,ECX=0):ECX[22] = 1

Should we also check X86_FEATURE_RDPID before returning 1
due to no RDTSCP support ?
There doesn't seem to be a similar description in the APM though.

Thanks,
Reiji
Sean Christopherson April 26, 2021, 7:38 p.m. UTC | #2
On Sat, Apr 24, 2021, Reiji Watanabe wrote:
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1610,6 +1610,29 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
> >                  * invokes 64-bit SYSENTER.
> >                  */
> >                 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
> > +               break;
> > +       case MSR_TSC_AUX:
> > +               if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
> > +                       return 1;
> > +
> > +               if (!host_initiated &&
> > +                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> > +                       return 1;
> > +
> > +               /*
> > +                * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
> > +                * incomplete and conflicting architectural behavior.  Current
> > +                * AMD CPUs completely ignore bits 63:32, i.e. they aren't
> > +                * reserved and always read as zeros.  Enforce Intel's reserved
> > +                * bits check if and only if the guest CPU is Intel, and clear
> > +                * the bits in all other cases.  This ensures cross-vendor
> > +                * migration will provide consistent behavior for the guest.
> > +                */
> > +               if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
> > +                       return 1;
> > +
> > +               data = (u32)data;
> > +               break;
> >         }
> >
> >         msr.data = data;
> > @@ -1646,6 +1669,17 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
> >         if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
> >                 return KVM_MSR_RET_FILTERED;
> >
> > +       switch (index) {
> > +       case MSR_TSC_AUX:
> > +               if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
> > +                       return 1;
> > +
> > +               if (!host_initiated &&
> > +                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> > +                       return 1;
> 
> 
> It looks Table 2-2 of the Intel SDM Vol4 (April 2021) says
> TSC_AUX is supported:
> 
>    If CPUID.80000001H:EDX[27] = 1 or CPUID.(EAX=7,ECX=0):ECX[22] = 1
> 
> Should we also check X86_FEATURE_RDPID before returning 1
> due to no RDTSCP support ?

Yep.  VMX should also clear RDPID if the ENABLE_RDTSCP control isn't supported.
That bug isn't fatal because KVM emulates RDPID on #UD, but it would be a
notieable performance hit for the guest.

There is also a kernel bug lurking; vgetcpu_cpu_init() doesn't check
X86_FEATURE_RDPID and will fail to initialize MSR_TSC_AUX if RDPID is supported
but RDTSCP is not, and __getcpu() uses RDPID.  I'll verify that's broken and
send a patch for that one too.

> There doesn't seem to be a similar description in the APM though.

AMD also documents this in Appendix E:

  CPUID Fn0000_0007_EBX_x0 Structured Extended Feature Identifiers (ECX=0)
  Bits   Field   Name
  ...
  22     RDPID   RDPID instruction and TSC_AUX MSR support.
Reiji Watanabe April 27, 2021, 4:42 a.m. UTC | #3
> > It looks Table 2-2 of the Intel SDM Vol4 (April 2021) says
> > TSC_AUX is supported:
> >
> >    If CPUID.80000001H:EDX[27] = 1 or CPUID.(EAX=7,ECX=0):ECX[22] = 1
> >
> > Should we also check X86_FEATURE_RDPID before returning 1
> > due to no RDTSCP support ?
>
> Yep.  VMX should also clear RDPID if the ENABLE_RDTSCP control isn't supported.
> That bug isn't fatal because KVM emulates RDPID on #UD, but it would be a
> notieable performance hit for the guest.

Thank you so much for the confirmation and the information.
Understood.


> There is also a kernel bug lurking; vgetcpu_cpu_init() doesn't check
> X86_FEATURE_RDPID and will fail to initialize MSR_TSC_AUX if RDPID is supported
> but RDTSCP is not, and __getcpu() uses RDPID.  I'll verify that's broken and
> send a patch for that one too.

I don't find vgetcpu_cpu_init() or __getcpu() in
https://github.com/torvalds/linux.
I would assume you meant setup_getcpu() and vdso_read_cpunode() instead (?).


> AMD also documents this in Appendix E:
>
>   CPUID Fn0000_0007_EBX_x0 Structured Extended Feature Identifiers (ECX=0)
>   Bits   Field   Name
>   ...
>   22     RDPID   RDPID instruction and TSC_AUX MSR support.

Thank you.  I overlooked that...


Regards,
Reiji
Sean Christopherson April 27, 2021, 9:58 p.m. UTC | #4
On Mon, Apr 26, 2021, Reiji Watanabe wrote:
 
> > There is also a kernel bug lurking; vgetcpu_cpu_init() doesn't check
> > X86_FEATURE_RDPID and will fail to initialize MSR_TSC_AUX if RDPID is supported
> > but RDTSCP is not, and __getcpu() uses RDPID.  I'll verify that's broken and
> > send a patch for that one too.
> 
> I don't find vgetcpu_cpu_init() or __getcpu() in
> https://github.com/torvalds/linux.
> I would assume you meant setup_getcpu() and vdso_read_cpunode() instead (?).

Ya, I was looking at an old kernel when I typed that up.  Bug is still there
though :-)
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 71d704f8d569..4c7604fca009 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2672,11 +2672,6 @@  static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
 		break;
 	case MSR_TSC_AUX:
-		if (!boot_cpu_has(X86_FEATURE_RDTSCP))
-			return 1;
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-			return 1;
 		msr_info->data = svm->tsc_aux;
 		break;
 	/*
@@ -2892,13 +2887,6 @@  static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
 		break;
 	case MSR_TSC_AUX:
-		if (!boot_cpu_has(X86_FEATURE_RDTSCP))
-			return 1;
-
-		if (!msr->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-			return 1;
-
 		/*
 		 * This is rare, so we update the MSR here instead of using
 		 * direct_access_msrs.  Doing that would require a rdmsr in
@@ -2906,15 +2894,7 @@  static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		 */
 		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 
-		/*
-		 * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
-		 * incomplete and conflicting architectural behavior.  Current
-		 * AMD CPUs completely ignore bits 63:32, i.e. they aren't
-		 * reserved and always read as zeros.  Emulate AMD CPU behavior
-		 * to avoid explosions if the vCPU is migrated from an AMD host
-		 * to an Intel host.
-		 */
-		svm->tsc_aux = (u32)data;
+		svm->tsc_aux = data;
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
 		if (!boot_cpu_has(X86_FEATURE_LBRV)) {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6501d66167b8..d3fce53d89ac 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1992,11 +1992,6 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-			return 1;
-		goto find_uret_msr;
 	case MSR_IA32_DEBUGCTLMSR:
 		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
 		break;
@@ -2312,14 +2307,6 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			vmx->pt_desc.guest.addr_a[index / 2] = data;
 		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-			return 1;
-		/* Check reserved bit, higher 32 bits should be zero */
-		if ((data >> 32) != 0)
-			return 1;
-		goto find_uret_msr;
 	case MSR_IA32_PERF_CAPABILITIES:
 		if (data && !vcpu_to_pmu(vcpu)->version)
 			return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f0d0b6e043ae..95da9b1cabdb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1610,6 +1610,29 @@  static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 		 * invokes 64-bit SYSENTER.
 		 */
 		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+		break;
+	case MSR_TSC_AUX:
+		if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+			return 1;
+
+		if (!host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+			return 1;
+
+		/*
+		 * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+		 * incomplete and conflicting architectural behavior.  Current
+		 * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+		 * reserved and always read as zeros.  Enforce Intel's reserved
+		 * bits check if and only if the guest CPU is Intel, and clear
+		 * the bits in all other cases.  This ensures cross-vendor
+		 * migration will provide consistent behavior for the guest.
+		 */
+		if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+			return 1;
+
+		data = (u32)data;
+		break;
 	}
 
 	msr.data = data;
@@ -1646,6 +1669,17 @@  int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
 		return KVM_MSR_RET_FILTERED;
 
+	switch (index) {
+	case MSR_TSC_AUX:
+		if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+			return 1;
+
+		if (!host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+			return 1;
+		break;
+	}
+
 	msr.index = index;
 	msr.host_initiated = host_initiated;