diff mbox series

[v5,04/19] KVM:x86: Refresh CPUID on write to guest MSR_IA32_XSS

Message ID 20230803042732.88515-5-weijiang.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series Enable CET Virtualization | expand

Commit Message

Yang, Weijiang Aug. 3, 2023, 4:27 a.m. UTC
Update CPUID(EAX=0DH,ECX=1) when the guest's XSS is modified.
CPUID(EAX=0DH,ECX=1).EBX reports required storage size of
all enabled xstate features in XCR0 | XSS. Guest can allocate
sufficient xsave buffer based on the info.

Note, KVM does not yet support any XSS based features, i.e.
supported_xss is guaranteed to be zero at this time.

Co-developed-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
Signed-off-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/cpuid.c            | 20 ++++++++++++++++++--
 arch/x86/kvm/x86.c              |  8 +++++---
 3 files changed, 24 insertions(+), 5 deletions(-)

Comments

Sean Christopherson Aug. 4, 2023, 4:02 p.m. UTC | #1
On Thu, Aug 03, 2023, Yang Weijiang wrote:
> Update CPUID(EAX=0DH,ECX=1) when the guest's XSS is modified.
> CPUID(EAX=0DH,ECX=1).EBX reports required storage size of
> all enabled xstate features in XCR0 | XSS. Guest can allocate
> sufficient xsave buffer based on the info.

Please wrap changelogs closer to ~75 chars.  I'm pretty sure this isn't the first
time I've made this request...

> Note, KVM does not yet support any XSS based features, i.e.
> supported_xss is guaranteed to be zero at this time.
> 
> Co-developed-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
> Signed-off-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/cpuid.c            | 20 ++++++++++++++++++--
>  arch/x86/kvm/x86.c              |  8 +++++---
>  3 files changed, 24 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 28bd38303d70..20bbcd95511f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -804,6 +804,7 @@ struct kvm_vcpu_arch {
>  
>  	u64 xcr0;
>  	u64 guest_supported_xcr0;
> +	u64 guest_supported_xss;
>  
>  	struct kvm_pio_request pio;
>  	void *pio_data;
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 7f4d13383cf2..0338316b827c 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -249,6 +249,17 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
>  	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
>  }
>  
> +static u64 cpuid_get_supported_xss(struct kvm_cpuid_entry2 *entries, int nent)
> +{
> +	struct kvm_cpuid_entry2 *best;
> +
> +	best = cpuid_entry2_find(entries, nent, 0xd, 1);
> +	if (!best)
> +		return 0;
> +
> +	return (best->ecx | ((u64)best->edx << 32)) & kvm_caps.supported_xss;
> +}
> +
>  static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
>  				       int nent)
>  {
> @@ -276,8 +287,11 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
>  
>  	best = cpuid_entry2_find(entries, nent, 0xD, 1);
>  	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
> -		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
> -		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> +		     cpuid_entry_has(best, X86_FEATURE_XSAVEC))) {
> +		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;

Nit, the variable should be xfeatures, not xstate.  Though I vote to avoid the
variable entirely,

	best = cpuid_entry2_find(entries, nent, 0xD, 1);
	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
		best->ebx = xstate_required_size(vcpu->arch.xcr0 |
						 vcpu->arch.ia32_xss, true);

though it's only a slight preference, i.e. feel free to keep your approach if
you or others feel strongly about the style.

> +	}
>  
>  	best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
>  	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
> @@ -325,6 +339,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>  
>  	vcpu->arch.guest_supported_xcr0 =
>  		cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
> +	vcpu->arch.guest_supported_xss =
> +		cpuid_get_supported_xss(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);

Blech.  I tried to clean up this ugly, but Paolo disagreed[*].  Can you fold in
the below (compile tested only) patch at the very beginning of this series?  It
implements my suggested alternative.  And then this would become:

static u64 vcpu_get_supported_xss(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry_index(vcpu, 0xd, 1);
	if (!best)
		return 0;

	return (best->ecx | ((u64)best->edx << 32)) & kvm_caps.supported_xss;
}

[*] https://lore.kernel.org/all/ZGfius5UkckpUyXl@google.com

>  	/*
>  	 * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0b9033551d8c..5d6d6fa33e5b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3780,10 +3780,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		 * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
>  		 * XSAVES/XRSTORS to save/restore PT MSRs.
>  		 */
> -		if (data & ~kvm_caps.supported_xss)
> +		if (data & ~vcpu->arch.guest_supported_xss)
>  			return 1;
> -		vcpu->arch.ia32_xss = data;
> -		kvm_update_cpuid_runtime(vcpu);
> +		if (vcpu->arch.ia32_xss != data) {
> +			vcpu->arch.ia32_xss = data;
> +			kvm_update_cpuid_runtime(vcpu);
> +		}

Nit, I prefer this style:

		if (vcpu->arch.ia32_xss == data)
			break;

		vcpu->arch.ia32_xss = data;
		kvm_update_cpuid_runtime(vcpu);

so that the common path isn't buried in an if-statement.

>  		break;
>  	case MSR_SMI_COUNT:
>  		if (!msr_info->host_initiated)
> -- 


From: Sean Christopherson <seanjc@google.com>
Date: Fri, 4 Aug 2023 08:48:03 -0700
Subject: [PATCH] KVM: x86: Rework cpuid_get_supported_xcr0() to operate on
 vCPU data

Rework and rename cpuid_get_supported_xcr0() to explicitly operate on vCPU
state, i.e. on a vCPU's CPUID state.  Prior to commit 275a87244ec8 ("KVM:
x86: Don't adjust guest's CPUID.0x12.1 (allowed SGX enclave XFRM)"), KVM
incorrectly fudged guest CPUID at runtime, which in turn necessitated
massaging the incoming CPUID state for KVM_SET_CPUID{2} so as not to run
afoul of kvm_cpuid_check_equal().

Opportunistically move the helper below kvm_update_cpuid_runtime() to make
it harder to repeat the mistake of querying supported XCR0 for runtime
updates.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/cpuid.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7f4d13383cf2..5e42846c948a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -234,21 +234,6 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
 		vcpu->arch.pv_cpuid.features = best->eax;
 }
 
-/*
- * Calculate guest's supported XCR0 taking into account guest CPUID data and
- * KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0).
- */
-static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = cpuid_entry2_find(entries, nent, 0xd, 0);
-	if (!best)
-		return 0;
-
-	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
-}
-
 static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
 				       int nent)
 {
@@ -299,6 +284,21 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
 
+/*
+ * Calculate guest's supported XCR0 taking into account guest CPUID data and
+ * KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0).
+ */
+static u64 vcpu_get_supported_xcr0(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0);
+	if (!best)
+		return 0;
+
+	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
+}
+
 static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
 {
 	struct kvm_cpuid_entry2 *entry;
@@ -323,8 +323,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		kvm_apic_set_version(vcpu);
 	}
 
-	vcpu->arch.guest_supported_xcr0 =
-		cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+	vcpu->arch.guest_supported_xcr0 = vcpu_get_supported_xcr0(vcpu);
 
 	/*
 	 * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if

base-commit: f0147fcfab840fe9a3f03e9645d25c1326373fe6
--
Sean Christopherson Aug. 4, 2023, 6:27 p.m. UTC | #2
On Thu, Aug 03, 2023, Yang Weijiang wrote:
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0b9033551d8c..5d6d6fa33e5b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3780,10 +3780,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		 * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
>  		 * XSAVES/XRSTORS to save/restore PT MSRs.
>  		 */
> -		if (data & ~kvm_caps.supported_xss)
> +		if (data & ~vcpu->arch.guest_supported_xss)

Hmm, this is arguably wrong for userspace-initiated writes, as it would prevent
userspace from restoring MSRs before CPUID.

And it would make the handling of MSR_IA32_XSS writes inconsistent just within
this case statement.  The initial "can this MSR be written at all" check would
*not* honor guest CPUID for host writes, but then the per-bit check *would* honor
guest CPUID for host writes.

But if we exempt host writes, then we'll end up with another mess, as exempting
host writes for MSR_KVM_GUEST_SSP would let the guest coerce KVM into writing an
illegal value by modifying SMRAM while in SMM.

Blech.

If we can get away with it, i.e. not break userspace, I think my preference is
to enforce guest CPUID for host accesses to XSS, XFD, XFD_ERR, etc.  I'm 99%
certain we can make that change, because there are many, many MSRs that do NOT
exempt host writes, i.e. the only way this would be a breaking change is if
userspace is writing things like XSS before KVM_SET_CPUID2, but other MSRs after
KVM_SET_CPUID2.

I'm pretty sure I've advocated for the exact opposite in the past, i.e. argued
that KVM's ABI is to not enforce ordering between KVM_SET_CPUID2 and KVM_SET_MSR.
But this is becoming untenable, juggling the dependencies in KVM is complex and
is going to result in a nasty bug at some point.

For this series, lets just tighten the rules for XSS, i.e. drop the host_initated
exemption.  And in a parallel/separate series, try to do a wholesale cleanup of
all the cases that essentially allow userspace to do KVM_SET_MSR before KVM_SET_CPUID2.
Paolo Bonzini Aug. 4, 2023, 9:43 p.m. UTC | #3
On 8/4/23 18:02, Sean Christopherson wrote:
>> Update CPUID(EAX=0DH,ECX=1) when the guest's XSS is modified.
>> CPUID(EAX=0DH,ECX=1).EBX reports required storage size of
>> all enabled xstate features in XCR0 | XSS. Guest can allocate
>> sufficient xsave buffer based on the info.
>
> Please wrap changelogs closer to ~75 chars.  I'm pretty sure this isn't the first
> time I've made this request...

I suspect this is because of the long "word" CPUID(EAX=0DH,ECX=1).EBX. 
It would make the lengths less homogeneous if lines 1 stayed the same 
but lines 2-4 became longer.

Paolo
Paolo Bonzini Aug. 7, 2023, 6:55 a.m. UTC | #4
On 8/4/23 20:27, Sean Christopherson wrote:
> I think my preference is to enforce guest CPUID for host accesses to 
> XSS, XFD, XFD_ERR, etc I'm pretty sure I've advocated for the exact 
> opposite in the past, i.e. argued that KVM's ABI is to not enforce 
> ordering between KVM_SET_CPUID2 and KVM_SET_MSR. But this is becoming
> untenable, juggling the dependencies in KVM is complex and is going
> to result in a nasty bug at some point.

Fortunately, you are right now.  Well, almost :) but the important part 
is that indeed the dependencies are too complex.

While host-side accesses must be allowed, they should only allow the 
default value if the CPUID bit is not set.

Paolo
Yang, Weijiang Aug. 8, 2023, 2:20 p.m. UTC | #5
On 8/5/2023 12:02 AM, Sean Christopherson wrote:
> On Thu, Aug 03, 2023, Yang Weijiang wrote:
>> Update CPUID(EAX=0DH,ECX=1) when the guest's XSS is modified.
>> CPUID(EAX=0DH,ECX=1).EBX reports required storage size of
>> all enabled xstate features in XCR0 | XSS. Guest can allocate
>> sufficient xsave buffer based on the info.
> Please wrap changelogs closer to ~75 chars.  I'm pretty sure this isn't the first
> time I've made this request...
Thanks, will keep the changelog to 70~75  chars.
>> Note, KVM does not yet support any XSS based features, i.e.
>> supported_xss is guaranteed to be zero at this time.
>>
>> Co-developed-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
>> Signed-off-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
>> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
>> ---
>>   arch/x86/include/asm/kvm_host.h |  1 +
>>   arch/x86/kvm/cpuid.c            | 20 ++++++++++++++++++--
>>   arch/x86/kvm/x86.c              |  8 +++++---
>>   3 files changed, 24 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 28bd38303d70..20bbcd95511f 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -804,6 +804,7 @@ struct kvm_vcpu_arch {
>>   
>>   	u64 xcr0;
>>   	u64 guest_supported_xcr0;
>> +	u64 guest_supported_xss;
>>   
>>   	struct kvm_pio_request pio;
>>   	void *pio_data;
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index 7f4d13383cf2..0338316b827c 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -249,6 +249,17 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
>>   	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
>>   }
>>   
>> +static u64 cpuid_get_supported_xss(struct kvm_cpuid_entry2 *entries, int nent)
>> +{
>> +	struct kvm_cpuid_entry2 *best;
>> +
>> +	best = cpuid_entry2_find(entries, nent, 0xd, 1);
>> +	if (!best)
>> +		return 0;
>> +
>> +	return (best->ecx | ((u64)best->edx << 32)) & kvm_caps.supported_xss;
>> +}
>> +
>>   static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
>>   				       int nent)
>>   {
>> @@ -276,8 +287,11 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
>>   
>>   	best = cpuid_entry2_find(entries, nent, 0xD, 1);
>>   	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
>> -		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
>> -		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>> +		     cpuid_entry_has(best, X86_FEATURE_XSAVEC))) {
>> +		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
> Nit, the variable should be xfeatures, not xstate.  Though I vote to avoid the
> variable entirely,
>
> 	best = cpuid_entry2_find(entries, nent, 0xD, 1);
> 	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
> 		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
> 		best->ebx = xstate_required_size(vcpu->arch.xcr0 |
> 						 vcpu->arch.ia32_xss, true);
>
> though it's only a slight preference, i.e. feel free to keep your approach if
> you or others feel strongly about the style.
Yes, the variable is not necessary, will remove it.
>> +	}
>>   
>>   	best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
>>   	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
>> @@ -325,6 +339,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>>   
>>   	vcpu->arch.guest_supported_xcr0 =
>>   		cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
>> +	vcpu->arch.guest_supported_xss =
>> +		cpuid_get_supported_xss(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
> Blech.  I tried to clean up this ugly, but Paolo disagreed[*].  Can you fold in
> the below (compile tested only) patch at the very beginning of this series?  It
> implements my suggested alternative.  And then this would become:
>
> static u64 vcpu_get_supported_xss(struct kvm_vcpu *vcpu)
> {
> 	struct kvm_cpuid_entry2 *best;
>
> 	best = kvm_find_cpuid_entry_index(vcpu, 0xd, 1);
> 	if (!best)
> 		return 0;
>
> 	return (best->ecx | ((u64)best->edx << 32)) & kvm_caps.supported_xss;
> }
>
> [*] https://lore.kernel.org/all/ZGfius5UkckpUyXl@google.com
Sure, will take it into my series, thanks!
>>   	/*
>>   	 * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 0b9033551d8c..5d6d6fa33e5b 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -3780,10 +3780,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   		 * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
>>   		 * XSAVES/XRSTORS to save/restore PT MSRs.
>>   		 */
>> -		if (data & ~kvm_caps.supported_xss)
>> +		if (data & ~vcpu->arch.guest_supported_xss)
>>   			return 1;
>> -		vcpu->arch.ia32_xss = data;
>> -		kvm_update_cpuid_runtime(vcpu);
>> +		if (vcpu->arch.ia32_xss != data) {
>> +			vcpu->arch.ia32_xss = data;
>> +			kvm_update_cpuid_runtime(vcpu);
>> +		}
> Nit, I prefer this style:
>
> 		if (vcpu->arch.ia32_xss == data)
> 			break;
>
> 		vcpu->arch.ia32_xss = data;
> 		kvm_update_cpuid_runtime(vcpu);
>
> so that the common path isn't buried in an if-statement.
Yeah, I feel I'm a bit awkward to make code look nicer :-)
>>   		break;
>>   	case MSR_SMI_COUNT:
>>   		if (!msr_info->host_initiated)
>> -- 
>
> From: Sean Christopherson <seanjc@google.com>
> Date: Fri, 4 Aug 2023 08:48:03 -0700
> Subject: [PATCH] KVM: x86: Rework cpuid_get_supported_xcr0() to operate on
>   vCPU data
>
> Rework and rename cpuid_get_supported_xcr0() to explicitly operate on vCPU
> state, i.e. on a vCPU's CPUID state.  Prior to commit 275a87244ec8 ("KVM:
> x86: Don't adjust guest's CPUID.0x12.1 (allowed SGX enclave XFRM)"), KVM
> incorrectly fudged guest CPUID at runtime, which in turn necessitated
> massaging the incoming CPUID state for KVM_SET_CPUID{2} so as not to run
> afoul of kvm_cpuid_check_equal().
>
> Opportunistically move the helper below kvm_update_cpuid_runtime() to make
> it harder to repeat the mistake of querying supported XCR0 for runtime
> updates.
>
> No functional change intended.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/kvm/cpuid.c | 33 ++++++++++++++++-----------------
>   1 file changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 7f4d13383cf2..5e42846c948a 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -234,21 +234,6 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
>   		vcpu->arch.pv_cpuid.features = best->eax;
>   }
>   
> -/*
> - * Calculate guest's supported XCR0 taking into account guest CPUID data and
> - * KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0).
> - */
> -static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
> -{
> -	struct kvm_cpuid_entry2 *best;
> -
> -	best = cpuid_entry2_find(entries, nent, 0xd, 0);
> -	if (!best)
> -		return 0;
> -
> -	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> -}
> -
>   static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
>   				       int nent)
>   {
> @@ -299,6 +284,21 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
>   }
>   EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
>   
> +/*
> + * Calculate guest's supported XCR0 taking into account guest CPUID data and
> + * KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0).
> + */
> +static u64 vcpu_get_supported_xcr0(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_cpuid_entry2 *best;
> +
> +	best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0);
> +	if (!best)
> +		return 0;
> +
> +	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> +}
> +
>   static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
>   {
>   	struct kvm_cpuid_entry2 *entry;
> @@ -323,8 +323,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>   		kvm_apic_set_version(vcpu);
>   	}
>   
> -	vcpu->arch.guest_supported_xcr0 =
> -		cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
> +	vcpu->arch.guest_supported_xcr0 = vcpu_get_supported_xcr0(vcpu);
>   
>   	/*
>   	 * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
>
> base-commit: f0147fcfab840fe9a3f03e9645d25c1326373fe6
Yang, Weijiang Aug. 9, 2023, 3:11 a.m. UTC | #6
On 8/5/2023 5:43 AM, Paolo Bonzini wrote:
> On 8/4/23 18:02, Sean Christopherson wrote:
>>> Update CPUID(EAX=0DH,ECX=1) when the guest's XSS is modified.
>>> CPUID(EAX=0DH,ECX=1).EBX reports required storage size of
>>> all enabled xstate features in XCR0 | XSS. Guest can allocate
>>> sufficient xsave buffer based on the info.
>>
>> Please wrap changelogs closer to ~75 chars.  I'm pretty sure this isn't the first
>> time I've made this request...
>
> I suspect this is because of the long "word" CPUID(EAX=0DH,ECX=1).EBX. It would make the lengths less homogeneous if lines 1 stayed the same but lines 2-4 became longer.
Yes, more or less, but I need to learn some "techniques" to make the wording looks trimmed and tidy. Thanks!
> Paolo
>
Yang, Weijiang Aug. 9, 2023, 8:56 a.m. UTC | #7
On 8/5/2023 2:27 AM, Sean Christopherson wrote:
> On Thu, Aug 03, 2023, Yang Weijiang wrote:
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 0b9033551d8c..5d6d6fa33e5b 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -3780,10 +3780,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   		 * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
>>   		 * XSAVES/XRSTORS to save/restore PT MSRs.
>>   		 */
>> -		if (data & ~kvm_caps.supported_xss)
>> +		if (data & ~vcpu->arch.guest_supported_xss)
> Hmm, this is arguably wrong for userspace-initiated writes, as it would prevent
> userspace from restoring MSRs before CPUID.
>
> And it would make the handling of MSR_IA32_XSS writes inconsistent just within
> this case statement.  The initial "can this MSR be written at all" check would
> *not* honor guest CPUID for host writes, but then the per-bit check *would* honor
> guest CPUID for host writes.
>
> But if we exempt host writes, then we'll end up with another mess, as exempting
> host writes for MSR_KVM_GUEST_SSP would let the guest coerce KVM into writing an
> illegal value by modifying SMRAM while in SMM.
>
> Blech.
>
> If we can get away with it, i.e. not break userspace, I think my preference is
> to enforce guest CPUID for host accesses to XSS, XFD, XFD_ERR, etc.  I'm 99%
> certain we can make that change, because there are many, many MSRs that do NOT
> exempt host writes, i.e. the only way this would be a breaking change is if
> userspace is writing things like XSS before KVM_SET_CPUID2, but other MSRs after
> KVM_SET_CPUID2.
>
> I'm pretty sure I've advocated for the exact opposite in the past, i.e. argued
> that KVM's ABI is to not enforce ordering between KVM_SET_CPUID2 and KVM_SET_MSR.
> But this is becoming untenable, juggling the dependencies in KVM is complex and
> is going to result in a nasty bug at some point.
>
> For this series, lets just tighten the rules for XSS, i.e. drop the host_initated
> exemption.  And in a parallel/separate series, try to do a wholesale cleanup of
> all the cases that essentially allow userspace to do KVM_SET_MSR before KVM_SET_CPUID2.
OK, will do it for this series and investigate for other MSRs.
Thanks!
Paolo Bonzini Aug. 10, 2023, 12:01 a.m. UTC | #8
On Wed, Aug 9, 2023 at 10:56 AM Yang, Weijiang <weijiang.yang@intel.com> wrote:
> > I'm pretty sure I've advocated for the exact opposite in the past, i.e. argued
> > that KVM's ABI is to not enforce ordering between KVM_SET_CPUID2 and KVM_SET_MSR.
> > But this is becoming untenable, juggling the dependencies in KVM is complex and
> > is going to result in a nasty bug at some point.
> >
> > For this series, lets just tighten the rules for XSS, i.e. drop the host_initated
> > exemption.  And in a parallel/separate series, try to do a wholesale cleanup of
> > all the cases that essentially allow userspace to do KVM_SET_MSR before KVM_SET_CPUID2.
> OK, will do it for this series and investigate for other MSRs.
> Thanks!

Remember that, while the ordering between KVM_SET_CPUID2 and
KVM_SET_MSR must be enforced(*), the host_initiated path must allow
the default (generally 0) value.

Paolo

(*) this means that you should check guest_cpuid_has even if
host_initiated == true.
Yang, Weijiang Aug. 10, 2023, 1:12 a.m. UTC | #9
On 8/10/2023 8:01 AM, Paolo Bonzini wrote:
> On Wed, Aug 9, 2023 at 10:56 AM Yang, Weijiang <weijiang.yang@intel.com> wrote:
>>> I'm pretty sure I've advocated for the exact opposite in the past, i.e. argued
>>> that KVM's ABI is to not enforce ordering between KVM_SET_CPUID2 and KVM_SET_MSR.
>>> But this is becoming untenable, juggling the dependencies in KVM is complex and
>>> is going to result in a nasty bug at some point.
>>>
>>> For this series, lets just tighten the rules for XSS, i.e. drop the host_initated
>>> exemption.  And in a parallel/separate series, try to do a wholesale cleanup of
>>> all the cases that essentially allow userspace to do KVM_SET_MSR before KVM_SET_CPUID2.
>> OK, will do it for this series and investigate for other MSRs.
>> Thanks!
> Remember that, while the ordering between KVM_SET_CPUID2 and
> KVM_SET_MSR must be enforced(*), the host_initiated path must allow
> the default (generally 0) value.
Yes, will take it, thanks!
> Paolo
>
> (*) this means that you should check guest_cpuid_has even if
> host_initiated == true.
>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 28bd38303d70..20bbcd95511f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -804,6 +804,7 @@  struct kvm_vcpu_arch {
 
 	u64 xcr0;
 	u64 guest_supported_xcr0;
+	u64 guest_supported_xss;
 
 	struct kvm_pio_request pio;
 	void *pio_data;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7f4d13383cf2..0338316b827c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -249,6 +249,17 @@  static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
 	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
 }
 
+static u64 cpuid_get_supported_xss(struct kvm_cpuid_entry2 *entries, int nent)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = cpuid_entry2_find(entries, nent, 0xd, 1);
+	if (!best)
+		return 0;
+
+	return (best->ecx | ((u64)best->edx << 32)) & kvm_caps.supported_xss;
+}
+
 static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
 				       int nent)
 {
@@ -276,8 +287,11 @@  static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
 
 	best = cpuid_entry2_find(entries, nent, 0xD, 1);
 	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
-		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
-		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+		     cpuid_entry_has(best, X86_FEATURE_XSAVEC))) {
+		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
+
+		best->ebx = xstate_required_size(xstate, true);
+	}
 
 	best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
 	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
@@ -325,6 +339,8 @@  static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.guest_supported_xcr0 =
 		cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+	vcpu->arch.guest_supported_xss =
+		cpuid_get_supported_xss(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
 
 	/*
 	 * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b9033551d8c..5d6d6fa33e5b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3780,10 +3780,12 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
 		 * XSAVES/XRSTORS to save/restore PT MSRs.
 		 */
-		if (data & ~kvm_caps.supported_xss)
+		if (data & ~vcpu->arch.guest_supported_xss)
 			return 1;
-		vcpu->arch.ia32_xss = data;
-		kvm_update_cpuid_runtime(vcpu);
+		if (vcpu->arch.ia32_xss != data) {
+			vcpu->arch.ia32_xss = data;
+			kvm_update_cpuid_runtime(vcpu);
+		}
 		break;
 	case MSR_SMI_COUNT:
 		if (!msr_info->host_initiated)