diff mbox series

[1/6] KVM: x86: Fix tracing of CPUID.function when function is out-of-range

Message ID 20200302195736.24777-2-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: CPUID emulation and tracing fixes | expand

Commit Message

Sean Christopherson March 2, 2020, 7:57 p.m. UTC
Rework kvm_cpuid() to query entry->function when adjusting the output
values so that the original function (in the aptly named "function") is
preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
trace the max function for a range instead of the requested function if
the requested function is out-of-range and an entry for the max function
exists.

Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Jim Mattson <jmattson@google.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/cpuid.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

Comments

Jan Kiszka March 2, 2020, 8:26 p.m. UTC | #1
On 02.03.20 20:57, Sean Christopherson wrote:
> Rework kvm_cpuid() to query entry->function when adjusting the output
> values so that the original function (in the aptly named "function") is
> preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
> trace the max function for a range instead of the requested function if
> the requested function is out-of-range and an entry for the max function
> exists.
> 
> Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
> Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
> Cc: Jim Mattson <jmattson@google.com>
> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>   arch/x86/kvm/cpuid.c | 15 +++++++--------
>   1 file changed, 7 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index b1c469446b07..6be012937eba 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
>   	return max && function <= max->eax;
>   }
>   
> +/* Returns true if the requested leaf/function exists in guest CPUID. */
>   bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>   	       u32 *ecx, u32 *edx, bool check_limit)
>   {
> -	u32 function = *eax, index = *ecx;
> +	const u32 function = *eax, index = *ecx;
>   	struct kvm_cpuid_entry2 *entry;
> -	struct kvm_cpuid_entry2 *max;
>   	bool found;
>   
>   	entry = kvm_find_cpuid_entry(vcpu, function, index);
> @@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>   	 */
>   	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
>   	    !cpuid_function_in_range(vcpu, function)) {
> -		max = kvm_find_cpuid_entry(vcpu, 0, 0);
> -		if (max) {
> -			function = max->eax;
> -			entry = kvm_find_cpuid_entry(vcpu, function, index);
> -		}
> +		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
> +		if (entry)
> +			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
>   	}
>   	if (entry) {
>   		*eax = entry->eax;
>   		*ebx = entry->ebx;
>   		*ecx = entry->ecx;
>   		*edx = entry->edx;
> -		if (function == 7 && index == 0) {
> +
> +		if (entry->function == 7 && index == 0) {
>   			u64 data;
>   		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
>   			    (data & TSX_CTRL_CPUID_CLEAR))
> 

What about the !entry case below this? It was impacted by the function
capping so far, but now it no longer is.

Jan
Sean Christopherson March 2, 2020, 8:49 p.m. UTC | #2
On Mon, Mar 02, 2020 at 09:26:54PM +0100, Jan Kiszka wrote:
> On 02.03.20 20:57, Sean Christopherson wrote:
> >Rework kvm_cpuid() to query entry->function when adjusting the output
> >values so that the original function (in the aptly named "function") is
> >preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
> >trace the max function for a range instead of the requested function if
> >the requested function is out-of-range and an entry for the max function
> >exists.
> >
> >Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
> >Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
> >Cc: Jim Mattson <jmattson@google.com>
> >Cc: Xiaoyao Li <xiaoyao.li@intel.com>
> >Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> >---
> >  arch/x86/kvm/cpuid.c | 15 +++++++--------
> >  1 file changed, 7 insertions(+), 8 deletions(-)
> >
> >diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> >index b1c469446b07..6be012937eba 100644
> >--- a/arch/x86/kvm/cpuid.c
> >+++ b/arch/x86/kvm/cpuid.c
> >@@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
> >  	return max && function <= max->eax;
> >  }
> >+/* Returns true if the requested leaf/function exists in guest CPUID. */
> >  bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
> >  	       u32 *ecx, u32 *edx, bool check_limit)
> >  {
> >-	u32 function = *eax, index = *ecx;
> >+	const u32 function = *eax, index = *ecx;
> >  	struct kvm_cpuid_entry2 *entry;
> >-	struct kvm_cpuid_entry2 *max;
> >  	bool found;
> >  	entry = kvm_find_cpuid_entry(vcpu, function, index);
> >@@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
> >  	 */
> >  	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
> >  	    !cpuid_function_in_range(vcpu, function)) {
> >-		max = kvm_find_cpuid_entry(vcpu, 0, 0);
> >-		if (max) {
> >-			function = max->eax;
> >-			entry = kvm_find_cpuid_entry(vcpu, function, index);
> >-		}
> >+		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
> >+		if (entry)
> >+			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
> >  	}
> >  	if (entry) {
> >  		*eax = entry->eax;
> >  		*ebx = entry->ebx;
> >  		*ecx = entry->ecx;
> >  		*edx = entry->edx;
> >-		if (function == 7 && index == 0) {
> >+
> >+		if (entry->function == 7 && index == 0) {
> >  			u64 data;
> >  		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
> >  			    (data & TSX_CTRL_CPUID_CLEAR))
> >
> 
> What about the !entry case below this? It was impacted by the function
> capping so far, not it's no longer.

Hmm, the only way the output would be different is in a really contrived
scenario where userspace doesn't provide an entry for the max basic leaf.

The !entry path can only be reached with "orig_function != function" if
orig_function is out of range and there is no entry for the max basic leaf.
The adjustments for 0xb/0x1f require the max basic leaf to be 0xb or 0x1f,
and to take effect with !entry would require there to be a CPUID.max.1 but
not a CPUID.max.0.  That'd be a violation of Intel's SDM, i.e. it's bogus
userspace input and IMO can be ignored.
Jan Kiszka March 2, 2020, 8:59 p.m. UTC | #3
On 02.03.20 21:49, Sean Christopherson wrote:
> On Mon, Mar 02, 2020 at 09:26:54PM +0100, Jan Kiszka wrote:
>> On 02.03.20 20:57, Sean Christopherson wrote:
>>> Rework kvm_cpuid() to query entry->function when adjusting the output
>>> values so that the original function (in the aptly named "function") is
>>> preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
>>> trace the max function for a range instead of the requested function if
>>> the requested function is out-of-range and an entry for the max function
>>> exists.
>>>
>>> Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
>>> Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> Cc: Jim Mattson <jmattson@google.com>
>>> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
>>> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
>>> ---
>>>   arch/x86/kvm/cpuid.c | 15 +++++++--------
>>>   1 file changed, 7 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>> index b1c469446b07..6be012937eba 100644
>>> --- a/arch/x86/kvm/cpuid.c
>>> +++ b/arch/x86/kvm/cpuid.c
>>> @@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
>>>   	return max && function <= max->eax;
>>>   }
>>> +/* Returns true if the requested leaf/function exists in guest CPUID. */
>>>   bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>>>   	       u32 *ecx, u32 *edx, bool check_limit)
>>>   {
>>> -	u32 function = *eax, index = *ecx;
>>> +	const u32 function = *eax, index = *ecx;
>>>   	struct kvm_cpuid_entry2 *entry;
>>> -	struct kvm_cpuid_entry2 *max;
>>>   	bool found;
>>>   	entry = kvm_find_cpuid_entry(vcpu, function, index);
>>> @@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>>>   	 */
>>>   	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
>>>   	    !cpuid_function_in_range(vcpu, function)) {
>>> -		max = kvm_find_cpuid_entry(vcpu, 0, 0);
>>> -		if (max) {
>>> -			function = max->eax;
>>> -			entry = kvm_find_cpuid_entry(vcpu, function, index);
>>> -		}
>>> +		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
>>> +		if (entry)
>>> +			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
>>>   	}
>>>   	if (entry) {
>>>   		*eax = entry->eax;
>>>   		*ebx = entry->ebx;
>>>   		*ecx = entry->ecx;
>>>   		*edx = entry->edx;
>>> -		if (function == 7 && index == 0) {
>>> +
>>> +		if (entry->function == 7 && index == 0) {
>>>   			u64 data;
>>>   		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
>>>   			    (data & TSX_CTRL_CPUID_CLEAR))
>>>
>>
>> What about the !entry case below this? It was impacted by the function
>> capping so far, not it's no longer.
> 
> Hmm, the only way the output would be different is in a really contrived
> scenario where userspace doesn't provide an entry for the max basic leaf.

I think I've seen that, a cap to 0x10, with QEMU and '-cpu host' when
providing intentionally bogus values to cpuid.

Jan

> 
> The !entry path can only be reached with "orig_function != function" if
> orig_function is out of range and there is no entry for the max basic leaf.
> The adjustments for 0xb/0x1f require the max basic leaf to be 0xb or 0x1f,
> and to take effect with !entry would require there to be a CPUID.max.1 but
> not a CPUID.max.0.  That'd be a violation of Intel's SDM, i.e. it's bogus
> userspace input and IMO can be ignored.
>
Xiaoyao Li March 3, 2020, 2:27 a.m. UTC | #4
On 3/3/2020 4:49 AM, Sean Christopherson wrote:
> On Mon, Mar 02, 2020 at 09:26:54PM +0100, Jan Kiszka wrote:
>> On 02.03.20 20:57, Sean Christopherson wrote:
>>> Rework kvm_cpuid() to query entry->function when adjusting the output
>>> values so that the original function (in the aptly named "function") is
>>> preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
>>> trace the max function for a range instead of the requested function if
>>> the requested function is out-of-range and an entry for the max function
>>> exists.
>>>
>>> Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
>>> Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> Cc: Jim Mattson <jmattson@google.com>
>>> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
>>> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
>>> ---
>>>   arch/x86/kvm/cpuid.c | 15 +++++++--------
>>>   1 file changed, 7 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>> index b1c469446b07..6be012937eba 100644
>>> --- a/arch/x86/kvm/cpuid.c
>>> +++ b/arch/x86/kvm/cpuid.c
>>> @@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
>>>   	return max && function <= max->eax;
>>>   }
>>> +/* Returns true if the requested leaf/function exists in guest CPUID. */
>>>   bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>>>   	       u32 *ecx, u32 *edx, bool check_limit)
>>>   {
>>> -	u32 function = *eax, index = *ecx;
>>> +	const u32 function = *eax, index = *ecx;
>>>   	struct kvm_cpuid_entry2 *entry;
>>> -	struct kvm_cpuid_entry2 *max;
>>>   	bool found;
>>>   	entry = kvm_find_cpuid_entry(vcpu, function, index);
>>> @@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>>>   	 */
>>>   	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
>>>   	    !cpuid_function_in_range(vcpu, function)) {
>>> -		max = kvm_find_cpuid_entry(vcpu, 0, 0);
>>> -		if (max) {
>>> -			function = max->eax;
>>> -			entry = kvm_find_cpuid_entry(vcpu, function, index);
>>> -		}
>>> +		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
>>> +		if (entry)
>>> +			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
>>>   	}
>>>   	if (entry) {
>>>   		*eax = entry->eax;
>>>   		*ebx = entry->ebx;
>>>   		*ecx = entry->ecx;
>>>   		*edx = entry->edx;
>>> -		if (function == 7 && index == 0) {
>>> +
>>> +		if (entry->function == 7 && index == 0) {
>>>   			u64 data;
>>>   		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
>>>   			    (data & TSX_CTRL_CPUID_CLEAR))
>>>
>>
>> What about the !entry case below this? It was impacted by the function
>> capping so far, not it's no longer.
> 
> Hmm, the only way the output would be different is in a really contrived
> scenario where userspace doesn't provide an entry for the max basic leaf.
> 
> The !entry path can only be reached with "orig_function != function" if
> orig_function is out of range and there is no entry for the max basic leaf.

> The adjustments for 0xb/0x1f require the max basic leaf to be 0xb or 0x1f,
> and to take effect with !entry would require there to be a CPUID.max.1 but
> not a CPUID.max.0.  That'd be a violation of Intel's SDM, i.e. it's bogus
> userspace input and IMO can be ignored.
> 

Sorry, I don't follow you. Why is it a violation of Intel's SDM?

Supposing the max basic is 0x1f, and it queries cpuid(0x20, 0x5),
it should return cpuid(0x1f, 0x5).

But based on this patch, it returns all zeros.
Xiaoyao Li March 3, 2020, 2:50 a.m. UTC | #5
On 3/3/2020 3:57 AM, Sean Christopherson wrote:
> Rework kvm_cpuid() to query entry->function when adjusting the output
> values so that the original function (in the aptly named "function") is
> preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
> trace the max function for a range instead of the requested function if
> the requested function is out-of-range and an entry for the max function
> exists.
> 
> Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
> Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
> Cc: Jim Mattson <jmattson@google.com>
> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>   arch/x86/kvm/cpuid.c | 15 +++++++--------
>   1 file changed, 7 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index b1c469446b07..6be012937eba 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
>   	return max && function <= max->eax;
>   }
>   
> +/* Returns true if the requested leaf/function exists in guest CPUID. */
>   bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>   	       u32 *ecx, u32 *edx, bool check_limit)
>   {
> -	u32 function = *eax, index = *ecx;
> +	const u32 function = *eax, index = *ecx;
>   	struct kvm_cpuid_entry2 *entry;
> -	struct kvm_cpuid_entry2 *max;
>   	bool found;
>   
>   	entry = kvm_find_cpuid_entry(vcpu, function, index);
> @@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>   	 */
>   	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
>   	    !cpuid_function_in_range(vcpu, function)) {
> -		max = kvm_find_cpuid_entry(vcpu, 0, 0);
> -		if (max) {
> -			function = max->eax;
> -			entry = kvm_find_cpuid_entry(vcpu, function, index);
> -		}
> +		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
> +		if (entry)
> +			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);

There is a problem.

When the queried leaf is out of range on an Intel CPU, it returns the maximum
basic leaf, and any dependence on the input ECX (i.e., subleaf) value in the
basic leaf is honored, as stated in the SDM's description of the CPUID
instruction.

ECX should be honored if and only if the leaf has a significant index.
If the leaf doesn't have a significant index, the ECX input is simply
ignored on bare metal.

So it should be something like:

if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
	!cpuid_function_in_range(vcpu, function)) {
	entry = kvm_find_cpuid_entry(vcpu, 0, 0);
	if (entry) {
		entry = kvm_find_cpuid_entry(vcpu, entry->eax, 0);
		if (entry &&
		    entry->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX ) {
			entry = kvm_find_cpuid_entry(vcpu, entry->eax,
						     index);
		}
	}
}

>   	}
>   	if (entry) {
>   		*eax = entry->eax;
>   		*ebx = entry->ebx;
>   		*ecx = entry->ecx;
>   		*edx = entry->edx;
> -		if (function == 7 && index == 0) {
> +
> +		if (entry->function == 7 && index == 0) {
>   			u64 data;
>   		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
>   			    (data & TSX_CTRL_CPUID_CLEAR))
>
Sean Christopherson March 3, 2020, 3:45 a.m. UTC | #6
On Tue, Mar 03, 2020 at 10:27:47AM +0800, Xiaoyao Li wrote:
> On 3/3/2020 4:49 AM, Sean Christopherson wrote:
> >On Mon, Mar 02, 2020 at 09:26:54PM +0100, Jan Kiszka wrote:
> >>On 02.03.20 20:57, Sean Christopherson wrote:
> >>>Rework kvm_cpuid() to query entry->function when adjusting the output
> >>>values so that the original function (in the aptly named "function") is
> >>>preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
> >>>trace the max function for a range instead of the requested function if
> >>>the requested function is out-of-range and an entry for the max function
> >>>exists.
> >>>
> >>>Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
> >>>Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
> >>>Cc: Jim Mattson <jmattson@google.com>
> >>>Cc: Xiaoyao Li <xiaoyao.li@intel.com>
> >>>Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> >>>---
> >>>  arch/x86/kvm/cpuid.c | 15 +++++++--------
> >>>  1 file changed, 7 insertions(+), 8 deletions(-)
> >>>
> >>>diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> >>>index b1c469446b07..6be012937eba 100644
> >>>--- a/arch/x86/kvm/cpuid.c
> >>>+++ b/arch/x86/kvm/cpuid.c
> >>>@@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
> >>>  	return max && function <= max->eax;
> >>>  }
> >>>+/* Returns true if the requested leaf/function exists in guest CPUID. */
> >>>  bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
> >>>  	       u32 *ecx, u32 *edx, bool check_limit)
> >>>  {
> >>>-	u32 function = *eax, index = *ecx;
> >>>+	const u32 function = *eax, index = *ecx;
> >>>  	struct kvm_cpuid_entry2 *entry;
> >>>-	struct kvm_cpuid_entry2 *max;
> >>>  	bool found;
> >>>  	entry = kvm_find_cpuid_entry(vcpu, function, index);
> >>>@@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
> >>>  	 */
> >>>  	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
> >>>  	    !cpuid_function_in_range(vcpu, function)) {
> >>>-		max = kvm_find_cpuid_entry(vcpu, 0, 0);
> >>>-		if (max) {
> >>>-			function = max->eax;
> >>>-			entry = kvm_find_cpuid_entry(vcpu, function, index);
> >>>-		}
> >>>+		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
> >>>+		if (entry)
> >>>+			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
> >>>  	}
> >>>  	if (entry) {
> >>>  		*eax = entry->eax;
> >>>  		*ebx = entry->ebx;
> >>>  		*ecx = entry->ecx;
> >>>  		*edx = entry->edx;
> >>>-		if (function == 7 && index == 0) {
> >>>+
> >>>+		if (entry->function == 7 && index == 0) {
> >>>  			u64 data;
> >>>  		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
> >>>  			    (data & TSX_CTRL_CPUID_CLEAR))
> >>>
> >>
> >>What about the !entry case below this? It was impacted by the function
> >>capping so far, not it's no longer.
> >
> >Hmm, the only way the output would be different is in a really contrived
> >scenario where userspace doesn't provide an entry for the max basic leaf.
> >
> >The !entry path can only be reached with "orig_function != function" if
> >orig_function is out of range and there is no entry for the max basic leaf.
> 
> >The adjustments for 0xb/0x1f require the max basic leaf to be 0xb or 0x1f,
> >and to take effect with !entry would require there to be a CPUID.max.1 but
> >not a CPUID.max.0.  That'd be a violation of Intel's SDM, i.e. it's bogus
> >userspace input and IMO can be ignored.
> >
> 
> Sorry I cannot catch you. Why it's a violation of Intel's SDM?

The case being discussed above would look like:

KVM CPUID Entries:
   Function   Index Output
   0x00000000 0x00: eax=0x0000000b ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
   0x00000001 0x00: eax=0x000906ea ebx=0x03000800 ecx=0xfffa3223 edx=0x0f8bfbff
   0x00000002 0x00: eax=0x00000001 ebx=0x00000000 ecx=0x0000004d edx=0x002c307d
   0x00000003 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
   0x00000004 0x00: eax=0x00000121 ebx=0x01c0003f ecx=0x0000003f edx=0x00000001
   0x00000004 0x01: eax=0x00000122 ebx=0x01c0003f ecx=0x0000003f edx=0x00000001
   0x00000004 0x02: eax=0x00000143 ebx=0x03c0003f ecx=0x00000fff edx=0x00000001
   0x00000004 0x03: eax=0x00000163 ebx=0x03c0003f ecx=0x00003fff edx=0x00000006
   0x00000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000003 edx=0x00000000
   0x00000006 0x00: eax=0x00000004 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
   0x00000007 0x00: eax=0x00000000 ebx=0x009c4fbb ecx=0x00000004 edx=0x84000000
   0x00000008 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
   0x00000009 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
   0x0000000a 0x00: eax=0x07300402 ebx=0x00000000 ecx=0x00000000 edx=0x00000603
--> MISSING CPUID.0xB.0
   0x0000000b 0x01: eax=0x00000000 ebx=0x00000001 ecx=0x00000201 edx=0x00000003

CPUID.0xB.0 does not exist, so output.ECX=0, which indicates an invalid
level-type.

The SDM states (for CPUID.0xB):

   If an input value n in ECX returns the invalid level-type of 0 in ECX[15:8],
   other input values with ECX > n also return 0 in ECX[15:8]

That means returning a valid level-type in CPUID.0xB.1 as above violates
the SDM's definition of how leaf 0xB works.  I'm arguing we can ignore the
adjustments that would be done on output.E{C,D} for an out of range leaf
because the model is bogus.

> Supposing the max basic is 0x1f, and it queries cpuid(0x20, 0x5),
> it should return cpuid(0x1f, 0x5).
> 
> But based on this patch, it returns all zeros.

Have you tested the patch, or is your comment based on the above discussion
and/or code inspection?  Honest question, because I've thoroughly tested
the above scenario and it works as you describe, but now I'm worried I
completely botched my testing.
Xiaoyao Li March 3, 2020, 4:02 a.m. UTC | #7
On 3/3/2020 11:45 AM, Sean Christopherson wrote:
> On Tue, Mar 03, 2020 at 10:27:47AM +0800, Xiaoyao Li wrote:
>> On 3/3/2020 4:49 AM, Sean Christopherson wrote:
>>> On Mon, Mar 02, 2020 at 09:26:54PM +0100, Jan Kiszka wrote:
>>>> On 02.03.20 20:57, Sean Christopherson wrote:
>>>>> Rework kvm_cpuid() to query entry->function when adjusting the output
>>>>> values so that the original function (in the aptly named "function") is
>>>>> preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
>>>>> trace the max function for a range instead of the requested function if
>>>>> the requested function is out-of-range and an entry for the max function
>>>>> exists.
>>>>>
>>>>> Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
>>>>> Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>> Cc: Jim Mattson <jmattson@google.com>
>>>>> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
>>>>> ---
>>>>>   arch/x86/kvm/cpuid.c | 15 +++++++--------
>>>>>   1 file changed, 7 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>>> index b1c469446b07..6be012937eba 100644
>>>>> --- a/arch/x86/kvm/cpuid.c
>>>>> +++ b/arch/x86/kvm/cpuid.c
>>>>> @@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
>>>>>   	return max && function <= max->eax;
>>>>>   }
>>>>> +/* Returns true if the requested leaf/function exists in guest CPUID. */
>>>>>   bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>>>>>   	       u32 *ecx, u32 *edx, bool check_limit)
>>>>>   {
>>>>> -	u32 function = *eax, index = *ecx;
>>>>> +	const u32 function = *eax, index = *ecx;
>>>>>   	struct kvm_cpuid_entry2 *entry;
>>>>> -	struct kvm_cpuid_entry2 *max;
>>>>>   	bool found;
>>>>>   	entry = kvm_find_cpuid_entry(vcpu, function, index);
>>>>> @@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>>>>>   	 */
>>>>>   	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
>>>>>   	    !cpuid_function_in_range(vcpu, function)) {
>>>>> -		max = kvm_find_cpuid_entry(vcpu, 0, 0);
>>>>> -		if (max) {
>>>>> -			function = max->eax;
>>>>> -			entry = kvm_find_cpuid_entry(vcpu, function, index);
>>>>> -		}
>>>>> +		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
>>>>> +		if (entry)
>>>>> +			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
>>>>>   	}
>>>>>   	if (entry) {
>>>>>   		*eax = entry->eax;
>>>>>   		*ebx = entry->ebx;
>>>>>   		*ecx = entry->ecx;
>>>>>   		*edx = entry->edx;
>>>>> -		if (function == 7 && index == 0) {
>>>>> +
>>>>> +		if (entry->function == 7 && index == 0) {
>>>>>   			u64 data;
>>>>>   		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
>>>>>   			    (data & TSX_CTRL_CPUID_CLEAR))
>>>>>
>>>>
>>>> What about the !entry case below this? It was impacted by the function
>>>> capping so far, not it's no longer.
>>>
>>> Hmm, the only way the output would be different is in a really contrived
>>> scenario where userspace doesn't provide an entry for the max basic leaf.
>>>
>>> The !entry path can only be reached with "orig_function != function" if
>>> orig_function is out of range and there is no entry for the max basic leaf.
>>
>>> The adjustments for 0xb/0x1f require the max basic leaf to be 0xb or 0x1f,
>>> and to take effect with !entry would require there to be a CPUID.max.1 but
>>> not a CPUID.max.0.  That'd be a violation of Intel's SDM, i.e. it's bogus
>>> userspace input and IMO can be ignored.
>>>
>>
>> Sorry I cannot catch you. Why it's a violation of Intel's SDM?
> 
> The case being discussed above would look like:
> 
> KVM CPUID Entries:
>     Function   Index Output
>     0x00000000 0x00: eax=0x0000000b ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
>     0x00000001 0x00: eax=0x000906ea ebx=0x03000800 ecx=0xfffa3223 edx=0x0f8bfbff
>     0x00000002 0x00: eax=0x00000001 ebx=0x00000000 ecx=0x0000004d edx=0x002c307d
>     0x00000003 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
>     0x00000004 0x00: eax=0x00000121 ebx=0x01c0003f ecx=0x0000003f edx=0x00000001
>     0x00000004 0x01: eax=0x00000122 ebx=0x01c0003f ecx=0x0000003f edx=0x00000001
>     0x00000004 0x02: eax=0x00000143 ebx=0x03c0003f ecx=0x00000fff edx=0x00000001
>     0x00000004 0x03: eax=0x00000163 ebx=0x03c0003f ecx=0x00003fff edx=0x00000006
>     0x00000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000003 edx=0x00000000
>     0x00000006 0x00: eax=0x00000004 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
>     0x00000007 0x00: eax=0x00000000 ebx=0x009c4fbb ecx=0x00000004 edx=0x84000000
>     0x00000008 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
>     0x00000009 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
>     0x0000000a 0x00: eax=0x07300402 ebx=0x00000000 ecx=0x00000000 edx=0x00000603
> --> MISSING CPUID.0xB.0
>     0x0000000b 0x01: eax=0x00000000 ebx=0x00000001 ecx=0x00000201 edx=0x00000003
> 
> CPUID.0xB.0 does not exist, so output.ECX=0, which indicates an invalid
> level-type.
> 
> The SDM states (for CPUID.0xB):
> 
>     If an input value n in ECX returns the invalid level-type of 0 in ECX[15:8],
>     other input values with ECX > n also return 0 in ECX[15:8]
> 
> That means returning a valid level-type in CPUID.0xB.1 as above violates
> the SDM's definition of how leaf 0xB works.  I'm arguing we can ignore the
> adjustments that would be done on output.E{C,D} for an out of range leaf
> because the model is bogus.

Right.

So we'd better do something in KVM_SET_CPUID* to prevent userspace from
setting a bogus CPUID model.

>> Supposing the max basic is 0x1f, and it queries cpuid(0x20, 0x5),
>> it should return cpuid(0x1f, 0x5).
>>
>> But based on this patch, it returns all zeros.
> 
> Have you tested the patch, or is your comment based on the above discussion
> and/or code inspection?  Honest question, because I've thoroughly tested
> the above scenario and it works as you describe, but now I'm worried I
> completely botched my testing.
> 

No, I didn't test.

Leaves 0xB and 0x1F are special cases when they are the maximum basic
leaf, because no matter what the subleaf is, there is always a non-zero
E[CX,DX].

If CPUID.0 reports the maximum basic leaf as 0xB/0x1F, then when the queried
leaf is greater, it should always return a non-zero value.
Sean Christopherson March 3, 2020, 4:08 a.m. UTC | #8
On Tue, Mar 03, 2020 at 10:50:03AM +0800, Xiaoyao Li wrote:
> On 3/3/2020 3:57 AM, Sean Christopherson wrote:
> >Rework kvm_cpuid() to query entry->function when adjusting the output
> >values so that the original function (in the aptly named "function") is
> >preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
> >trace the max function for a range instead of the requested function if
> >the requested function is out-of-range and an entry for the max function
> >exists.
> >
> >Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
> >Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
> >Cc: Jim Mattson <jmattson@google.com>
> >Cc: Xiaoyao Li <xiaoyao.li@intel.com>
> >Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> >---
> >  arch/x86/kvm/cpuid.c | 15 +++++++--------
> >  1 file changed, 7 insertions(+), 8 deletions(-)
> >
> >diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> >index b1c469446b07..6be012937eba 100644
> >--- a/arch/x86/kvm/cpuid.c
> >+++ b/arch/x86/kvm/cpuid.c
> >@@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
> >  	return max && function <= max->eax;
> >  }
> >+/* Returns true if the requested leaf/function exists in guest CPUID. */
> >  bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
> >  	       u32 *ecx, u32 *edx, bool check_limit)
> >  {
> >-	u32 function = *eax, index = *ecx;
> >+	const u32 function = *eax, index = *ecx;
> >  	struct kvm_cpuid_entry2 *entry;
> >-	struct kvm_cpuid_entry2 *max;
> >  	bool found;
> >  	entry = kvm_find_cpuid_entry(vcpu, function, index);
> >@@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
> >  	 */
> >  	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
> >  	    !cpuid_function_in_range(vcpu, function)) {
> >-		max = kvm_find_cpuid_entry(vcpu, 0, 0);
> >-		if (max) {
> >-			function = max->eax;
> >-			entry = kvm_find_cpuid_entry(vcpu, function, index);
> >-		}
> >+		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
> >+		if (entry)
> >+			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
> 
> There is a problem.
> 
> when queried leaf is out of range on Intel CPU, it returns the maximum basic
> leaf, and any dependence on input ECX (i.e., subleaf) value in the basic
> leaf is honored. As disclaimed in SDM of CPUID instruction.

That's what the code above does.

> The ECX should be honored if and only the leaf has a significant index.
> If the leaf doesn't has a significant index, it just ignores the EDX input

s/EDX/ECX

> in bare metal.
>
> So it should be something like:
> 
> if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
> 	!cpuid_function_in_range(vcpu, function)) {
> 	entry = kvm_find_cpuid_entry(vcpu, 0, 0);
> 	if (entry) {
> 		entry = kvm_find_cpuid_entry(vcpu, entry->eax, 0);
> 		if (entry &&
> 		    entry->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX ) {

This is unnecessary IMO.  The only scenario where SIGNIFICANT_INDEX is 0
and cpuid_entry(entry->eax, 0) != cpuid_entry(entry->eax, index) is if
userspace created a cpuid entry for index>0 with SIGNIFICANT_INDEX.  That's
a busted model, e.g. it'd be the SDM equivalent of an Intel CPU having
different output for CPUID.0x16.0 and CPUID.0x16.5 despite the SDM stating
that CPUID.0x16 ignores the index.

E.g. on my system with a max basic leaf of 0x16

$ cpuid -1 -r
CPU:
   0x00000000 0x00: eax=0x00000016 ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
...
   0x00000016 0x00: eax=0x00000e74 ebx=0x0000125c ecx=0x00000064 edx=0x00000000

$ cpuid -1 -r -l 0x16
CPU:
   0x00000016 0x00: eax=0x00000e74 ebx=0x0000125c ecx=0x00000064 edx=0x00000000
~ $ cpuid -1 -r -l 0x16 -s 4
CPU:
   0x00000016 0x04: eax=0x00000e74 ebx=0x0000125c ecx=0x00000064 edx=0x00000000
~ $ cpuid -1 -r -l 0x16 -s 466
CPU:
   0x00000016 0x1d2: eax=0x00000e74 ebx=0x0000125c ecx=0x00000064 edx=0x00000000


If it returned anything else for CPUID.0x16.0x4 then it'd be a CPU bug.
Same thing here, it's a userspace bug if it creates a CPUID entry that
shouldn't exist.  E.g. ignoring Intel's silly "max basic leaf" behavior
for the moment, if userspace created an entry for CPUID.0x0.N it would
break the Linux kernel's cpu_detect(), as it doesn't initialize ECX when
doing CPUID.0x0.

> 			entry = kvm_find_cpuid_entry(vcpu, entry->eax,
> 						     index);
> 		}
> 	}
> }
> 
> >  	}
> >  	if (entry) {
> >  		*eax = entry->eax;
> >  		*ebx = entry->ebx;
> >  		*ecx = entry->ecx;
> >  		*edx = entry->edx;
> >-		if (function == 7 && index == 0) {
> >+
> >+		if (entry->function == 7 && index == 0) {
> >  			u64 data;
> >  		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
> >  			    (data & TSX_CTRL_CPUID_CLEAR))
> >
>
Sean Christopherson March 3, 2020, 4:12 a.m. UTC | #9
On Tue, Mar 03, 2020 at 12:02:39PM +0800, Xiaoyao Li wrote:
> On 3/3/2020 11:45 AM, Sean Christopherson wrote:
> >On Tue, Mar 03, 2020 at 10:27:47AM +0800, Xiaoyao Li wrote:
> >>Sorry I cannot catch you. Why it's a violation of Intel's SDM?
> >
> >The case being discussed above would look like:
> >
> >KVM CPUID Entries:
> >    Function   Index Output
> >    0x00000000 0x00: eax=0x0000000b ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
> >    0x00000001 0x00: eax=0x000906ea ebx=0x03000800 ecx=0xfffa3223 edx=0x0f8bfbff
> >    0x00000002 0x00: eax=0x00000001 ebx=0x00000000 ecx=0x0000004d edx=0x002c307d
> >    0x00000003 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
> >    0x00000004 0x00: eax=0x00000121 ebx=0x01c0003f ecx=0x0000003f edx=0x00000001
> >    0x00000004 0x01: eax=0x00000122 ebx=0x01c0003f ecx=0x0000003f edx=0x00000001
> >    0x00000004 0x02: eax=0x00000143 ebx=0x03c0003f ecx=0x00000fff edx=0x00000001
> >    0x00000004 0x03: eax=0x00000163 ebx=0x03c0003f ecx=0x00003fff edx=0x00000006
> >    0x00000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000003 edx=0x00000000
> >    0x00000006 0x00: eax=0x00000004 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
> >    0x00000007 0x00: eax=0x00000000 ebx=0x009c4fbb ecx=0x00000004 edx=0x84000000
> >    0x00000008 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
> >    0x00000009 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
> >    0x0000000a 0x00: eax=0x07300402 ebx=0x00000000 ecx=0x00000000 edx=0x00000603
> >--> MISSING CPUID.0xB.0
> >    0x0000000b 0x01: eax=0x00000000 ebx=0x00000001 ecx=0x00000201 edx=0x00000003
> >
> >CPUID.0xB.0 does not exist, so output.ECX=0, which indicates an invalid
> >level-type.
> >
> >The SDM states (for CPUID.0xB):
> >
> >    If an input value n in ECX returns the invalid level-type of 0 in ECX[15:8],
> >    other input values with ECX > n also return 0 in ECX[15:8]
> >
> >That means returning a valid level-type in CPUID.0xB.1 as above violates
> >the SDM's definition of how leaf 0xB works.  I'm arguing we can ignore the
> >adjustments that would be done on output.E{C,D} for an out of range leaf
> >because the model is bogus.
> 
> Right.
> 
> So we'd better do something in KVM_SET_CPUID* , to avoid userspace set bogus
> cpuid.
> 
> >>Supposing the max basic is 0x1f, and it queries cpuid(0x20, 0x5),
> >>it should return cpuid(0x1f, 0x5).
> >>
> >>But based on this patch, it returns all zeros.
> >
> >Have you tested the patch, or is your comment based on the above discussion
> >and/or code inspection?  Honest question, because I've thoroughly tested
> >the above scenario and it works as you describe, but now I'm worried I
> >completely botched my testing.
> >
> 
> No, I didn't test.
> 
> Leaf 0xB and 0x1f are special cases when they are the maximum basic leaf,
> because no matter what subleaf is, there is always a non-zero E[CX,DX].
> 
> If cpuid.0 returns maximum basic leaf as 0xB/0x1F, when queried leaf is
> greater, it should always return a non-zero value.

Yes, and that's userspace's responsibility to not screw up.  E.g. if
userspace didn't create CPUID.0xB.0 (as above) then it's not KVM's fault
for returning zeros when the guest executes CPUID.0xB.0.
Xiaoyao Li March 3, 2020, 4:16 a.m. UTC | #10
On 3/3/2020 12:08 PM, Sean Christopherson wrote:
> On Tue, Mar 03, 2020 at 10:50:03AM +0800, Xiaoyao Li wrote:
>> On 3/3/2020 3:57 AM, Sean Christopherson wrote:
>>> Rework kvm_cpuid() to query entry->function when adjusting the output
>>> values so that the original function (in the aptly named "function") is
>>> preserved for tracing.  This fixes a bug where trace_kvm_cpuid() will
>>> trace the max function for a range instead of the requested function if
>>> the requested function is out-of-range and an entry for the max function
>>> exists.
>>>
>>> Fixes: 43561123ab37 ("kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH")
>>> Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> Cc: Jim Mattson <jmattson@google.com>
>>> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
>>> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
>>> ---
>>>   arch/x86/kvm/cpuid.c | 15 +++++++--------
>>>   1 file changed, 7 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>> index b1c469446b07..6be012937eba 100644
>>> --- a/arch/x86/kvm/cpuid.c
>>> +++ b/arch/x86/kvm/cpuid.c
>>> @@ -997,12 +997,12 @@ static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
>>>   	return max && function <= max->eax;
>>>   }
>>> +/* Returns true if the requested leaf/function exists in guest CPUID. */
>>>   bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>>>   	       u32 *ecx, u32 *edx, bool check_limit)
>>>   {
>>> -	u32 function = *eax, index = *ecx;
>>> +	const u32 function = *eax, index = *ecx;
>>>   	struct kvm_cpuid_entry2 *entry;
>>> -	struct kvm_cpuid_entry2 *max;
>>>   	bool found;
>>>   	entry = kvm_find_cpuid_entry(vcpu, function, index);
>>> @@ -1015,18 +1015,17 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>>>   	 */
>>>   	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
>>>   	    !cpuid_function_in_range(vcpu, function)) {
>>> -		max = kvm_find_cpuid_entry(vcpu, 0, 0);
>>> -		if (max) {
>>> -			function = max->eax;
>>> -			entry = kvm_find_cpuid_entry(vcpu, function, index);
>>> -		}
>>> +		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
>>> +		if (entry)
>>> +			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
>>
>> There is a problem.
>>
>> when queried leaf is out of range on Intel CPU, it returns the maximum basic
>> leaf, and any dependence on input ECX (i.e., subleaf) value in the basic
>> leaf is honored. As disclaimed in SDM of CPUID instruction.
> 
> That's what the code above does.
> 
>> The ECX should be honored if and only the leaf has a significant index.
>> If the leaf doesn't has a significant index, it just ignores the EDX input
> 
> s/EDX/ECX
> 
>> in bare metal.
>>
>> So it should be something like:
>>
>> if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
>> 	!cpuid_function_in_range(vcpu, function)) {
>> 	entry = kvm_find_cpuid_entry(vcpu, 0, 0);
>> 	if (entry) {
>> 		entry = kvm_find_cpuid_entry(vcpu, entry->eax, 0);
>> 		if (entry &&
>> 		    entry->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX ) {
> 
> This is unnecessary IMO.  The only scenario where SIGNFICANT_INDEX is 0
> and cpuid_entry(entry->eax, 0) != cpuid_entry(entry->eax, index) is if
> userspace created a cpuid entry for index>0 with SIGNFICANT_INDEX.  

I just forgot that is_matching_cpuid_entry() already takes SIGNIFICANT_INDEX
into account.

Please ignore my stupid noise.

> a busted model, e.g. it'd be the SDM equivalent of an Intel CPU having
> different output for CPUID.0x16.0 and CPUID.16.5 despite the SDM stating
> that the CPUID.0x16 ignores the index.
> 
> E.g. on my system with a max basic leaf of 0x16
> 
> $ cpuid -1 -r
> CPU:
>     0x00000000 0x00: eax=0x00000016 ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
> ...
>     0x00000016 0x00: eax=0x00000e74 ebx=0x0000125c ecx=0x00000064 edx=0x00000000
> 
> $ cpuid -1 -r -l 0x16
> CPU:
>     0x00000016 0x00: eax=0x00000e74 ebx=0x0000125c ecx=0x00000064 edx=0x00000000
> ~ $ cpuid -1 -r -l 0x16 -s 4
> CPU:
>     0x00000016 0x04: eax=0x00000e74 ebx=0x0000125c ecx=0x00000064 edx=0x00000000
> ~ $ cpuid -1 -r -l 0x16 -s 466
> CPU:
>     0x00000016 0x1d2: eax=0x00000e74 ebx=0x0000125c ecx=0x00000064 edx=0x00000000
> 
> 
> If it returned anything else for CPUID.0x16.0x4 then it'd be a CPU bug.
> Same thing here, it's a userspace bug if it creates a CPUID entry that
> shouldn't exist.  E.g. ignoring Intel's silly "max basic leaf" behavior
> for the moment, if userspace created a entry for CPUID.0x0.N it would
> break the Linux kernel's cpu_detect(), as it doesn't initialize ECX when
> doing CPUID.0x0.
> 
>> 			entry = kvm_find_cpuid_entry(vcpu, entry->eax,
>> 						     index);
>> 		}
>> 	}
>> }
>>
>>>   	}
>>>   	if (entry) {
>>>   		*eax = entry->eax;
>>>   		*ebx = entry->ebx;
>>>   		*ecx = entry->ecx;
>>>   		*edx = entry->edx;
>>> -		if (function == 7 && index == 0) {
>>> +
>>> +		if (entry->function == 7 && index == 0) {
>>>   			u64 data;
>>>   		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
>>>   			    (data & TSX_CTRL_CPUID_CLEAR))
>>>
>>
Xiaoyao Li March 3, 2020, 4:30 a.m. UTC | #11
On 3/3/2020 12:12 PM, Sean Christopherson wrote:
> On Tue, Mar 03, 2020 at 12:02:39PM +0800, Xiaoyao Li wrote:
>> On 3/3/2020 11:45 AM, Sean Christopherson wrote:
>>> On Tue, Mar 03, 2020 at 10:27:47AM +0800, Xiaoyao Li wrote:
>>>> Sorry I cannot catch you. Why it's a violation of Intel's SDM?
>>>
>>> The case being discussed above would look like:
>>>
>>> KVM CPUID Entries:
>>>     Function   Index Output
>>>     0x00000000 0x00: eax=0x0000000b ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
>>>     0x00000001 0x00: eax=0x000906ea ebx=0x03000800 ecx=0xfffa3223 edx=0x0f8bfbff
>>>     0x00000002 0x00: eax=0x00000001 ebx=0x00000000 ecx=0x0000004d edx=0x002c307d
>>>     0x00000003 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
>>>     0x00000004 0x00: eax=0x00000121 ebx=0x01c0003f ecx=0x0000003f edx=0x00000001
>>>     0x00000004 0x01: eax=0x00000122 ebx=0x01c0003f ecx=0x0000003f edx=0x00000001
>>>     0x00000004 0x02: eax=0x00000143 ebx=0x03c0003f ecx=0x00000fff edx=0x00000001
>>>     0x00000004 0x03: eax=0x00000163 ebx=0x03c0003f ecx=0x00003fff edx=0x00000006
>>>     0x00000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000003 edx=0x00000000
>>>     0x00000006 0x00: eax=0x00000004 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
>>>     0x00000007 0x00: eax=0x00000000 ebx=0x009c4fbb ecx=0x00000004 edx=0x84000000
>>>     0x00000008 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
>>>     0x00000009 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
>>>     0x0000000a 0x00: eax=0x07300402 ebx=0x00000000 ecx=0x00000000 edx=0x00000603
>>> --> MISSING CPUID.0xB.0
>>>     0x0000000b 0x01: eax=0x00000000 ebx=0x00000001 ecx=0x00000201 edx=0x00000003
>>>
>>> CPUID.0xB.0 does not exist, so output.ECX=0, which indicates an invalid
>>> level-type.
>>>
>>> The SDM states (for CPUID.0xB):
>>>
>>>     If an input value n in ECX returns the invalid level-type of 0 in ECX[15:8],
>>>     other input values with ECX > n also return 0 in ECX[15:8]
>>>
>>> That means returning a valid level-type in CPUID.0xB.1 as above violates
>>> the SDM's definition of how leaf 0xB works.  I'm arguing we can ignore the
>>> adjustments that would be done on output.E{C,D} for an out of range leaf
>>> because the model is bogus.
>>
>> Right.
>>
>> So we'd better do something in KVM_SET_CPUID* , to avoid userspace set bogus
>> cpuid.
>>
>>>> Supposing the max basic is 0x1f, and it queries cpuid(0x20, 0x5),
>>>> it should return cpuid(0x1f, 0x5).
>>>>
>>>> But based on this patch, it returns all zeros.
>>>
>>> Have you tested the patch, or is your comment based on the above discussion
>>> and/or code inspection?  Honest question, because I've thoroughly tested
>>> the above scenario and it works as you describe, but now I'm worried I
>>> completely botched my testing.
>>>
>>
>> No, I didn't test.
>>
>> Leaf 0xB and 0x1f are special cases when they are the maximum basic leaf,
>> because no matter what subleaf is, there is always a non-zero E[CX,DX].
>>
>> If cpuid.0 returns maximum basic leaf as 0xB/0x1F, when queried leaf is
>> greater, it should always return a non-zero value.
> 
> Yes, and that's userspace's responsibility to not screw up.  E.g. if
> userspace didn't create CPUID.0xB.0 (as above) then it's not KVM's fault
> for returning zeros when the guest executes CPUID.0xB.0.
> 

But this requires userspace to create all the subleaves of 0xB/0x1F.  A
correct userspace may, for example, create only one additional subleaf of
0xB/0x1F to indicate that, from this subleaf onward, there is no valid
level-type.

So when the maximum basic leaf is 0x1f, cpuid(0x20, index greater than the
first invalid subleaf created by userspace) returns all zeros.
diff mbox series

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b1c469446b07..6be012937eba 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -997,12 +997,12 @@  static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
 	return max && function <= max->eax;
 }
 
+/* Returns true if the requested leaf/function exists in guest CPUID. */
 bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
 	       u32 *ecx, u32 *edx, bool check_limit)
 {
-	u32 function = *eax, index = *ecx;
+	const u32 function = *eax, index = *ecx;
 	struct kvm_cpuid_entry2 *entry;
-	struct kvm_cpuid_entry2 *max;
 	bool found;
 
 	entry = kvm_find_cpuid_entry(vcpu, function, index);
@@ -1015,18 +1015,17 @@  bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
 	 */
 	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
 	    !cpuid_function_in_range(vcpu, function)) {
-		max = kvm_find_cpuid_entry(vcpu, 0, 0);
-		if (max) {
-			function = max->eax;
-			entry = kvm_find_cpuid_entry(vcpu, function, index);
-		}
+		entry = kvm_find_cpuid_entry(vcpu, 0, 0);
+		if (entry)
+			entry = kvm_find_cpuid_entry(vcpu, entry->eax, index);
 	}
 	if (entry) {
 		*eax = entry->eax;
 		*ebx = entry->ebx;
 		*ecx = entry->ecx;
 		*edx = entry->edx;
-		if (function == 7 && index == 0) {
+
+		if (entry->function == 7 && index == 0) {
 			u64 data;
 		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
 			    (data & TSX_CTRL_CPUID_CLEAR))