
[3/3] KVM: VMX: simplify MSR interception enable/disable

Message ID 20240217014513.7853-4-dongli.zhang@oracle.com (mailing list archive)
State New, archived
Series KVM: VMX: MSR intercept/passthrough cleanup and simplification

Commit Message

Dongli Zhang Feb. 17, 2024, 1:45 a.m. UTC
Currently, is_valid_passthrough_msr() is called from only two sites:
vmx_enable_intercept_for_msr() and vmx_disable_intercept_for_msr(). It is
called for two reasons.

1. WARN() if the input MSR is neither an x2APIC/PT/LBR passthrough MSR nor
one of the possible passthrough MSRs.

2. Return whether the MSR is a possible passthrough MSR.

While is_valid_passthrough_msr() may already traverse
vmx_possible_passthrough_msrs[], the subsequent
possible_passthrough_msr_slot() traverses the same array again. There is no
need to traverse the array twice.

vmx_disable_intercept_for_msr()
-> is_valid_passthrough_msr()
   -> possible_passthrough_msr_slot()
-> possible_passthrough_msr_slot()

Therefore, merge is_valid_passthrough_msr() and the subsequent
possible_passthrough_msr_slot() into a single function (the single-scan idea
is illustrated by the standalone sketch after the list):

- If the MSR is not any passthrough MSR, WARN and return -ENOENT.
- Return VMX_OTHER_PASSTHROUGH for x2APIC/PT/LBR MSRs.
- Return VMX_POSSIBLE_PASSTHROUGH and set *possible_idx for the possible
  passthrough MSRs.
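
For illustration only, a minimal standalone toy (not KVM code; the array
values and names below are made up) of the underlying idea: a single scan of
the array reports both whether the MSR is a possible passthrough MSR and, if
so, its slot index, so there is no separate "is it valid?" pass followed by a
second lookup.

#include <errno.h>
#include <stdio.h>

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/* Stand-in for vmx_possible_passthrough_msrs[]; arbitrary values. */
static const unsigned int possible_passthrough[] = { 0x10, 0x1d9, 0x600 };

/* One traversal: the slot index doubles as the membership answer. */
static int get_passthrough_slot(unsigned int msr)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(possible_passthrough); i++)
		if (possible_passthrough[i] == msr)
			return i;

	return -ENOENT;
}

int main(void)
{
	int idx = get_passthrough_slot(0x1d9);

	if (idx >= 0)
		printf("possible passthrough MSR, slot %d\n", idx);
	else
		printf("not a possible passthrough MSR\n");

	return 0;
}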

Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
---
 arch/x86/kvm/vmx/vmx.c | 55 +++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

Comments

Sean Christopherson Feb. 19, 2024, 10:33 p.m. UTC | #1
On Fri, Feb 16, 2024, Dongli Zhang wrote:
> ---
>  arch/x86/kvm/vmx/vmx.c | 55 +++++++++++++++++++++---------------------
>  1 file changed, 28 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 5a866d3c2bc8..76dff0e7d8bd 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -669,14 +669,18 @@ static int possible_passthrough_msr_slot(u32 msr)
>  	return -ENOENT;
>  }
>  
> -static bool is_valid_passthrough_msr(u32 msr)
> +#define VMX_POSSIBLE_PASSTHROUGH	1
> +#define VMX_OTHER_PASSTHROUGH		2
> +/*
> + * Vefify if the msr is the passthrough MSRs.
> + * Return the index in *possible_idx if it is a possible passthrough MSR.
> + */
> +static int validate_passthrough_msr(u32 msr, int *possible_idx)

There's no need for a custom tri-state return value or an out-param, just return
the slot/-ENOENT.  Not fully tested yet, but this should do the trick.

From: Sean Christopherson <seanjc@google.com>
Date: Mon, 19 Feb 2024 07:58:10 -0800
Subject: [PATCH] KVM: VMX: Combine "check" and "get" APIs for passthrough MSR
 lookups

Combine possible_passthrough_msr_slot() and is_valid_passthrough_msr()
into a single function, vmx_get_passthrough_msr_slot(), and have the
combined helper return the slot on success, using a negative value to
indicate "failure".

Combining the operations avoids iterating over the array of passthrough
MSRs twice for relevant MSRs.

Suggested-by: Dongli Zhang <dongli.zhang@oracle.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/vmx.c | 63 +++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 014cf47dc66b..969fd3aa0da3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -658,25 +658,14 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 	return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 
-static int possible_passthrough_msr_slot(u32 msr)
+static int vmx_get_passthrough_msr_slot(u32 msr)
 {
-	u32 i;
-
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
-		if (vmx_possible_passthrough_msrs[i] == msr)
-			return i;
-
-	return -ENOENT;
-}
-
-static bool is_valid_passthrough_msr(u32 msr)
-{
-	bool r;
+	int i;
 
 	switch (msr) {
 	case 0x800 ... 0x8ff:
 		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-		return true;
+		return -ENOENT;
 	case MSR_IA32_RTIT_STATUS:
 	case MSR_IA32_RTIT_OUTPUT_BASE:
 	case MSR_IA32_RTIT_OUTPUT_MASK:
@@ -691,14 +680,16 @@ static bool is_valid_passthrough_msr(u32 msr)
 	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
 	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
 		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-		return true;
+		return -ENOENT;
 	}
 
-	r = possible_passthrough_msr_slot(msr) != -ENOENT;
-
-	WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+		if (vmx_possible_passthrough_msrs[i] == msr)
+			return i;
+	}
 
-	return r;
+	WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+	return -ENOENT;
 }
 
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
@@ -3954,6 +3945,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+	int idx;
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -3963,16 +3955,13 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	/*
 	 * Mark the desired intercept state in shadow bitmap, this is needed
 	 * for resync when the MSR filters change.
-	*/
-	if (is_valid_passthrough_msr(msr)) {
-		int idx = possible_passthrough_msr_slot(msr);
-
-		if (idx != -ENOENT) {
-			if (type & MSR_TYPE_R)
-				clear_bit(idx, vmx->shadow_msr_intercept.read);
-			if (type & MSR_TYPE_W)
-				clear_bit(idx, vmx->shadow_msr_intercept.write);
-		}
+	 */
+	idx = vmx_get_passthrough_msr_slot(msr);
+	if (idx >= 0) {
+		if (type & MSR_TYPE_R)
+			clear_bit(idx, vmx->shadow_msr_intercept.read);
+		if (type & MSR_TYPE_W)
+			clear_bit(idx, vmx->shadow_msr_intercept.write);
 	}
 
 	if ((type & MSR_TYPE_R) &&
@@ -3998,6 +3987,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+	int idx;
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -4008,15 +3998,12 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	 * Mark the desired intercept state in shadow bitmap, this is needed
 	 * for resync when the MSR filter changes.
 	*/
-	if (is_valid_passthrough_msr(msr)) {
-		int idx = possible_passthrough_msr_slot(msr);
-
-		if (idx != -ENOENT) {
-			if (type & MSR_TYPE_R)
-				set_bit(idx, vmx->shadow_msr_intercept.read);
-			if (type & MSR_TYPE_W)
-				set_bit(idx, vmx->shadow_msr_intercept.write);
-		}
+	idx = vmx_get_passthrough_msr_slot(msr);
+	if (idx >= 0) {
+		if (type & MSR_TYPE_R)
+			set_bit(idx, vmx->shadow_msr_intercept.read);
+		if (type & MSR_TYPE_W)
+			set_bit(idx, vmx->shadow_msr_intercept.write);
 	}
 
 	if (type & MSR_TYPE_R)

base-commit: 342c6dfc2a0ae893394a6f894acd1d1728c009f2
--
Dongli Zhang Feb. 20, 2024, 9:50 p.m. UTC | #2
Hi Sean,

On 2/19/24 14:33, Sean Christopherson wrote:
> On Fri, Feb 16, 2024, Dongli Zhang wrote:
>> ---
>>  arch/x86/kvm/vmx/vmx.c | 55 +++++++++++++++++++++---------------------
>>  1 file changed, 28 insertions(+), 27 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 5a866d3c2bc8..76dff0e7d8bd 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -669,14 +669,18 @@ static int possible_passthrough_msr_slot(u32 msr)
>>  	return -ENOENT;
>>  }
>>  
>> -static bool is_valid_passthrough_msr(u32 msr)
>> +#define VMX_POSSIBLE_PASSTHROUGH	1
>> +#define VMX_OTHER_PASSTHROUGH		2
>> +/*
>> + * Vefify if the msr is the passthrough MSRs.
>> + * Return the index in *possible_idx if it is a possible passthrough MSR.
>> + */
>> +static int validate_passthrough_msr(u32 msr, int *possible_idx)
> 
> There's no need for a custom tri-state return value or an out-param, just return
> the slot/-ENOENT.  Not fully tested yet, but this should do the trick.

The new patch looks good to me from a functionality perspective.

It's just that the new patched function looks confusing. That's why I
initially added the out-param, to differentiate between the different cases.

The new vmx_get_passthrough_msr_slot() does the trick by combining several
jobs into one:

1. Get the possible passthrough MSR slot index.

2. For x2APIC/PT/LBR MSRs, return -ENOENT.

3. For any other MSR, return the same -ENOENT, with a WARN.

The semantics of the function look confusing.

If the objective is to return the passthrough MSR slot, why return -ENOENT
for x2APIC/PT/LBR?

Why do both x2APIC/PT/LBR and other MSRs return the same -ENOENT, while only
the other MSRs may trigger the WARN? (I know this is because the other MSRs
do not belong to any passthrough MSRs.)


static int vmx_get_passthrough_msr_slot(u32 msr)
{
	int i;

	switch (msr) {
	case 0x800 ... 0x8ff:
		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
		return -ENOENT;
	case MSR_IA32_RTIT_STATUS:
	case MSR_IA32_RTIT_OUTPUT_BASE:
	case MSR_IA32_RTIT_OUTPUT_MASK:
	case MSR_IA32_RTIT_CR3_MATCH:
	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
		/* PT MSRs. These are handled in pt_update_intercept_for_msr() */
	case MSR_LBR_SELECT:
	case MSR_LBR_TOS:
	case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
	case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
	case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
		return -ENOENT;
	}

	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
		if (vmx_possible_passthrough_msrs[i] == msr)
			return i;
	}

	WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
	return -ENOENT;
}


The patch looks good to me.

Thank you very much!

Dongli Zhang

> 
> From: Sean Christopherson <seanjc@google.com>
> Date: Mon, 19 Feb 2024 07:58:10 -0800
> Subject: [PATCH] KVM: VMX: Combine "check" and "get" APIs for passthrough MSR
>  lookups
> 
> Combine possible_passthrough_msr_slot() and is_valid_passthrough_msr()
> into a single function, vmx_get_passthrough_msr_slot(), and have the
> combined helper return the slot on success, using a negative value to
> indiciate "failure".
> 
> Combining the operations avoids iterating over the array of passthrough
> MSRs twice for relevant MSRs.
> 
> Suggested-by: Dongli Zhang <dongli.zhang@oracle.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 63 +++++++++++++++++-------------------------
>  1 file changed, 25 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 014cf47dc66b..969fd3aa0da3 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -658,25 +658,14 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
>  	return flexpriority_enabled && lapic_in_kernel(vcpu);
>  }
>  
> -static int possible_passthrough_msr_slot(u32 msr)
> +static int vmx_get_passthrough_msr_slot(u32 msr)
>  {
> -	u32 i;
> -
> -	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
> -		if (vmx_possible_passthrough_msrs[i] == msr)
> -			return i;
> -
> -	return -ENOENT;
> -}
> -
> -static bool is_valid_passthrough_msr(u32 msr)
> -{
> -	bool r;
> +	int i;
>  
>  	switch (msr) {
>  	case 0x800 ... 0x8ff:
>  		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
> -		return true;
> +		return -ENOENT;
>  	case MSR_IA32_RTIT_STATUS:
>  	case MSR_IA32_RTIT_OUTPUT_BASE:
>  	case MSR_IA32_RTIT_OUTPUT_MASK:
> @@ -691,14 +680,16 @@ static bool is_valid_passthrough_msr(u32 msr)
>  	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
>  	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
>  		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
> -		return true;
> +		return -ENOENT;
>  	}
>  
> -	r = possible_passthrough_msr_slot(msr) != -ENOENT;
> -
> -	WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
> +	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
> +		if (vmx_possible_passthrough_msrs[i] == msr)
> +			return i;
> +	}
>  
> -	return r;
> +	WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
> +	return -ENOENT;
>  }
>  
>  struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
> @@ -3954,6 +3945,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
> +	int idx;
>  
>  	if (!cpu_has_vmx_msr_bitmap())
>  		return;
> @@ -3963,16 +3955,13 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
>  	/*
>  	 * Mark the desired intercept state in shadow bitmap, this is needed
>  	 * for resync when the MSR filters change.
> -	*/
> -	if (is_valid_passthrough_msr(msr)) {
> -		int idx = possible_passthrough_msr_slot(msr);
> -
> -		if (idx != -ENOENT) {
> -			if (type & MSR_TYPE_R)
> -				clear_bit(idx, vmx->shadow_msr_intercept.read);
> -			if (type & MSR_TYPE_W)
> -				clear_bit(idx, vmx->shadow_msr_intercept.write);
> -		}
> +	 */
> +	idx = vmx_get_passthrough_msr_slot(msr);
> +	if (idx >= 0) {
> +		if (type & MSR_TYPE_R)
> +			clear_bit(idx, vmx->shadow_msr_intercept.read);
> +		if (type & MSR_TYPE_W)
> +			clear_bit(idx, vmx->shadow_msr_intercept.write);
>  	}
>  
>  	if ((type & MSR_TYPE_R) &&
> @@ -3998,6 +3987,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
> +	int idx;
>  
>  	if (!cpu_has_vmx_msr_bitmap())
>  		return;
> @@ -4008,15 +3998,12 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
>  	 * Mark the desired intercept state in shadow bitmap, this is needed
>  	 * for resync when the MSR filter changes.
>  	*/
> -	if (is_valid_passthrough_msr(msr)) {
> -		int idx = possible_passthrough_msr_slot(msr);
> -
> -		if (idx != -ENOENT) {
> -			if (type & MSR_TYPE_R)
> -				set_bit(idx, vmx->shadow_msr_intercept.read);
> -			if (type & MSR_TYPE_W)
> -				set_bit(idx, vmx->shadow_msr_intercept.write);
> -		}
> +	idx = vmx_get_passthrough_msr_slot(msr);
> +	if (idx >= 0) {
> +		if (type & MSR_TYPE_R)
> +			set_bit(idx, vmx->shadow_msr_intercept.read);
> +		if (type & MSR_TYPE_W)
> +			set_bit(idx, vmx->shadow_msr_intercept.write);
>  	}
>  
>  	if (type & MSR_TYPE_R)
> 
> base-commit: 342c6dfc2a0ae893394a6f894acd1d1728c009f2
Sean Christopherson Feb. 21, 2024, 3:43 p.m. UTC | #3
On Tue, Feb 20, 2024, Dongli Zhang wrote:
> Hi Sean,
> 
> On 2/19/24 14:33, Sean Christopherson wrote:
> > On Fri, Feb 16, 2024, Dongli Zhang wrote:
> >> ---
> >>  arch/x86/kvm/vmx/vmx.c | 55 +++++++++++++++++++++---------------------
> >>  1 file changed, 28 insertions(+), 27 deletions(-)
> >>
> >> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> >> index 5a866d3c2bc8..76dff0e7d8bd 100644
> >> --- a/arch/x86/kvm/vmx/vmx.c
> >> +++ b/arch/x86/kvm/vmx/vmx.c
> >> @@ -669,14 +669,18 @@ static int possible_passthrough_msr_slot(u32 msr)
> >>  	return -ENOENT;
> >>  }
> >>  
> >> -static bool is_valid_passthrough_msr(u32 msr)
> >> +#define VMX_POSSIBLE_PASSTHROUGH	1
> >> +#define VMX_OTHER_PASSTHROUGH		2
> >> +/*
> >> + * Vefify if the msr is the passthrough MSRs.
> >> + * Return the index in *possible_idx if it is a possible passthrough MSR.
> >> + */
> >> +static int validate_passthrough_msr(u32 msr, int *possible_idx)
> > 
> > There's no need for a custom tri-state return value or an out-param, just return
> > the slot/-ENOENT.  Not fully tested yet, but this should do the trick.
> 
> The new patch looks good to me, from functionality's perspective.
> 
> Just that the new patched function looks confusing. That's why I was adding the
> out-param initially to differentiate from different cases.
> 
> The new vmx_get_passthrough_msr_slot() is just doing the trick by combining many
> jobs together:
> 
> 1. Get the possible passthrough msr slot index.
> 
> 2. For x2APIC/PT/LBR msr, return -ENOENT.
> 
> 3. For other msr, return the same -ENOENT, with a WARN.
> 
> The semantics of the function look confusing.
> 
> If the objective is to return passthrough msr slot, why return -ENOENT for
> x2APIC/PT/LBR.

Because there is no "slot" for them in vmx_possible_passthrough_msrs, and the
main purpose of the helpers is to get that slot in order to efficiently update
the MSR bitmaps in response to userspace MSR filter changes.  The WARN is an extra
sanity check to ensure that KVM doesn't start passing through an MSR without
adding the MSR to vmx_possible_passthrough_msrs (or special casing it a la XAPIC,
PT, and LBR MSRs).
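
For reference, the filter-change resync path that consumes these shadow
bitmaps looks roughly like the sketch below. This is trimmed and paraphrased,
not a verbatim copy of the in-tree vmx_msr_filter_changed(), which may differ
in details (e.g. it also refreshes the PT intercepts); the point is that the
bit position in the shadow bitmaps is exactly the slot index returned by the
lookup helper.

static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 i;

	if (!cpu_has_vmx_msr_bitmap())
		return;

	/*
	 * Re-apply the desired intercept state for every possible
	 * passthrough MSR; disabling interception re-checks the new
	 * MSR filter and re-enables interception if needed.
	 */
	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
		u32 msr = vmx_possible_passthrough_msrs[i];

		if (!test_bit(i, vmx->shadow_msr_intercept.read))
			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);

		if (!test_bit(i, vmx->shadow_msr_intercept.write))
			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
	}
}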

> Why both x2APIC/PT/LBR and other MSRs return the same -ENOENT, while the other
> MSRs may trigger WARN. (I know this is because the other MSRs do not belong to
> any passthrough MSRs).

The x2APIC/PT/LBR MSRs are given special treatment: KVM may pass them through to
the guest, but unlike the "regular" passthrough MSRs, userspace is NOT allowed to
override that behavior via MSR filters.

And so as mentioned above, they don't have a slot in vmx_possible_passthrough_msrs.
Dongli Zhang Feb. 21, 2024, 4:50 p.m. UTC | #4
On 2/21/24 07:43, Sean Christopherson wrote:
> On Tue, Feb 20, 2024, Dongli Zhang wrote:
>> Hi Sean,
>>
>> On 2/19/24 14:33, Sean Christopherson wrote:
>>> On Fri, Feb 16, 2024, Dongli Zhang wrote:
>>>> ---
>>>>  arch/x86/kvm/vmx/vmx.c | 55 +++++++++++++++++++++---------------------
>>>>  1 file changed, 28 insertions(+), 27 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>>>> index 5a866d3c2bc8..76dff0e7d8bd 100644
>>>> --- a/arch/x86/kvm/vmx/vmx.c
>>>> +++ b/arch/x86/kvm/vmx/vmx.c
>>>> @@ -669,14 +669,18 @@ static int possible_passthrough_msr_slot(u32 msr)
>>>>  	return -ENOENT;
>>>>  }
>>>>  
>>>> -static bool is_valid_passthrough_msr(u32 msr)
>>>> +#define VMX_POSSIBLE_PASSTHROUGH	1
>>>> +#define VMX_OTHER_PASSTHROUGH		2
>>>> +/*
>>>> + * Vefify if the msr is the passthrough MSRs.
>>>> + * Return the index in *possible_idx if it is a possible passthrough MSR.
>>>> + */
>>>> +static int validate_passthrough_msr(u32 msr, int *possible_idx)
>>>
>>> There's no need for a custom tri-state return value or an out-param, just return
>>> the slot/-ENOENT.  Not fully tested yet, but this should do the trick.
>>
>> The new patch looks good to me, from functionality's perspective.
>>
>> Just that the new patched function looks confusing. That's why I was adding the
>> out-param initially to differentiate from different cases.
>>
>> The new vmx_get_passthrough_msr_slot() is just doing the trick by combining many
>> jobs together:
>>
>> 1. Get the possible passthrough msr slot index.
>>
>> 2. For x2APIC/PT/LBR msr, return -ENOENT.
>>
>> 3. For other msr, return the same -ENOENT, with a WARN.
>>
>> The semantics of the function look confusing.
>>
>> If the objective is to return passthrough msr slot, why return -ENOENT for
>> x2APIC/PT/LBR.
> 
> Because there is no "slot" for them in vmx_possible_passthrough_msrs, and the
> main purpose of the helpers is to get that slot in order to efficiently update
> the MSR bitmaps in response to userspace MSR filter changes.  The WARN is an extra
> sanity check to ensure that KVM doesn't start passing through an MSR without
> adding the MSR to vmx_possible_passthrough_msrs (or special casing it a la XAPIC,
> PT, and LBR MSRS).
> 
>> Why both x2APIC/PT/LBR and other MSRs return the same -ENOENT, while the other
>> MSRs may trigger WARN. (I know this is because the other MSRs do not belong to
>> any passthrough MSRs).
> 
> The x2APIC/PT/LBR MSRs are given special treatment: KVM may pass them through to
> the guest, but unlike the "regular" passthrough MSRs, userspace is NOT allowed to
> override that behavior via MSR filters.
> 
> And so as mentioned above, they don't have a slot in vmx_possible_passthrough_msrs.

Thank you very much for the explanation! This looks good to me.

Dongli Zhang

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5a866d3c2bc8..76dff0e7d8bd 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -669,14 +669,18 @@  static int possible_passthrough_msr_slot(u32 msr)
 	return -ENOENT;
 }
 
-static bool is_valid_passthrough_msr(u32 msr)
+#define VMX_POSSIBLE_PASSTHROUGH	1
+#define VMX_OTHER_PASSTHROUGH		2
+/*
+ * Verify if the msr is a passthrough MSR.
+ * Return the index in *possible_idx if it is a possible passthrough MSR.
+ */
+static int validate_passthrough_msr(u32 msr, int *possible_idx)
 {
-	bool r;
-
 	switch (msr) {
 	case 0x800 ... 0x8ff:
 		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-		return true;
+		return VMX_OTHER_PASSTHROUGH;
 	case MSR_IA32_RTIT_STATUS:
 	case MSR_IA32_RTIT_OUTPUT_BASE:
 	case MSR_IA32_RTIT_OUTPUT_MASK:
@@ -691,14 +695,17 @@  static bool is_valid_passthrough_msr(u32 msr)
 	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
 	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
 		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-		return true;
+		return VMX_OTHER_PASSTHROUGH;
 	}
 
-	r = possible_passthrough_msr_slot(msr) != -ENOENT;
+	*possible_idx = possible_passthrough_msr_slot(msr);
+	WARN(*possible_idx == -ENOENT,
+	     "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
 
-	WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+	if (*possible_idx >= 0)
+		return VMX_POSSIBLE_PASSTHROUGH;
 
-	return r;
+	return -ENOENT;
 }
 
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
@@ -3954,6 +3961,7 @@  void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+	int idx;
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -3963,16 +3971,12 @@  void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	/*
 	 * Mark the desired intercept state in shadow bitmap, this is needed
 	 * for resync when the MSR filters change.
-	*/
-	if (is_valid_passthrough_msr(msr)) {
-		int idx = possible_passthrough_msr_slot(msr);
-
-		if (idx != -ENOENT) {
-			if (type & MSR_TYPE_R)
-				clear_bit(idx, vmx->shadow_msr_intercept.read);
-			if (type & MSR_TYPE_W)
-				clear_bit(idx, vmx->shadow_msr_intercept.write);
-		}
+	 */
+	if (validate_passthrough_msr(msr, &idx) == VMX_POSSIBLE_PASSTHROUGH) {
+		if (type & MSR_TYPE_R)
+			clear_bit(idx, vmx->shadow_msr_intercept.read);
+		if (type & MSR_TYPE_W)
+			clear_bit(idx, vmx->shadow_msr_intercept.write);
 	}
 
 	if ((type & MSR_TYPE_R) &&
@@ -3998,6 +4002,7 @@  void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+	int idx;
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -4008,15 +4013,11 @@  void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	 * Mark the desired intercept state in shadow bitmap, this is needed
 	 * for resync when the MSR filter changes.
 	*/
-	if (is_valid_passthrough_msr(msr)) {
-		int idx = possible_passthrough_msr_slot(msr);
-
-		if (idx != -ENOENT) {
-			if (type & MSR_TYPE_R)
-				set_bit(idx, vmx->shadow_msr_intercept.read);
-			if (type & MSR_TYPE_W)
-				set_bit(idx, vmx->shadow_msr_intercept.write);
-		}
+	if (validate_passthrough_msr(msr, &idx) == VMX_POSSIBLE_PASSTHROUGH) {
+		if (type & MSR_TYPE_R)
+			set_bit(idx, vmx->shadow_msr_intercept.read);
+		if (type & MSR_TYPE_W)
+			set_bit(idx, vmx->shadow_msr_intercept.write);
 	}
 
 	if (type & MSR_TYPE_R)