diff mbox series

[v12,07/11] KVM: vmx/pmu: Unmask LBR fields in the MSR_IA32_DEBUGCTLMSR emualtion

Message ID 20200613080958.132489-8-like.xu@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Guest Last Branch Recording Enabling | expand

Commit Message

Like Xu June 13, 2020, 8:09 a.m. UTC
When the LBR feature is reported by the vmx_get_perf_capabilities(),
the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.

The debugctl msr is handled separately in vmx/svm and they're not
completely identical, hence remove the common msr handling code.

Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
 arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
 arch/x86/kvm/vmx/pmu_intel.c    | 19 +++++++++++++++++++
 arch/x86/kvm/x86.c              | 13 -------------
 3 files changed, 31 insertions(+), 13 deletions(-)

Comments

Xiaoyao Li June 13, 2020, 9:14 a.m. UTC | #1
On 6/13/2020 4:09 PM, Like Xu wrote:
> When the LBR feature is reported by the vmx_get_perf_capabilities(),
> the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
> 
> The debugctl msr is handled separately in vmx/svm and they're not
> completely identical, hence remove the common msr handling code.
> 
> Signed-off-by: Like Xu <like.xu@linux.intel.com>
> ---
>   arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
>   arch/x86/kvm/vmx/pmu_intel.c    | 19 +++++++++++++++++++
>   arch/x86/kvm/x86.c              | 13 -------------
>   3 files changed, 31 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index b633a90320ee..f6fcfabb1026 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
>   #define PMU_CAP_FW_WRITES	(1ULL << 13)
>   #define PMU_CAP_LBR_FMT		0x3f
>   
> +#define DEBUGCTLMSR_LBR_MASK		(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
> +
>   struct nested_vmx_msrs {
>   	/*
>   	 * We only store the "true" versions of the VMX capability MSRs. We
> @@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
>   	return perf_cap;
>   }
>   
> +static inline u64 vmx_get_supported_debugctl(void)
> +{
> +	u64 val = 0;
> +
> +	if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
> +		val |= DEBUGCTLMSR_LBR_MASK;
> +
> +	return val;
> +}
> +
>   #endif /* __KVM_X86_VMX_CAPS_H */
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index a953c7d633f6..d92e95b64c74 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
>   	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>   		ret = pmu->version > 1;
>   		break;
> +	case MSR_IA32_DEBUGCTLMSR:
>   	case MSR_IA32_PERF_CAPABILITIES:
>   		ret = 1;
>   		break;
> @@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   			return 1;
>   		msr_info->data = vcpu->arch.perf_capabilities;
>   		return 0;
> +	case MSR_IA32_DEBUGCTLMSR:
> +		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);

Can we put the emulation of MSR_IA32_DEBUGCTLMSR in 
vmx_{get/set})_msr(). AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU 
related MSR that there is bit 2 to enable #DB for bus lock.

> +		return 0;
>   	default:
>   		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>   		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct kvm_vcpu *vcpu)
>   	return true;
>   }
>   
> +static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
> +{
> +	u64 debugctlmsr = vmx_get_supported_debugctl();
> +
> +	if (!lbr_is_enabled(vcpu))
> +		debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
> +
> +	return debugctlmsr;
> +}
> +
>   static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   {
>   	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   		}
>   		vcpu->arch.perf_capabilities = data;
>   		return 0;
> +	case MSR_IA32_DEBUGCTLMSR:
> +		if (data & ~vcpu_get_supported_debugctl(vcpu))
> +			return 1;
> +		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> +		return 0;
>   	default:
>   		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>   		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 00c88c2f34e4..56f275eb4554 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   			return 1;
>   		}
>   		break;
> -	case MSR_IA32_DEBUGCTLMSR:
> -		if (!data) {
> -			/* We support the non-activated case already */
> -			break;
> -		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {

So after this patch, guest trying to set bit DEBUGCTLMSR_BTF will get a 
#GP instead of being ignored and printing a log in kernel.

These codes were introduced ~12 years ago in commit b5e2fec0ebc3 ("KVM: 
Ignore DEBUGCTL MSRs with no effect"), just to make Netware happy. Maybe 
I'm overthinking for that too old thing.

> -			/* Values other than LBR and BTF are vendor-specific,
> -			   thus reserved and should throw a #GP */
> -			return 1;
> -		}
> -		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
> -			    __func__, data);
> -		break;
>   	case 0x200 ... 0x2ff:
>   		return kvm_mtrr_set_msr(vcpu, msr, data);
>   	case MSR_IA32_APICBASE:
> @@ -3120,7 +3108,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   	switch (msr_info->index) {
>   	case MSR_IA32_PLATFORM_ID:
>   	case MSR_IA32_EBL_CR_POWERON:
> -	case MSR_IA32_DEBUGCTLMSR:
>   	case MSR_IA32_LASTBRANCHFROMIP:
>   	case MSR_IA32_LASTBRANCHTOIP:
>   	case MSR_IA32_LASTINTFROMIP:
>
Xu, Like June 13, 2020, 9:42 a.m. UTC | #2
On 2020/6/13 17:14, Xiaoyao Li wrote:
> On 6/13/2020 4:09 PM, Like Xu wrote:
>> When the LBR feature is reported by the vmx_get_perf_capabilities(),
>> the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
>>
>> The debugctl msr is handled separately in vmx/svm and they're not
>> completely identical, hence remove the common msr handling code.
>>
>> Signed-off-by: Like Xu <like.xu@linux.intel.com>
>> ---
>>   arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
>>   arch/x86/kvm/vmx/pmu_intel.c    | 19 +++++++++++++++++++
>>   arch/x86/kvm/x86.c              | 13 -------------
>>   3 files changed, 31 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/capabilities.h 
>> b/arch/x86/kvm/vmx/capabilities.h
>> index b633a90320ee..f6fcfabb1026 100644
>> --- a/arch/x86/kvm/vmx/capabilities.h
>> +++ b/arch/x86/kvm/vmx/capabilities.h
>> @@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
>>   #define PMU_CAP_FW_WRITES    (1ULL << 13)
>>   #define PMU_CAP_LBR_FMT        0x3f
>>   +#define DEBUGCTLMSR_LBR_MASK        (DEBUGCTLMSR_LBR | 
>> DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
>> +
>>   struct nested_vmx_msrs {
>>       /*
>>        * We only store the "true" versions of the VMX capability MSRs. We
>> @@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
>>       return perf_cap;
>>   }
>>   +static inline u64 vmx_get_supported_debugctl(void)
>> +{
>> +    u64 val = 0;
>> +
>> +    if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
>> +        val |= DEBUGCTLMSR_LBR_MASK;
>> +
>> +    return val;
>> +}
>> +
>>   #endif /* __KVM_X86_VMX_CAPS_H */
>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>> index a953c7d633f6..d92e95b64c74 100644
>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>> @@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu 
>> *vcpu, u32 msr)
>>       case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>>           ret = pmu->version > 1;
>>           break;
>> +    case MSR_IA32_DEBUGCTLMSR:
>>       case MSR_IA32_PERF_CAPABILITIES:
>>           ret = 1;
>>           break;
>> @@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, 
>> struct msr_data *msr_info)
>>               return 1;
>>           msr_info->data = vcpu->arch.perf_capabilities;
>>           return 0;
>> +    case MSR_IA32_DEBUGCTLMSR:
>> +        msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>
> Can we put the emulation of MSR_IA32_DEBUGCTLMSR in vmx_{get/set})_msr(). 
> AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU related MSR that there is 
> bit 2 to enable #DB for bus lock.
We already have "case MSR_IA32_DEBUGCTLMSR" handler in the vmx_set_msr()
and you may apply you bus lock changes in that handler.
>> +        return 0;
>>       default:
>>           if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>               (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>> @@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct 
>> kvm_vcpu *vcpu)
>>       return true;
>>   }
>>   +static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
>> +{
>> +    u64 debugctlmsr = vmx_get_supported_debugctl();
>> +
>> +    if (!lbr_is_enabled(vcpu))
>> +        debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
>> +
>> +    return debugctlmsr;
>> +}
>> +
>>   static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data 
>> *msr_info)
>>   {
>>       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> @@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, 
>> struct msr_data *msr_info)
>>           }
>>           vcpu->arch.perf_capabilities = data;
>>           return 0;
>> +    case MSR_IA32_DEBUGCTLMSR:
>> +        if (data & ~vcpu_get_supported_debugctl(vcpu))
>> +            return 1;
>> +        vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>> +        return 0;
>>       default:
>>           if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>               (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 00c88c2f34e4..56f275eb4554 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, 
>> struct msr_data *msr_info)
>>               return 1;
>>           }
>>           break;
>> -    case MSR_IA32_DEBUGCTLMSR:
>> -        if (!data) {
>> -            /* We support the non-activated case already */
>> -            break;
>> -        } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
>
> So after this patch, guest trying to set bit DEBUGCTLMSR_BTF will get a 
> #GP instead of being ignored and printing a log in kernel.
>

Since the BTF is not implemented on the KVM at all,
I do propose not left this kind of dummy thing in the future KVM code.

Let's see if Netware or any BTF user will complain about this change.

> These codes were introduced ~12 years ago in commit b5e2fec0ebc3 ("KVM: 
> Ignore DEBUGCTL MSRs with no effect"), just to make Netware happy. Maybe 
> I'm overthinking for that too old thing.
>
>> -            /* Values other than LBR and BTF are vendor-specific,
>> -               thus reserved and should throw a #GP */
>> -            return 1;
>> -        }
>> -        vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
>> -                __func__, data);
>> -        break;
>>       case 0x200 ... 0x2ff:
>>           return kvm_mtrr_set_msr(vcpu, msr, data);
>>       case MSR_IA32_APICBASE:
>> @@ -3120,7 +3108,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, 
>> struct msr_data *msr_info)
>>       switch (msr_info->index) {
>>       case MSR_IA32_PLATFORM_ID:
>>       case MSR_IA32_EBL_CR_POWERON:
>> -    case MSR_IA32_DEBUGCTLMSR:
>>       case MSR_IA32_LASTBRANCHFROMIP:
>>       case MSR_IA32_LASTBRANCHTOIP:
>>       case MSR_IA32_LASTINTFROMIP:
>>
>
Sean Christopherson July 7, 2020, 8:21 p.m. UTC | #3
On Sat, Jun 13, 2020 at 05:42:50PM +0800, Xu, Like wrote:
> On 2020/6/13 17:14, Xiaoyao Li wrote:
> >On 6/13/2020 4:09 PM, Like Xu wrote:
> >>When the LBR feature is reported by the vmx_get_perf_capabilities(),
> >>the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
> >>
> >>The debugctl msr is handled separately in vmx/svm and they're not
> >>completely identical, hence remove the common msr handling code.

I would prefer to put the "remove DEBUGCTRL handling from common x86" in a
separate patch.  Without digging into SVM, it's not obvious that dropping
MSR_IA32_DEBUGCTLMSR from kvm_set_msr_common() is a nop for SVM.

> >>Signed-off-by: Like Xu <like.xu@linux.intel.com>
> >>---
> >>  arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
> >>  arch/x86/kvm/vmx/pmu_intel.c    | 19 +++++++++++++++++++
> >>  arch/x86/kvm/x86.c              | 13 -------------
> >>  3 files changed, 31 insertions(+), 13 deletions(-)
> >>
> >>diff --git a/arch/x86/kvm/vmx/capabilities.h
> >>b/arch/x86/kvm/vmx/capabilities.h
> >>index b633a90320ee..f6fcfabb1026 100644
> >>--- a/arch/x86/kvm/vmx/capabilities.h
> >>+++ b/arch/x86/kvm/vmx/capabilities.h
> >>@@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
> >>  #define PMU_CAP_FW_WRITES    (1ULL << 13)
> >>  #define PMU_CAP_LBR_FMT        0x3f
> >>  +#define DEBUGCTLMSR_LBR_MASK        (DEBUGCTLMSR_LBR |
> >>DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
> >>+
> >>  struct nested_vmx_msrs {
> >>      /*
> >>       * We only store the "true" versions of the VMX capability MSRs. We
> >>@@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
> >>      return perf_cap;
> >>  }
> >>  +static inline u64 vmx_get_supported_debugctl(void)
> >>+{
> >>+    u64 val = 0;
> >>+
> >>+    if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
> >>+        val |= DEBUGCTLMSR_LBR_MASK;
> >>+
> >>+    return val;
> >>+}
> >>+
> >>  #endif /* __KVM_X86_VMX_CAPS_H */
> >>diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> >>index a953c7d633f6..d92e95b64c74 100644
> >>--- a/arch/x86/kvm/vmx/pmu_intel.c
> >>+++ b/arch/x86/kvm/vmx/pmu_intel.c
> >>@@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu
> >>*vcpu, u32 msr)
> >>      case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> >>          ret = pmu->version > 1;
> >>          break;
> >>+    case MSR_IA32_DEBUGCTLMSR:
> >>      case MSR_IA32_PERF_CAPABILITIES:
> >>          ret = 1;
> >>          break;
> >>@@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu,
> >>struct msr_data *msr_info)
> >>              return 1;
> >>          msr_info->data = vcpu->arch.perf_capabilities;
> >>          return 0;
> >>+    case MSR_IA32_DEBUGCTLMSR:
> >>+        msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> >
> >Can we put the emulation of MSR_IA32_DEBUGCTLMSR in vmx_{get/set})_msr().
> >AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU related MSR that there is
> >bit 2 to enable #DB for bus lock.
> We already have "case MSR_IA32_DEBUGCTLMSR" handler in the vmx_set_msr()
> and you may apply you bus lock changes in that handler.

Hrm, but that'd be weird dependency as vmx_set_msr() would need to check for
#DB bus lock support but not actually write GUEST_IA32_DEBUGCTL, or we'd end
up writing it twice when both bus lock and LBR are supported.

I don't see anything in the series that takes action on writes to
MSR_IA32_DEBUGCTLMSR beyond updating the VMCS, i.e. AFAICT there isn't any
reason to call into the PMU, VMX can simply query vmx_get_perf_capabilities()
to check if it's legal to enable DEBUGCTLMSR_LBR_MASK.

A question for both LBR and bus lock: would it make sense to cache the
guest's value in vcpu_vmx so that querying the guest value doesn't require
a VMREAD?  I don't have a good feel for how frequently it would be accessed.

> >>+        return 0;
> >>      default:
> >>          if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> >>              (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> >>@@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct
> >>kvm_vcpu *vcpu)
> >>      return true;
> >>  }
> >>  +static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
> >>+{
> >>+    u64 debugctlmsr = vmx_get_supported_debugctl();
> >>+
> >>+    if (!lbr_is_enabled(vcpu))
> >>+        debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
> >>+
> >>+    return debugctlmsr;
> >>+}
> >>+
> >>  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data
> >>*msr_info)
> >>  {
> >>      struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >>@@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu,
> >>struct msr_data *msr_info)
> >>          }
> >>          vcpu->arch.perf_capabilities = data;
> >>          return 0;
> >>+    case MSR_IA32_DEBUGCTLMSR:
> >>+        if (data & ~vcpu_get_supported_debugctl(vcpu))
> >>+            return 1;
> >>+        vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> >>+        return 0;
> >>      default:
> >>          if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> >>              (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> >>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>index 00c88c2f34e4..56f275eb4554 100644
> >>--- a/arch/x86/kvm/x86.c
> >>+++ b/arch/x86/kvm/x86.c
> >>@@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu,
> >>struct msr_data *msr_info)
> >>              return 1;
> >>          }
> >>          break;
> >>-    case MSR_IA32_DEBUGCTLMSR:
> >>-        if (!data) {
> >>-            /* We support the non-activated case already */
> >>-            break;
> >>-        } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
> >
> >So after this patch, guest trying to set bit DEBUGCTLMSR_BTF will get a
> >#GP instead of being ignored and printing a log in kernel.
> >
> 
> Since the BTF is not implemented on the KVM at all,
> I do propose not left this kind of dummy thing in the future KVM code.
> 
> Let's see if Netware or any BTF user will complain about this change.

If you want to drop that behavior it needs be done in a separate patch.
Personally I don't see the point in doing so, it's a trivial amount of code
in KVM and there's no harm in dropping the bits on write.
Xiaoyao Li July 8, 2020, 1:37 a.m. UTC | #4
On 7/8/2020 4:21 AM, Sean Christopherson wrote:
> On Sat, Jun 13, 2020 at 05:42:50PM +0800, Xu, Like wrote:
>> On 2020/6/13 17:14, Xiaoyao Li wrote:
>>> On 6/13/2020 4:09 PM, Like Xu wrote:
[...]
>>>> @@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu,
>>>> struct msr_data *msr_info)
>>>>                return 1;
>>>>            msr_info->data = vcpu->arch.perf_capabilities;
>>>>            return 0;
>>>> +    case MSR_IA32_DEBUGCTLMSR:
>>>> +        msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>>>
>>> Can we put the emulation of MSR_IA32_DEBUGCTLMSR in vmx_{get/set})_msr().
>>> AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU related MSR that there is
>>> bit 2 to enable #DB for bus lock.
>> We already have "case MSR_IA32_DEBUGCTLMSR" handler in the vmx_set_msr()
>> and you may apply you bus lock changes in that handler.
> 
> Hrm, but that'd be weird dependency as vmx_set_msr() would need to check for
> #DB bus lock support but not actually write GUEST_IA32_DEBUGCTL, or we'd end
> up writing it twice when both bus lock and LBR are supported.

Yeah. That's what I concerned as well.

> I don't see anything in the series that takes action on writes to
> MSR_IA32_DEBUGCTLMSR beyond updating the VMCS, i.e. AFAICT there isn't any
> reason to call into the PMU, VMX can simply query vmx_get_perf_capabilities()
> to check if it's legal to enable DEBUGCTLMSR_LBR_MASK.
> 
> A question for both LBR and bus lock: would it make sense to cache the
> guest's value in vcpu_vmx so that querying the guest value doesn't require
> a VMREAD?  I don't have a good feel for how frequently it would be accessed.

Cache the guest's value is OK, even though #DB bus lock bit wouldn't be 
toggled frequently in a normal OS.
Xu, Like July 8, 2020, 7:06 a.m. UTC | #5
Hi Sean,

First of all, are you going to queue the LBR patch series in your tree
considering the host perf patches have already queued in Peter's tree ?

On 2020/7/8 4:21, Sean Christopherson wrote:
> On Sat, Jun 13, 2020 at 05:42:50PM +0800, Xu, Like wrote:
>> On 2020/6/13 17:14, Xiaoyao Li wrote:
>>> On 6/13/2020 4:09 PM, Like Xu wrote:
>>>> When the LBR feature is reported by the vmx_get_perf_capabilities(),
>>>> the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
>>>>
>>>> The debugctl msr is handled separately in vmx/svm and they're not
>>>> completely identical, hence remove the common msr handling code.
> I would prefer to put the "remove DEBUGCTRL handling from common x86" in a
> separate patch.  Without digging into SVM, it's not obvious that dropping
> MSR_IA32_DEBUGCTLMSR from kvm_set_msr_common() is a nop for SVM.
Sure, I'll do it in a separate patch.
>
>>>> Signed-off-by: Like Xu <like.xu@linux.intel.com>
>>>> ---
>>>>    arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
>>>>    arch/x86/kvm/vmx/pmu_intel.c    | 19 +++++++++++++++++++
>>>>    arch/x86/kvm/x86.c              | 13 -------------
>>>>    3 files changed, 31 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kvm/vmx/capabilities.h
>>>> b/arch/x86/kvm/vmx/capabilities.h
>>>> index b633a90320ee..f6fcfabb1026 100644
>>>> --- a/arch/x86/kvm/vmx/capabilities.h
>>>> +++ b/arch/x86/kvm/vmx/capabilities.h
>>>> @@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
>>>>    #define PMU_CAP_FW_WRITES    (1ULL << 13)
>>>>    #define PMU_CAP_LBR_FMT        0x3f
>>>>    +#define DEBUGCTLMSR_LBR_MASK        (DEBUGCTLMSR_LBR |
>>>> DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
>>>> +
>>>>    struct nested_vmx_msrs {
>>>>        /*
>>>>         * We only store the "true" versions of the VMX capability MSRs. We
>>>> @@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
>>>>        return perf_cap;
>>>>    }
>>>>    +static inline u64 vmx_get_supported_debugctl(void)
>>>> +{
>>>> +    u64 val = 0;
>>>> +
>>>> +    if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
>>>> +        val |= DEBUGCTLMSR_LBR_MASK;
>>>> +
>>>> +    return val;
>>>> +}
>>>> +
>>>>    #endif /* __KVM_X86_VMX_CAPS_H */
>>>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>>>> index a953c7d633f6..d92e95b64c74 100644
>>>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>>>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>>>> @@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu
>>>> *vcpu, u32 msr)
>>>>        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>>>>            ret = pmu->version > 1;
>>>>            break;
>>>> +    case MSR_IA32_DEBUGCTLMSR:
>>>>        case MSR_IA32_PERF_CAPABILITIES:
>>>>            ret = 1;
>>>>            break;
>>>> @@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu,
>>>> struct msr_data *msr_info)
>>>>                return 1;
>>>>            msr_info->data = vcpu->arch.perf_capabilities;
>>>>            return 0;
>>>> +    case MSR_IA32_DEBUGCTLMSR:
>>>> +        msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>>> Can we put the emulation of MSR_IA32_DEBUGCTLMSR in vmx_{get/set})_msr().
>>> AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU related MSR that there is
>>> bit 2 to enable #DB for bus lock.
>> We already have "case MSR_IA32_DEBUGCTLMSR" handler in the vmx_set_msr()
>> and you may apply you bus lock changes in that handler.
> Hrm, but that'd be weird dependency as vmx_set_msr() would need to check for
> #DB bus lock support but not actually write GUEST_IA32_DEBUGCTL, or we'd end
> up writing it twice when both bus lock and LBR are supported.
Yes, you're right about the multiple writes on GUEST_IA32_DEBUGCTL.

I'll move the handler to vmx_set/get_msr() for other DEBUGCTL users.
>
> I don't see anything in the series that takes action on writes to
> MSR_IA32_DEBUGCTLMSR beyond updating the VMCS, i.e. AFAICT there isn't any
> reason to call into the PMU, VMX can simply query vmx_get_perf_capabilities()
> to check if it's legal to enable DEBUGCTLMSR_LBR_MASK.
There's a gap to enable DEBUGCTLMSR_LBR_MASK.

The vmx_get_perf_capabilities() is queried per-KVM while
the vmx_get_supported_debugctl() is queried per-guest.

>
> A question for both LBR and bus lock: would it make sense to cache the
> guest's value in vcpu_vmx so that querying the guest value doesn't require
> a VMREAD?  I don't have a good feel for how frequently it would be accessed.
I'm OK with the cached value for this field and AFAIK,
it will benefit the legacy_freezing_lbrs_on_pmi emulation
if the VMREAD is heavier than normal cache/mem touch.

>
>>>> +        return 0;
>>>>        default:
>>>>            if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>>>                (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>>>> @@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct
>>>> kvm_vcpu *vcpu)
>>>>        return true;
>>>>    }
>>>>    +static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +    u64 debugctlmsr = vmx_get_supported_debugctl();
>>>> +
>>>> +    if (!lbr_is_enabled(vcpu))
>>>> +        debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
>>>> +
>>>> +    return debugctlmsr;
>>>> +}
>>>> +
>>>>    static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data
>>>> *msr_info)
>>>>    {
>>>>        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>>> @@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu,
>>>> struct msr_data *msr_info)
>>>>            }
>>>>            vcpu->arch.perf_capabilities = data;
>>>>            return 0;
>>>> +    case MSR_IA32_DEBUGCTLMSR:
>>>> +        if (data & ~vcpu_get_supported_debugctl(vcpu))
>>>> +            return 1;
>>>> +        vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>>>> +        return 0;
>>>>        default:
>>>>            if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>>>                (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 00c88c2f34e4..56f275eb4554 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu,
>>>> struct msr_data *msr_info)
>>>>                return 1;
>>>>            }
>>>>            break;
>>>> -    case MSR_IA32_DEBUGCTLMSR:
>>>> -        if (!data) {
>>>> -            /* We support the non-activated case already */
>>>> -            break;
>>>> -        } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
>>> So after this patch, guest trying to set bit DEBUGCTLMSR_BTF will get a
>>> #GP instead of being ignored and printing a log in kernel.
>>>
>> Since the BTF is not implemented on the KVM at all,
>> I do propose not left this kind of dummy thing in the future KVM code.
>>
>> Let's see if Netware or any BTF user will complain about this change.
> If you want to drop that behavior it needs be done in a separate patch.
> Personally I don't see the point in doing so, it's a trivial amount of code
> in KVM and there's no harm in dropping the bits on write.
No harm in dropping the bits on write ? Interesting.
I may keep the semantics unchanged for LBR patches and make it as separate 
proposal.

Thanks,
Like Xu
Sean Christopherson July 10, 2020, 4:28 p.m. UTC | #6
On Wed, Jul 08, 2020 at 03:06:57PM +0800, Xu, Like wrote:
> Hi Sean,
> 
> First of all, are you going to queue the LBR patch series in your tree
> considering the host perf patches have already queued in Peter's tree ?

No, I'll let Paolo take 'em directly, I'm nowhere near knowledgeable enough
with respect to the PMU to feel comfortable taking them.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index b633a90320ee..f6fcfabb1026 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -21,6 +21,8 @@  extern int __read_mostly pt_mode;
 #define PMU_CAP_FW_WRITES	(1ULL << 13)
 #define PMU_CAP_LBR_FMT		0x3f
 
+#define DEBUGCTLMSR_LBR_MASK		(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
+
 struct nested_vmx_msrs {
 	/*
 	 * We only store the "true" versions of the VMX capability MSRs. We
@@ -387,4 +389,14 @@  static inline u64 vmx_get_perf_capabilities(void)
 	return perf_cap;
 }
 
+static inline u64 vmx_get_supported_debugctl(void)
+{
+	u64 val = 0;
+
+	if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
+		val |= DEBUGCTLMSR_LBR_MASK;
+
+	return val;
+}
+
 #endif /* __KVM_X86_VMX_CAPS_H */
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a953c7d633f6..d92e95b64c74 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -187,6 +187,7 @@  static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		ret = pmu->version > 1;
 		break;
+	case MSR_IA32_DEBUGCTLMSR:
 	case MSR_IA32_PERF_CAPABILITIES:
 		ret = 1;
 		break;
@@ -237,6 +238,9 @@  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		msr_info->data = vcpu->arch.perf_capabilities;
 		return 0;
+	case MSR_IA32_DEBUGCTLMSR:
+		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -282,6 +286,16 @@  static inline bool lbr_is_compatible(struct kvm_vcpu *vcpu)
 	return true;
 }
 
+static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
+{
+	u64 debugctlmsr = vmx_get_supported_debugctl();
+
+	if (!lbr_is_enabled(vcpu))
+		debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
+
+	return debugctlmsr;
+}
+
 static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -336,6 +350,11 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		}
 		vcpu->arch.perf_capabilities = data;
 		return 0;
+	case MSR_IA32_DEBUGCTLMSR:
+		if (data & ~vcpu_get_supported_debugctl(vcpu))
+			return 1;
+		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00c88c2f34e4..56f275eb4554 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2840,18 +2840,6 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		}
 		break;
-	case MSR_IA32_DEBUGCTLMSR:
-		if (!data) {
-			/* We support the non-activated case already */
-			break;
-		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
-			/* Values other than LBR and BTF are vendor-specific,
-			   thus reserved and should throw a #GP */
-			return 1;
-		}
-		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
-			    __func__, data);
-		break;
 	case 0x200 ... 0x2ff:
 		return kvm_mtrr_set_msr(vcpu, msr, data);
 	case MSR_IA32_APICBASE:
@@ -3120,7 +3108,6 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	switch (msr_info->index) {
 	case MSR_IA32_PLATFORM_ID:
 	case MSR_IA32_EBL_CR_POWERON:
-	case MSR_IA32_DEBUGCTLMSR:
 	case MSR_IA32_LASTBRANCHFROMIP:
 	case MSR_IA32_LASTBRANCHTOIP:
 	case MSR_IA32_LASTINTFROMIP: