
[v2,06/15] KVM: vmx/pmu: Emulate MSR_ARCH_LBR_CTL for guest Arch LBR

Message ID: 20221125040604.5051-7-weijiang.yang@intel.com
State: New, archived
Series: Introduce Architectural LBR for vPMU

Commit Message

Yang, Weijiang Nov. 25, 2022, 4:05 a.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

Arch LBR is enabled by setting MSR_ARCH_LBR_CTL.LBREn to 1. A new guest
state field named "Guest IA32_LBR_CTL" is added to enhance guest LBR usage.
When guest Arch LBR is enabled, a guest LBR event is created, just as it is
for model-specific LBR. Clear the guest LBR enable bit when handling a host
PMI so the guest sees the expected configuration.

On processors that support Arch LBR, MSR_IA32_DEBUGCTLMSR[bit 0] has no
meaning. It can be written to 0 or 1, but reads will always return 0.
Like IA32_DEBUGCTL, the IA32_ARCH_LBR_CTL MSR is preserved across INIT.

For both Arch LBR and legacy LBR, LBR recording is enabled when bit 0
(LBREn/LBR) of the corresponding control MSR is set to 1.

Signed-off-by: Like Xu <like.xu@linux.intel.com>
Co-developed-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Co-developed-by: Yang Weijiang <weijiang.yang@intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/events/intel/lbr.c      |  2 -
 arch/x86/include/asm/msr-index.h |  1 +
 arch/x86/include/asm/vmx.h       |  2 +
 arch/x86/kvm/vmx/pmu_intel.c     | 67 ++++++++++++++++++++++++++++----
 arch/x86/kvm/vmx/vmx.c           |  7 ++++
 5 files changed, 69 insertions(+), 10 deletions(-)

Comments

Like Xu Dec. 22, 2022, 11:09 a.m. UTC | #1
On 25/11/2022 12:05 pm, Yang Weijiang wrote:
>   static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   {
>   	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -377,6 +402,14 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   	case MSR_ARCH_LBR_DEPTH:
>   		msr_info->data = lbr_desc->records.nr;
>   		return 0;
> +	case MSR_ARCH_LBR_CTL:
> +		if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR)) {
> +			WARN_ON_ONCE(!msr_info->host_initiated);

Why do we need this warning, even in the form of WARN_ON_ONCE()?
And why not for the MSR_ARCH_LBR_DEPTH MSR?

> +			msr_info->data = 0;
> +		} else {
> +			msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
> +		}
> +		return 0;
Like Xu Dec. 22, 2022, 11:19 a.m. UTC | #2
On 25/11/2022 12:05 pm, Yang Weijiang wrote:
> @@ -727,12 +772,16 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
>    */
>   static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
>   {
> -	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> +	u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
>   
> -	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
> -		data &= ~DEBUGCTLMSR_LBR;
> -		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> -	}
> +	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
> +		return;
> +
> +	if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
> +	    guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
> +		lbr_ctl_field = GUEST_IA32_LBR_CTL;
> +
> +	vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~0x1ULL);
>   }
>   
>   static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)

The legacy LBR test case in KUT does not cover this scenario; the Arch LBR
contributor should take the opportunity to fill this gap. Thanks.
Like Xu Dec. 22, 2022, 11:24 a.m. UTC | #3
On 25/11/2022 12:05 pm, Yang Weijiang wrote:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index cea8c07f5229..1ae2efc29546 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2120,6 +2120,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   						VM_EXIT_SAVE_DEBUG_CONTROLS)
>   			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
>   
> +		/*
> +		 * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
> +		 * It can be written to 0 or 1, but reads will always return 0.
> +		 */

The comment looks good, please verify it with a test.

> +		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
> +			data &= ~DEBUGCTLMSR_LBR;
> +
>   		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>   		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
>   		    (data & DEBUGCTLMSR_LBR))
Yang, Weijiang Dec. 25, 2022, 4:08 a.m. UTC | #4
On 12/22/2022 7:24 PM, Like Xu wrote:
> On 25/11/2022 12:05 pm, Yang Weijiang wrote:
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index cea8c07f5229..1ae2efc29546 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -2120,6 +2120,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>                           VM_EXIT_SAVE_DEBUG_CONTROLS)
>>               get_vmcs12(vcpu)->guest_ia32_debugctl = data;
>>   +        /*
>> +         * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
>> +         * It can be written to 0 or 1, but reads will always return 0.
>> +         */
>
> The comment looks good, please verify it with a test.


OK, I'll add the test to the pmu_lbr KUT test together with the arch-lbr test
cases.



>
>> +        if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
>> +            data &= ~DEBUGCTLMSR_LBR;
>> +
>>           vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>>           if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
>>               (data & DEBUGCTLMSR_LBR))
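A rough sketch of the IA32_DEBUGCTL[0] read-as-zero check mentioned above, in
kvm-unit-tests style. It assumes the usual lib/x86 helpers (wrmsr(), rdmsr(),
report()) and the MSR_IA32_DEBUGCTLMSR/DEBUGCTLMSR_LBR definitions from
lib/x86/msr.h; the test name is illustrative, and the check is only meaningful
when the vCPU enumerates Arch LBR (CPUID.(7,0):EDX[19]).

	static void test_debugctl_lbr_bit_reads_zero(void)
	{
		u64 debugctl;

		/* Write IA32_DEBUGCTL[0]; on Arch LBR parts it must read back as 0. */
		wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
		debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
		report(!(debugctl & DEBUGCTLMSR_LBR),
		       "IA32_DEBUGCTL[0] reads as zero when Arch LBR is enumerated");

		/* Restore a clean DEBUGCTL value. */
		wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
	}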
Yang, Weijiang Dec. 25, 2022, 4:16 a.m. UTC | #5
On 12/22/2022 7:19 PM, Like Xu wrote:
> On 25/11/2022 12:05 pm, Yang Weijiang wrote:
>> @@ -727,12 +772,16 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
>>    */
>>   static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
>>   {
>> -    u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>> +    u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
>>   -    if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
>> -        data &= ~DEBUGCTLMSR_LBR;
>> -        vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>> -    }
>> +    if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
>> +        return;
>> +
>> +    if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
>> +        guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
>> +        lbr_ctl_field = GUEST_IA32_LBR_CTL;
>> +
>> +    vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~0x1ULL);
>>   }
>>     static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
>
> The legacy LBR test case in KUT does not cover this scenario; the Arch LBR
> contributor should take the opportunity to fill this gap. Thanks.


OK, I will try to add a check for this missing case.
Yang, Weijiang Dec. 25, 2022, 4:27 a.m. UTC | #6
On 12/22/2022 7:09 PM, Like Xu wrote:
> On 25/11/2022 12:05 pm, Yang Weijiang wrote:
>>   static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   {
>>       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> @@ -377,6 +402,14 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>       case MSR_ARCH_LBR_DEPTH:
>>           msr_info->data = lbr_desc->records.nr;
>>           return 0;
>> +    case MSR_ARCH_LBR_CTL:
>> +        if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR)) {
>> +            WARN_ON_ONCE(!msr_info->host_initiated);
>
> Why do we need this warning, even in the form of WARN_ON_ONCE()?
> And why not for the MSR_ARCH_LBR_DEPTH MSR?


IMO, this is just to give an informative message. It's not necessary for every
arch-lbr MSR, as MSR_ARCH_LBR_CTL is the master control MSR.

>
>> +            msr_info->data = 0;
>> +        } else {
>> +            msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
>> +        }
>> +        return 0;
Sean Christopherson Jan. 27, 2023, 9:42 p.m. UTC | #7
On Thu, Nov 24, 2022, Yang Weijiang wrote:
> @@ -345,6 +346,30 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
>  	return true;
>  }
>  
> +static bool arch_lbr_ctl_is_valid(struct kvm_vcpu *vcpu, u64 ctl)
> +{
> +	struct kvm_cpuid_entry2 *entry;
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	if (!pmu->kvm_arch_lbr_depth)
> +		return false;
> +
> +	if (ctl & ~KVM_ARCH_LBR_CTL_MASK)
> +		return false;
> +
> +	entry = kvm_find_cpuid_entry(vcpu, 0x1c);

Why!?!?!  Why does this series reinvent the wheel so many times?  We already have
a huge pile of reserved bits masks that are computed in intel_pmu_refresh(), just
do the easy thing and follow the established pattern.
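
An editor's sketch of that pattern, not code from this series: compute a
reserved-bits mask once from CPUID leaf 0x1C in intel_pmu_refresh() and stash
it in a hypothetical kvm_pmu field ("arch_lbr_ctl_rsvd"), so the WRMSR path
only has to test "data & pmu->arch_lbr_ctl_rsvd".

	/* In intel_pmu_refresh(), alongside the other reserved-bits masks. */
	pmu->arch_lbr_ctl_rsvd = ~0ull;
	entry = kvm_find_cpuid_entry(vcpu, 0x1c);
	if (entry) {
		pmu->arch_lbr_ctl_rsvd = ~(u64)KVM_ARCH_LBR_CTL_MASK;
		/* Reserve control bits whose CPUID.0x1C.EBX feature bit isn't exposed. */
		if (!(entry->ebx & BIT(0)))
			pmu->arch_lbr_ctl_rsvd |= ARCH_LBR_CTL_CPL;
		if (!(entry->ebx & BIT(1)))
			pmu->arch_lbr_ctl_rsvd |= ARCH_LBR_CTL_FILTER;
		if (!(entry->ebx & BIT(2)))
			pmu->arch_lbr_ctl_rsvd |= ARCH_LBR_CTL_STACK;
	}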

> +	if (!entry)
> +		return false;
> +
> +	if (!(entry->ebx & BIT(0)) && (ctl & ARCH_LBR_CTL_CPL))
> +		return false;
> +	if (!(entry->ebx & BIT(2)) && (ctl & ARCH_LBR_CTL_STACK))
> +		return false;
> +	if (!(entry->ebx & BIT(1)) && (ctl & ARCH_LBR_CTL_FILTER))
> +		return false;
> +	return true;
> +}
> +
>  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  {
>  	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -377,6 +402,14 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_ARCH_LBR_DEPTH:
>  		msr_info->data = lbr_desc->records.nr;
>  		return 0;
> +	case MSR_ARCH_LBR_CTL:
> +		if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR)) {

So I assume the point of this code is to allow reading MSR_ARCH_LBR_CTL from
userspace before doing KVM_SET_CPUID2, but I would much rather do that in a
generic way, i.e. build this series on 
https://lore.kernel.org/all/20230124234905.3774678-7-seanjc@google.com

> +			WARN_ON_ONCE(!msr_info->host_initiated);
> +			msr_info->data = 0;
> +		} else {
> +			msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
> +		}
> +		return 0;
>  	default:
>  		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>  		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -483,6 +516,18 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
>  			wrmsrl(MSR_ARCH_LBR_DEPTH, lbr_desc->records.nr);
>  		return 0;
> +	case MSR_ARCH_LBR_CTL:
> +		if (msr_info->host_initiated && !pmu->kvm_arch_lbr_depth)
> +			return data != 0;
> +
> +		if (!arch_lbr_ctl_is_valid(vcpu, data))
> +			break;
> +
> +		vmcs_write64(GUEST_IA32_LBR_CTL, data);
> +		if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event &&
> +		    (data & ARCH_LBR_CTL_LBREN))
> +			intel_pmu_create_guest_lbr_event(vcpu);
> +		return 0;
>  	default:
>  		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>  		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -727,12 +772,16 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
>   */
>  static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
>  {
> -	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> +	u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
>  
> -	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
> -		data &= ~DEBUGCTLMSR_LBR;
> -		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> -	}
> +	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
> +		return;
> +
> +	if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
> +	    guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
> +		lbr_ctl_field = GUEST_IA32_LBR_CTL;

Similar to other comments, just rely on KVM not allowing LBRs to be enabled unless
the guest is configured with the correct flavor.

> +
> +	vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~0x1ULL);

Open coding a bit just because it happens to be in the same position in both fields
is ridiculous.

	u64 debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);

	if (!(debugctl & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
		return;

	if (cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
		vmcs_clear_bits64(GUEST_IA32_LBR_CTL, ARCH_LBR_CTL_LBREN);
	else
		vmcs_write64(GUEST_IA32_DEBUGCTL, debugctl & ~DEBUGCTLMSR_LBR);

>  }
>  
>  static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
> @@ -801,7 +850,8 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>  	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
> -	bool lbr_enable = !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) &&
> +	bool lbr_enable = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) ?
> +		(vmcs_read64(GUEST_IA32_LBR_CTL) & ARCH_LBR_CTL_LBREN) :
>  		(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR);
>  
>  	if (!lbr_desc->event) {
> @@ -829,7 +879,8 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
>  
>  static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
>  {
> -	bool lbr_enable = !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) &&
> +	bool lbr_enable = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) ?
> +		(vmcs_read64(GUEST_IA32_LBR_CTL) & ARCH_LBR_CTL_LBREN) :
>  		(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR);

Instead of open coding the same ugly ternary operator in multiple locations, add
a helper.
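
An editor's sketch of one possible shape for such a helper; the name is
illustrative and the guest_cpuid_has() predicate simply mirrors the posted
code.

	static inline bool intel_pmu_lbr_is_recording(struct kvm_vcpu *vcpu)
	{
		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
			return vmcs_read64(GUEST_IA32_LBR_CTL) & ARCH_LBR_CTL_LBREN;

		return vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR;
	}

Both vmx_passthrough_lbr_msrs() and intel_pmu_cleanup() could then call the
helper instead of duplicating the ternary.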

>  
>  	if (!lbr_enable)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index cea8c07f5229..1ae2efc29546 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2120,6 +2120,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  						VM_EXIT_SAVE_DEBUG_CONTROLS)
>  			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
>  
> +		/*
> +		 * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
> +		 * It can be written to 0 or 1, but reads will always return 0.
> +		 */
> +		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))

This can be dependent solely on the host CPU.  If LBRs are unsupported, the bit
will be marked invalid and cleared above.  And as mentioned multiple times, KVM
needs to allow enabling LBRs iff the guest and host flavors match.

> +			data &= ~DEBUGCTLMSR_LBR;

This needs to be done before shoving the value into vmcs12.
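
An editor's sketch of the requested ordering; the surrounding vmx_set_msr()
context is abridged/reconstructed and not part of the posted patch.

	/* Sanitize the value first: IA32_DEBUGCTL[0] is meaningless with Arch LBR... */
	if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
		data &= ~DEBUGCTLMSR_LBR;

	/* ...so the vmcs12 shadow and the VMCS both see the sanitized value. */
	if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
					VM_EXIT_SAVE_DEBUG_CONTROLS)
		get_vmcs12(vcpu)->guest_ia32_debugctl = data;

	vmcs_write64(GUEST_IA32_DEBUGCTL, data);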

> +
>  		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>  		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
>  		    (data & DEBUGCTLMSR_LBR))
> -- 
> 2.27.0
>

Patch

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index e7caabfa1377..ef589984b907 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -100,8 +100,6 @@ 
 	 ARCH_LBR_RETURN		|\
 	 ARCH_LBR_OTHER_BRANCH)
 
-#define ARCH_LBR_CTL_MASK			0x7f000e
-
 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
 
 static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 10ac52705892..2a9eaab2fdb1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -220,6 +220,7 @@ 
 #define LBR_INFO_BR_TYPE		(0xfull << LBR_INFO_BR_TYPE_OFFSET)
 
 #define MSR_ARCH_LBR_CTL		0x000014ce
+#define ARCH_LBR_CTL_MASK		0x7f000e
 #define ARCH_LBR_CTL_LBREN		BIT(0)
 #define ARCH_LBR_CTL_CPL_OFFSET		1
 #define ARCH_LBR_CTL_CPL		(0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 498dc600bd5c..8502c068202c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -257,6 +257,8 @@  enum vmcs_field {
 	GUEST_BNDCFGS_HIGH              = 0x00002813,
 	GUEST_IA32_RTIT_CTL		= 0x00002814,
 	GUEST_IA32_RTIT_CTL_HIGH	= 0x00002815,
+	GUEST_IA32_LBR_CTL		= 0x00002816,
+	GUEST_IA32_LBR_CTL_HIGH		= 0x00002817,
 	HOST_IA32_PAT			= 0x00002c00,
 	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	HOST_IA32_EFER			= 0x00002c02,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 0c78cb4b72be..b57944d5e7d8 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -19,6 +19,7 @@ 
 #include "pmu.h"
 
 #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
+#define KVM_ARCH_LBR_CTL_MASK  (ARCH_LBR_CTL_MASK | ARCH_LBR_CTL_LBREN)
 
 static struct kvm_event_hw_type_mapping intel_arch_events[] = {
 	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
@@ -178,7 +179,7 @@  static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
 	    (index == MSR_LBR_SELECT || index == MSR_LBR_TOS))
 		return true;
 
-	if (index == MSR_ARCH_LBR_DEPTH)
+	if (index == MSR_ARCH_LBR_DEPTH || index == MSR_ARCH_LBR_CTL)
 		return kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
 		       guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
 
@@ -345,6 +346,30 @@  static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static bool arch_lbr_ctl_is_valid(struct kvm_vcpu *vcpu, u64 ctl)
+{
+	struct kvm_cpuid_entry2 *entry;
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+	if (!pmu->kvm_arch_lbr_depth)
+		return false;
+
+	if (ctl & ~KVM_ARCH_LBR_CTL_MASK)
+		return false;
+
+	entry = kvm_find_cpuid_entry(vcpu, 0x1c);
+	if (!entry)
+		return false;
+
+	if (!(entry->ebx & BIT(0)) && (ctl & ARCH_LBR_CTL_CPL))
+		return false;
+	if (!(entry->ebx & BIT(2)) && (ctl & ARCH_LBR_CTL_STACK))
+		return false;
+	if (!(entry->ebx & BIT(1)) && (ctl & ARCH_LBR_CTL_FILTER))
+		return false;
+	return true;
+}
+
 static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -377,6 +402,14 @@  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_ARCH_LBR_DEPTH:
 		msr_info->data = lbr_desc->records.nr;
 		return 0;
+	case MSR_ARCH_LBR_CTL:
+		if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR)) {
+			WARN_ON_ONCE(!msr_info->host_initiated);
+			msr_info->data = 0;
+		} else {
+			msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
+		}
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -483,6 +516,18 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
 			wrmsrl(MSR_ARCH_LBR_DEPTH, lbr_desc->records.nr);
 		return 0;
+	case MSR_ARCH_LBR_CTL:
+		if (msr_info->host_initiated && !pmu->kvm_arch_lbr_depth)
+			return data != 0;
+
+		if (!arch_lbr_ctl_is_valid(vcpu, data))
+			break;
+
+		vmcs_write64(GUEST_IA32_LBR_CTL, data);
+		if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event &&
+		    (data & ARCH_LBR_CTL_LBREN))
+			intel_pmu_create_guest_lbr_event(vcpu);
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -727,12 +772,16 @@  static void intel_pmu_reset(struct kvm_vcpu *vcpu)
  */
 static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
 {
-	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+	u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
 
-	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
-		data &= ~DEBUGCTLMSR_LBR;
-		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
-	}
+	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
+		return;
+
+	if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
+	    guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
+		lbr_ctl_field = GUEST_IA32_LBR_CTL;
+
+	vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~0x1ULL);
 }
 
 static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
@@ -801,7 +850,8 @@  void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
-	bool lbr_enable = !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) &&
+	bool lbr_enable = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) ?
+		(vmcs_read64(GUEST_IA32_LBR_CTL) & ARCH_LBR_CTL_LBREN) :
 		(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR);
 
 	if (!lbr_desc->event) {
@@ -829,7 +879,8 @@  void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
 
 static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
 {
-	bool lbr_enable = !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) &&
+	bool lbr_enable = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) ?
+		(vmcs_read64(GUEST_IA32_LBR_CTL) & ARCH_LBR_CTL_LBREN) :
 		(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR);
 
 	if (!lbr_enable)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cea8c07f5229..1ae2efc29546 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2120,6 +2120,13 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 						VM_EXIT_SAVE_DEBUG_CONTROLS)
 			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
 
+		/*
+		 * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
+		 * It can be written to 0 or 1, but reads will always return 0.
+		 */
+		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
+			data &= ~DEBUGCTLMSR_LBR;
+
 		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
 		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
 		    (data & DEBUGCTLMSR_LBR))