[v7,05/15] KVM: vmx/pmu: Emulate MSR_ARCH_LBR_CTL for guest Arch LBR

Message ID 1628235745-26566-6-git-send-email-weijiang.yang@intel.com (mailing list archive)
State New, archived
Series Introduce Architectural LBR for vPMU

Commit Message

Yang, Weijiang Aug. 6, 2021, 7:42 a.m. UTC
From: Like Xu <like.xu@linux.intel.com>

Arch LBRs are enabled by setting MSR_ARCH_LBR_CTL.LBREn to 1. A new guest
state field named "Guest IA32_LBR_CTL" is added to enhance guest LBR usage.
When guest Arch LBR is enabled, a guest LBR event is created just as it is
for model-specific LBR. The guest LBR enable bit is cleared during host PMI
handling so that the guest sees the expected configuration.

On processors that support Arch LBR, MSR_IA32_DEBUGCTLMSR[bit 0] has no
meaning. It can be written to 0 or 1, but reads will always return 0.
Like IA32_DEBUGCTL, the IA32_ARCH_LBR_CTL MSR is also preserved on INIT.

For both Arch LBR and legacy LBR, LBR recording is enabled when bit 0
(LBREn) of the corresponding control MSR is set to 1.

Signed-off-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/events/intel/lbr.c      |  2 --
 arch/x86/include/asm/msr-index.h |  1 +
 arch/x86/include/asm/vmx.h       |  2 ++
 arch/x86/kvm/vmx/pmu_intel.c     | 48 ++++++++++++++++++++++++++++----
 arch/x86/kvm/vmx/vmx.c           |  9 ++++++
 5 files changed, 55 insertions(+), 7 deletions(-)
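
For context (not part of this patch), the guest-side sequence being emulated looks roughly like the snippet below. The depth and filter choices are illustrative, and ARCH_LBR_CTL_FILTER is assumed to be the branch-type filter mask (bits 22:16) from the Arch LBR block in msr-index.h:

	/* Illustrative guest-side enabling of Arch LBR; values are examples. */
	wrmsrl(MSR_ARCH_LBR_DEPTH, 32);		/* a CPUID.0x1C-supported depth */
	wrmsrl(MSR_ARCH_LBR_CTL,
	       ARCH_LBR_CTL_LBREN |		/* bit 0: start recording */
	       ARCH_LBR_CTL_CPL |		/* record at all privilege levels */
	       ARCH_LBR_CTL_FILTER);		/* record all branch types */

KVM intercepts the MSR_ARCH_LBR_CTL write, validates it, and reflects it into the new "Guest IA32_LBR_CTL" VMCS field.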

Comments

kernel test robot Aug. 6, 2021, 4:26 p.m. UTC | #1
Hi Yang,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on v5.14-rc4]
[also build test WARNING on next-20210805]
[cannot apply to kvm/queue tip/perf/core tip/x86/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Yang-Weijiang/Introduce-Architectural-LBR-for-vPMU/20210806-153154
base:    c500bee1c5b2f1d59b1081ac879d73268ab0ff17
config: x86_64-rhel-8.3-kselftests (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-348-gf0e6938b-dirty
        # https://github.com/0day-ci/linux/commit/1d5b4967a3c4553c3648638f1189dcbff631328e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Yang-Weijiang/Introduce-Architectural-LBR-for-vPMU/20210806-153154
        git checkout 1d5b4967a3c4553c3648638f1189dcbff631328e
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 SHELL=/bin/bash arch/x86/kvm/

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
   arch/x86/kvm/vmx/pmu_intel.c: note: in included file (through arch/x86/kvm/vmx/vmx_ops.h, arch/x86/kvm/vmx/vmx.h, arch/x86/kvm/vmx/nested.h):
>> arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a058a becomes 58a)
>> arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a058a becomes 58a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a008a becomes 8a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a008a becomes 8a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a008a becomes 8a)

vim +81 arch/x86/kvm/vmx/evmcs.h

75edce8a45486f arch/x86/kvm/vmx/evmcs.h Sean Christopherson 2018-12-03  77  
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  78  static __always_inline int get_evmcs_offset(unsigned long field,
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  79  					    u16 *clean_field)
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  80  {
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20 @81  	unsigned int index = ROL16(field, 6);
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  82  	const struct evmcs_field *evmcs_field;
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  83  
75edce8a45486f arch/x86/kvm/vmx/evmcs.h Sean Christopherson 2018-12-03  84  	if (unlikely(index >= nr_evmcs_1_fields)) {
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  85  		WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  86  			  field);
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  87  		return -ENOENT;
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  88  	}
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  89  
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  90  	evmcs_field = &vmcs_field_to_evmcs_1[index];
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  91  
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  92  	if (clean_field)
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  93  		*clean_field = evmcs_field->clean_field;
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  94  
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  95  	return evmcs_field->offset;
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  96  }
773e8a0425c923 arch/x86/kvm/vmx_evmcs.h Vitaly Kuznetsov    2018-03-20  97  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Like Xu Aug. 9, 2021, 1:36 p.m. UTC | #2
On 6/8/2021 3:42 pm, Yang Weijiang wrote:
> From: Like Xu <like.xu@linux.intel.com>
> 
> Arch LBRs are enabled by setting MSR_ARCH_LBR_CTL.LBREn to 1. A new guest
> state field named "Guest IA32_LBR_CTL" is added to enhance guest LBR usage.
> When guest Arch LBR is enabled, a guest LBR event is created just as it is
> for model-specific LBR. The guest LBR enable bit is cleared during host PMI
> handling so that the guest sees the expected configuration.
> 
> On processors that support Arch LBR, MSR_IA32_DEBUGCTLMSR[bit 0] has no
> meaning. It can be written to 0 or 1, but reads will always return 0.
> Like IA32_DEBUGCTL, the IA32_ARCH_LBR_CTL MSR is also preserved on INIT.
> 
> For both Arch LBR and legacy LBR, LBR recording is enabled when bit 0
> (LBREn) of the corresponding control MSR is set to 1.
> 
> Signed-off-by: Like Xu <like.xu@linux.intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---
>   arch/x86/events/intel/lbr.c      |  2 --
>   arch/x86/include/asm/msr-index.h |  1 +
>   arch/x86/include/asm/vmx.h       |  2 ++
>   arch/x86/kvm/vmx/pmu_intel.c     | 48 ++++++++++++++++++++++++++++----
>   arch/x86/kvm/vmx/vmx.c           |  9 ++++++
>   5 files changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
> index ceff16eb4bcb..d6773a200c70 100644
> --- a/arch/x86/events/intel/lbr.c
> +++ b/arch/x86/events/intel/lbr.c
> @@ -168,8 +168,6 @@ enum {
>   	 ARCH_LBR_RETURN		|\
>   	 ARCH_LBR_OTHER_BRANCH)
>   
> -#define ARCH_LBR_CTL_MASK			0x7f000e
> -
>   static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
>   
>   static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index a7c413432b33..04059e8ed115 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -169,6 +169,7 @@
>   #define LBR_INFO_BR_TYPE		(0xfull << LBR_INFO_BR_TYPE_OFFSET)
>   
>   #define MSR_ARCH_LBR_CTL		0x000014ce
> +#define ARCH_LBR_CTL_MASK		0x7f000e
>   #define ARCH_LBR_CTL_LBREN		BIT(0)
>   #define ARCH_LBR_CTL_CPL_OFFSET		1
>   #define ARCH_LBR_CTL_CPL		(0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 0ffaa3156a4e..ea3be961cc8e 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -245,6 +245,8 @@ enum vmcs_field {
>   	GUEST_BNDCFGS_HIGH              = 0x00002813,
>   	GUEST_IA32_RTIT_CTL		= 0x00002814,
>   	GUEST_IA32_RTIT_CTL_HIGH	= 0x00002815,
> +	GUEST_IA32_LBR_CTL		= 0x00002816,
> +	GUEST_IA32_LBR_CTL_HIGH		= 0x00002817,
>   	HOST_IA32_PAT			= 0x00002c00,
>   	HOST_IA32_PAT_HIGH		= 0x00002c01,
>   	HOST_IA32_EFER			= 0x00002c02,
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index a4ef5bbce186..b2631fea5e6c 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -19,6 +19,7 @@
>   #include "pmu.h"
>   
>   #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
> +#define KVM_ARCH_LBR_CTL_MASK  (ARCH_LBR_CTL_MASK | ARCH_LBR_CTL_LBREN)
>   
>   static struct kvm_event_hw_type_mapping intel_arch_events[] = {
>   	/* Index must match CPUID 0x0A.EBX bit vector */
> @@ -221,6 +222,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
>   		ret = pmu->version > 1;
>   		break;
>   	case MSR_ARCH_LBR_DEPTH:
> +	case MSR_ARCH_LBR_CTL:
>   		if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
>   			ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
>   		break;
> @@ -369,6 +371,26 @@ static bool arch_lbr_depth_is_valid(struct kvm_vcpu *vcpu, u64 depth)
>   	return (depth == fls(eax & 0xff) * 8);
>   }
>   
> +#define ARCH_LBR_CTL_BRN_MASK   GENMASK_ULL(22, 16)
> +
> +static bool arch_lbr_ctl_is_valid(struct kvm_vcpu *vcpu, u64 ctl)
> +{
> +	unsigned int eax, ebx, ecx, edx;
> +
> +	if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
> +		return false;
> +
> +	cpuid_count(0x1c, 0, &eax, &ebx, &ecx, &edx);
> +	if (!(ebx & BIT(0)) && (ctl & ARCH_LBR_CTL_CPL))
> +		return false;
> +	if (!(ebx & BIT(2)) && (ctl & ARCH_LBR_CTL_STACK))
> +		return false;
> +	if (!(ebx & BIT(1)) && (ctl & ARCH_LBR_CTL_BRN_MASK))
> +		return false;
> +
> +	return !(ctl & ~KVM_ARCH_LBR_CTL_MASK);
> +}

Please check it with the *guest* cpuid entry.

And it should mask out the bits that are not supported by x86_pmu.lbr_ctl_mask
before the vmcs_write64(...), with a warning message, if the guest value is a
superset of the host value.
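
A minimal sketch of the suggested rework (illustrative, not the posted patch), assuming the v5.14-era kvm_find_cpuid_entry(vcpu, function, index) helper from arch/x86/kvm/cpuid.h:

	static bool arch_lbr_ctl_is_valid(struct kvm_vcpu *vcpu, u64 ctl)
	{
		struct kvm_cpuid_entry2 *entry;

		if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
			return false;

		/* Consult the guest's CPUID.0x1C.0 rather than the host's. */
		entry = kvm_find_cpuid_entry(vcpu, 0x1c, 0);
		if (!entry)
			return false;

		if (!(entry->ebx & BIT(0)) && (ctl & ARCH_LBR_CTL_CPL))
			return false;
		if (!(entry->ebx & BIT(2)) && (ctl & ARCH_LBR_CTL_STACK))
			return false;
		if (!(entry->ebx & BIT(1)) && (ctl & ARCH_LBR_CTL_BRN_MASK))
			return false;

		return !(ctl & ~KVM_ARCH_LBR_CTL_MASK);
	}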

> +
>   static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   {
>   	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -392,6 +414,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   	case MSR_ARCH_LBR_DEPTH:
>   		msr_info->data = lbr_desc->records.nr;
>   		return 0;
> +	case MSR_ARCH_LBR_CTL:
> +		msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
> +		return 0;
>   	default:
>   		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>   		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -460,6 +485,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   		if (!msr_info->host_initiated)
>   			wrmsrl(MSR_ARCH_LBR_DEPTH, lbr_desc->records.nr);
>   		return 0;
> +	case MSR_ARCH_LBR_CTL:
> +		if (!arch_lbr_ctl_is_valid(vcpu, data))
> +			break;
> +
> +		vmcs_write64(GUEST_IA32_LBR_CTL, data);
> +		if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event &&
> +		    (data & ARCH_LBR_CTL_LBREN))
> +			intel_pmu_create_guest_lbr_event(vcpu);
> +		return 0;
>   	default:
>   		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>   		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -637,12 +671,16 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
>    */
>   static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
>   {
> -	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> +	u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
>   
> -	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
> -		data &= ~DEBUGCTLMSR_LBR;
> -		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> -	}
> +	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
> +		return;
> +
> +	if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
> +	    guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
> +		lbr_ctl_field = GUEST_IA32_LBR_CTL;
> +
> +	vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~0x1ULL);
>   }
>   
>   static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 927a552393b9..e8df44faf900 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2002,6 +2002,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   						VM_EXIT_SAVE_DEBUG_CONTROLS)
>   			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
>   
> +		/*
> +		 * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
> +		 * It can be written to 0 or 1, but reads will always return 0.
> +		 */
> +		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
> +			data &= ~DEBUGCTLMSR_LBR;
> +
>   		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>   		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
>   		    (data & DEBUGCTLMSR_LBR))
> @@ -4441,6 +4448,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>   		vmcs_writel(GUEST_SYSENTER_ESP, 0);
>   		vmcs_writel(GUEST_SYSENTER_EIP, 0);
>   		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> +		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
> +			vmcs_write64(GUEST_IA32_LBR_CTL, 0);

Please update dump_vmcs() to dump GUEST_IA32_LBR_CTL as well.

How about updating load_vmcs12_host_state() for GUEST_IA32_LBR_CTL,
since you enabled the nested case in this patch set?
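
Minimal sketches of those two suggestions (illustrative; neither hunk is from this series):

	/* In vmx.c:dump_vmcs(), alongside the other guest-state fields: */
	if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
		pr_err("IA32_LBR_CTL = 0x%016llx\n",
		       vmcs_read64(GUEST_IA32_LBR_CTL));

	/* In nested.c:load_vmcs12_host_state(), mirroring the existing
	 * vmcs_write64(GUEST_IA32_DEBUGCTL, 0) reset: */
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		vmcs_write64(GUEST_IA32_LBR_CTL, 0);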

>   	}
>   
>   	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
>
Yang, Weijiang Aug. 10, 2021, 8:30 a.m. UTC | #3
On Mon, Aug 09, 2021 at 09:36:49PM +0800, Like Xu wrote:
> On 6/8/2021 3:42 pm, Yang Weijiang wrote:
> >From: Like Xu <like.xu@linux.intel.com>
> >
> >Arch LBRs are enabled by setting MSR_ARCH_LBR_CTL.LBREn to 1. A new guest
> >state field named "Guest IA32_LBR_CTL" is added to enhance guest LBR usage.
> >When guest Arch LBR is enabled, a guest LBR event is created just as it is
> >for model-specific LBR. The guest LBR enable bit is cleared during host PMI
> >handling so that the guest sees the expected configuration.
> >
> >On processors that support Arch LBR, MSR_IA32_DEBUGCTLMSR[bit 0] has no
> >meaning. It can be written to 0 or 1, but reads will always return 0.
> >Like IA32_DEBUGCTL, the IA32_ARCH_LBR_CTL MSR is also preserved on INIT.
> >
> >For both Arch LBR and legacy LBR, LBR recording is enabled when bit 0
> >(LBREn) of the corresponding control MSR is set to 1.
> >
> >Signed-off-by: Like Xu <like.xu@linux.intel.com>
> >Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> >---
> >  arch/x86/events/intel/lbr.c      |  2 --
> >  arch/x86/include/asm/msr-index.h |  1 +
> >  arch/x86/include/asm/vmx.h       |  2 ++
> >  arch/x86/kvm/vmx/pmu_intel.c     | 48 ++++++++++++++++++++++++++++----
> >  arch/x86/kvm/vmx/vmx.c           |  9 ++++++
> >  5 files changed, 55 insertions(+), 7 deletions(-)
> >

[...]

> >+static bool arch_lbr_ctl_is_valid(struct kvm_vcpu *vcpu, u64 ctl)
> >+{
> >+	unsigned int eax, ebx, ecx, edx;
> >+
> >+	if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
> >+		return false;
> >+
> >+	cpuid_count(0x1c, 0, &eax, &ebx, &ecx, &edx);
> >+	if (!(ebx & BIT(0)) && (ctl & ARCH_LBR_CTL_CPL))
> >+		return false;
> >+	if (!(ebx & BIT(2)) && (ctl & ARCH_LBR_CTL_STACK))
> >+		return false;
> >+	if (!(ebx & BIT(1)) && (ctl & ARCH_LBR_CTL_BRN_MASK))
> >+		return false;
> >+
> >+	return !(ctl & ~KVM_ARCH_LBR_CTL_MASK);
> >+}
> 
> Please check it with the *guest* cpuid entry.
If KVM "trusts" user-space, then check with guest cpuid is OK.
But if user-space enable excessive controls, then check against guest
cpuid could make things mess.

> 
> And it should mask out the bits that are not supported by x86_pmu.lbr_ctl_mask
> before the vmcs_write64(...), with a warning message, if the guest value is a
> superset of the host value.
Then I think it makes more sense to check against the x86_pmu.lbr_xxx masks
in the above function, for compatibility. What do you think?
> 
> >+
> >  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  {
> >  	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >@@ -392,6 +414,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  	case MSR_ARCH_LBR_DEPTH:
> >  		msr_info->data = lbr_desc->records.nr;
> >  		return 0;
> >+	case MSR_ARCH_LBR_CTL:
> >+		msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
> >+		return 0;
> >  	default:
> >  		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> >  		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> >  		vmcs_write64(GUEST_IA32_DEBUGCTL, data);

[...]

> >  		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
> >  		    (data & DEBUGCTLMSR_LBR))
> >@@ -4441,6 +4448,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> >  		vmcs_writel(GUEST_SYSENTER_ESP, 0);
> >  		vmcs_writel(GUEST_SYSENTER_EIP, 0);
> >  		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> >+		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
> >+			vmcs_write64(GUEST_IA32_LBR_CTL, 0);
> 
> Please update dump_vmcs() to dump GUEST_IA32_LBR_CTL as well.
OK, will add it.
> 
> How about updating load_vmcs12_host_state() for GUEST_IA32_LBR_CTL,
> since you enabled the nested case in this patch set?
No, I didn't enable nested Arch LBR; I only unblocked some issues for the nested case.
> 
> >  	}
> >  	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
> >
Like Xu Aug. 10, 2021, 8:37 a.m. UTC | #4
On 10/8/2021 4:30 pm, Yang Weijiang wrote:
> On Mon, Aug 09, 2021 at 09:36:49PM +0800, Like Xu wrote:
>> On 6/8/2021 3:42 pm, Yang Weijiang wrote:
>>> From: Like Xu <like.xu@linux.intel.com>
>>>
>>> Arch LBRs are enabled by setting MSR_ARCH_LBR_CTL.LBREn to 1. A new guest
>>> state field named "Guest IA32_LBR_CTL" is added to enhance guest LBR usage.
>>> When guest Arch LBR is enabled, a guest LBR event is created just as it is
>>> for model-specific LBR. The guest LBR enable bit is cleared during host PMI
>>> handling so that the guest sees the expected configuration.
>>>
>>> On processors that support Arch LBR, MSR_IA32_DEBUGCTLMSR[bit 0] has no
>>> meaning. It can be written to 0 or 1, but reads will always return 0.
>>> Like IA32_DEBUGCTL, the IA32_ARCH_LBR_CTL MSR is also preserved on INIT.
>>>
>>> For both Arch LBR and legacy LBR, LBR recording is enabled when bit 0
>>> (LBREn) of the corresponding control MSR is set to 1.
>>>
>>> Signed-off-by: Like Xu <like.xu@linux.intel.com>
>>> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
>>> ---
>>>   arch/x86/events/intel/lbr.c      |  2 --
>>>   arch/x86/include/asm/msr-index.h |  1 +
>>>   arch/x86/include/asm/vmx.h       |  2 ++
>>>   arch/x86/kvm/vmx/pmu_intel.c     | 48 ++++++++++++++++++++++++++++----
>>>   arch/x86/kvm/vmx/vmx.c           |  9 ++++++
>>>   5 files changed, 55 insertions(+), 7 deletions(-)
>>>
> 
> [...]
> 
>>> +static bool arch_lbr_ctl_is_valid(struct kvm_vcpu *vcpu, u64 ctl)
>>> +{
>>> +	unsigned int eax, ebx, ecx, edx;
>>> +
>>> +	if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
>>> +		return false;
>>> +
>>> +	cpuid_count(0x1c, 0, &eax, &ebx, &ecx, &edx);
>>> +	if (!(ebx & BIT(0)) && (ctl & ARCH_LBR_CTL_CPL))
>>> +		return false;
>>> +	if (!(ebx & BIT(2)) && (ctl & ARCH_LBR_CTL_STACK))
>>> +		return false;
>>> +	if (!(ebx & BIT(1)) && (ctl & ARCH_LBR_CTL_BRN_MASK))
>>> +		return false;
>>> +
>>> +	return !(ctl & ~KVM_ARCH_LBR_CTL_MASK);
>>> +}
>>
>> Please check it with the *guest* cpuid entry.
> If KVM "trusts" user space, then checking against the guest CPUID is OK.
> But if user space enables excessive controls, then checking against the
> guest CPUID could make things messy.

User space should be aware of its own risk
if it sets a CPUID that exceeds the host's capabilities.

> 
>>
>> And it should mask out the bits that are not supported by x86_pmu.lbr_ctl_mask
>> before the vmcs_write64(...), with a warning message, if the guest value is a
>> superset of the host value.
> Then I think it makes more sense to check against the x86_pmu.lbr_xxx masks
> in the above function, for compatibility. What do you think?

The host driver hard-codes x86_pmu.lbr_ctl_mask to ARCH_LBR_CTL_MASK,
but user space can mask out some of the capabilities based on its CPUID selection.
In that case, we need to check against the guest CPUID entry.

If user space exceeds the KVM-supported capabilities,
KVM could leave a warning before the VM entry fails.
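
As a sketch of that clamping (hypothetical: KVM does not reference x86_pmu directly, so kvm_arch_lbr_supported_ctl_mask() below is a made-up helper that would need a real perf interface, e.g. alongside x86_perf_get_lbr()):

	/* Hypothetical clamp before the VMCS write; the helper is made up. */
	u64 supported = kvm_arch_lbr_supported_ctl_mask();

	if (data & ~supported) {
		pr_warn_once("kvm: dropping unsupported ARCH_LBR_CTL bits 0x%llx\n",
			     data & ~supported);
		data &= supported;
	}
	vmcs_write64(GUEST_IA32_LBR_CTL, data);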

>>
>>> +
>>>   static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>   {
>>>   	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>> @@ -392,6 +414,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>   	case MSR_ARCH_LBR_DEPTH:
>>>   		msr_info->data = lbr_desc->records.nr;
>>>   		return 0;
>>> +	case MSR_ARCH_LBR_CTL:
>>> +		msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
>>> +		return 0;
>>>   	default:
>>>   		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>>   		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>>>   		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> 
> [...]
> 
>>>   		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
>>>   		    (data & DEBUGCTLMSR_LBR))
>>> @@ -4441,6 +4448,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>>>   		vmcs_writel(GUEST_SYSENTER_ESP, 0);
>>>   		vmcs_writel(GUEST_SYSENTER_EIP, 0);
>>>   		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
>>> +		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
>>> +			vmcs_write64(GUEST_IA32_LBR_CTL, 0);
>>
>> Please update dump_vmcs() to dump GUEST_IA32_LBR_CTL as well.
> OK, will add it.
>>
>> How about updating load_vmcs12_host_state() for GUEST_IA32_LBR_CTL,
>> since you enabled the nested case in this patch set?
> No, I didn't enable nested Arch LBR; I only unblocked some issues for the nested case.

Would you like to explain more about the unblocked issues?

>>
>>>   	}
>>>   	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
>>>

Patch

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index ceff16eb4bcb..d6773a200c70 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -168,8 +168,6 @@  enum {
 	 ARCH_LBR_RETURN		|\
 	 ARCH_LBR_OTHER_BRANCH)
 
-#define ARCH_LBR_CTL_MASK			0x7f000e
-
 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
 
 static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a7c413432b33..04059e8ed115 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -169,6 +169,7 @@ 
 #define LBR_INFO_BR_TYPE		(0xfull << LBR_INFO_BR_TYPE_OFFSET)
 
 #define MSR_ARCH_LBR_CTL		0x000014ce
+#define ARCH_LBR_CTL_MASK		0x7f000e
 #define ARCH_LBR_CTL_LBREN		BIT(0)
 #define ARCH_LBR_CTL_CPL_OFFSET		1
 #define ARCH_LBR_CTL_CPL		(0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 0ffaa3156a4e..ea3be961cc8e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -245,6 +245,8 @@  enum vmcs_field {
 	GUEST_BNDCFGS_HIGH              = 0x00002813,
 	GUEST_IA32_RTIT_CTL		= 0x00002814,
 	GUEST_IA32_RTIT_CTL_HIGH	= 0x00002815,
+	GUEST_IA32_LBR_CTL		= 0x00002816,
+	GUEST_IA32_LBR_CTL_HIGH		= 0x00002817,
 	HOST_IA32_PAT			= 0x00002c00,
 	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	HOST_IA32_EFER			= 0x00002c02,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a4ef5bbce186..b2631fea5e6c 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -19,6 +19,7 @@ 
 #include "pmu.h"
 
 #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
+#define KVM_ARCH_LBR_CTL_MASK  (ARCH_LBR_CTL_MASK | ARCH_LBR_CTL_LBREN)
 
 static struct kvm_event_hw_type_mapping intel_arch_events[] = {
 	/* Index must match CPUID 0x0A.EBX bit vector */
@@ -221,6 +222,7 @@  static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 		ret = pmu->version > 1;
 		break;
 	case MSR_ARCH_LBR_DEPTH:
+	case MSR_ARCH_LBR_CTL:
 		if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
 			ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
 		break;
@@ -369,6 +371,26 @@  static bool arch_lbr_depth_is_valid(struct kvm_vcpu *vcpu, u64 depth)
 	return (depth == fls(eax & 0xff) * 8);
 }
 
+#define ARCH_LBR_CTL_BRN_MASK   GENMASK_ULL(22, 16)
+
+static bool arch_lbr_ctl_is_valid(struct kvm_vcpu *vcpu, u64 ctl)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
+		return false;
+
+	cpuid_count(0x1c, 0, &eax, &ebx, &ecx, &edx);
+	if (!(ebx & BIT(0)) && (ctl & ARCH_LBR_CTL_CPL))
+		return false;
+	if (!(ebx & BIT(2)) && (ctl & ARCH_LBR_CTL_STACK))
+		return false;
+	if (!(ebx & BIT(1)) && (ctl & ARCH_LBR_CTL_BRN_MASK))
+		return false;
+
+	return !(ctl & ~KVM_ARCH_LBR_CTL_MASK);
+}
+
 static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -392,6 +414,9 @@  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_ARCH_LBR_DEPTH:
 		msr_info->data = lbr_desc->records.nr;
 		return 0;
+	case MSR_ARCH_LBR_CTL:
+		msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -460,6 +485,15 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!msr_info->host_initiated)
 			wrmsrl(MSR_ARCH_LBR_DEPTH, lbr_desc->records.nr);
 		return 0;
+	case MSR_ARCH_LBR_CTL:
+		if (!arch_lbr_ctl_is_valid(vcpu, data))
+			break;
+
+		vmcs_write64(GUEST_IA32_LBR_CTL, data);
+		if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event &&
+		    (data & ARCH_LBR_CTL_LBREN))
+			intel_pmu_create_guest_lbr_event(vcpu);
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -637,12 +671,16 @@  static void intel_pmu_reset(struct kvm_vcpu *vcpu)
  */
 static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
 {
-	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+	u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
 
-	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
-		data &= ~DEBUGCTLMSR_LBR;
-		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
-	}
+	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
+		return;
+
+	if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
+	    guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
+		lbr_ctl_field = GUEST_IA32_LBR_CTL;
+
+	vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~0x1ULL);
 }
 
 static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 927a552393b9..e8df44faf900 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2002,6 +2002,13 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 						VM_EXIT_SAVE_DEBUG_CONTROLS)
 			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
 
+		/*
+		 * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
+		 * It can be written to 0 or 1, but reads will always return 0.
+		 */
+		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
+			data &= ~DEBUGCTLMSR_LBR;
+
 		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
 		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
 		    (data & DEBUGCTLMSR_LBR))
@@ -4441,6 +4448,8 @@  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		vmcs_writel(GUEST_SYSENTER_ESP, 0);
 		vmcs_writel(GUEST_SYSENTER_EIP, 0);
 		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+			vmcs_write64(GUEST_IA32_LBR_CTL, 0);
 	}
 
 	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);