diff mbox series

[V7,15/18] KVM: x86/pmu: Disable guest PEBS temporarily in two rare situations

Message ID 20210622094306.8336-16-lingshan.zhu@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/pmu: Add *basic* support to enable guest PEBS via DS | expand

Commit Message

Zhu, Lingshan June 22, 2021, 9:43 a.m. UTC
From: Like Xu <like.xu@linux.intel.com>

The guest PEBS will be disabled when some users try to perf KVM and
its user-space through the same PEBS facility OR when the host perf
doesn't schedule the guest PEBS counter in a one-to-one mapping manner
(neither of these is a typical scenario).

The PEBS records in the guest DS buffer are still accurate and the
above two restrictions will be checked before each vm-entry only if
guest PEBS is deemed to be enabled.

Suggested-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
---
 arch/x86/events/intel/core.c    | 11 +++++++++--
 arch/x86/include/asm/kvm_host.h |  9 +++++++++
 arch/x86/kvm/vmx/pmu_intel.c    | 19 +++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c          |  4 ++++
 arch/x86/kvm/vmx/vmx.h          |  1 +
 5 files changed, 42 insertions(+), 2 deletions(-)

Comments

Peter Zijlstra July 2, 2021, 12:46 p.m. UTC | #1
On Tue, Jun 22, 2021 at 05:43:03PM +0800, Zhu Lingshan wrote:
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 22386c1a32b4..8bf494f8af3e 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3970,8 +3970,15 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
>  		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
>  	};
>  
> -	/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
> -	arr[0].guest |= arr[*nr].guest;
> +	if (arr[*nr].host) {
> +		/* Disable guest PEBS if host PEBS is enabled. */
> +		arr[*nr].guest = 0;
> +	} else {
> +		/* Disable guest PEBS for cross-mapped PEBS counters. */
> +		arr[*nr].guest &= ~pmu->host_cross_mapped_mask;
> +		/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
> +		arr[0].guest |= arr[*nr].guest;
> +	}

Not saying I disagree, but is there any way for the guest to figure out
why things aren't working? Is there like a guest log we can dump
something in?

> +void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
> +{
> +	struct kvm_pmc *pmc = NULL;
> +	int bit;
> +
> +	for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
> +			 X86_PMC_IDX_MAX) {
> +		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
> +
> +		if (!pmc || !pmc_speculative_in_use(pmc) ||
> +		    !pmc_is_enabled(pmc))
> +			continue;
> +
> +		if (pmc->perf_event && (pmc->idx != pmc->perf_event->hw.idx))
> +			pmu->host_cross_mapped_mask |=
> +				BIT_ULL(pmc->perf_event->hw.idx);

{ } again.

> +	}
> +}
Zhu, Lingshan July 8, 2021, 8:52 a.m. UTC | #2
On 7/2/2021 8:46 PM, Peter Zijlstra wrote:
> On Tue, Jun 22, 2021 at 05:43:03PM +0800, Zhu Lingshan wrote:
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 22386c1a32b4..8bf494f8af3e 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -3970,8 +3970,15 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
>>   		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
>>   	};
>>   
>> -	/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
>> -	arr[0].guest |= arr[*nr].guest;
>> +	if (arr[*nr].host) {
>> +		/* Disable guest PEBS if host PEBS is enabled. */
>> +		arr[*nr].guest = 0;
>> +	} else {
>> +		/* Disable guest PEBS for cross-mapped PEBS counters. */
>> +		arr[*nr].guest &= ~pmu->host_cross_mapped_mask;
>> +		/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
>> +		arr[0].guest |= arr[*nr].guest;
>> +	}
> Not saying I disagree, but is there any way for the guest to figure out
> why things aren't working? Is there like a guest log we can dump
> something in?
Hi Peter,

We expect to handle these cases in the "slow path" series, which will
try to cross-map the counters.

Thanks!
>
>> +void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
>> +{
>> +	struct kvm_pmc *pmc = NULL;
>> +	int bit;
>> +
>> +	for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
>> +			 X86_PMC_IDX_MAX) {
>> +		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
>> +
>> +		if (!pmc || !pmc_speculative_in_use(pmc) ||
>> +		    !pmc_is_enabled(pmc))
>> +			continue;
>> +
>> +		if (pmc->perf_event && (pmc->idx != pmc->perf_event->hw.idx))
>> +			pmu->host_cross_mapped_mask |=
>> +				BIT_ULL(pmc->perf_event->hw.idx);
> { } again.
>
>> +	}
>> +}
diff mbox series

Patch

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 22386c1a32b4..8bf494f8af3e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3970,8 +3970,15 @@  static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
 		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
 	};
 
-	/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
-	arr[0].guest |= arr[*nr].guest;
+	if (arr[*nr].host) {
+		/* Disable guest PEBS if host PEBS is enabled. */
+		arr[*nr].guest = 0;
+	} else {
+		/* Disable guest PEBS for cross-mapped PEBS counters. */
+		arr[*nr].guest &= ~pmu->host_cross_mapped_mask;
+		/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
+		arr[0].guest |= arr[*nr].guest;
+	}
 
 	++(*nr);
 	return arr;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ef22a742649b..e21989650d2a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -479,6 +479,15 @@  struct kvm_pmu {
 	u64 pebs_data_cfg;
 	u64 pebs_data_cfg_mask;
 
+	/*
+	 * If a guest counter is cross-mapped to a host counter with a
+	 * different index, its PEBS capability will be temporarily disabled.
+	 *
+	 * The user should make sure that this mask is updated
+	 * after disabling interrupts and before perf_guest_get_msrs().
+	 */
+	u64 host_cross_mapped_mask;
+
 	/*
 	 * The gate to release perf_events not marked in
 	 * pmc_in_use only once in a vcpu time slice.
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 296246bf253d..28152d7fd12d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -770,6 +770,25 @@  static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
 		intel_pmu_release_guest_lbr_event(vcpu);
 }
 
+void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
+{
+	struct kvm_pmc *pmc = NULL;
+	int bit;
+
+	for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
+			 X86_PMC_IDX_MAX) {
+		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
+
+		if (!pmc || !pmc_speculative_in_use(pmc) ||
+		    !pmc_is_enabled(pmc))
+			continue;
+
+		if (pmc->perf_event && (pmc->idx != pmc->perf_event->hw.idx))
+			pmu->host_cross_mapped_mask |=
+				BIT_ULL(pmc->perf_event->hw.idx);
+	}
+}
+
 struct kvm_pmu_ops intel_pmu_ops = {
 	.find_arch_event = intel_find_arch_event,
 	.find_fixed_event = intel_find_fixed_event,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3930e89679fc..299f75747936 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6597,6 +6597,10 @@  static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 	struct perf_guest_switch_msr *msrs;
 	struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);
 
+	pmu->host_cross_mapped_mask = 0;
+	if (pmu->pebs_enable & pmu->global_ctrl)
+		intel_pmu_cross_mapped_check(pmu);
+
 	/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
 	msrs = perf_guest_get_msrs(&nr_msrs, (void *)pmu);
 	if (!msrs)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 16e4e457ba23..72f1175e474b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -96,6 +96,7 @@  union vmx_exit_reason {
 #define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
 #define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
 
+void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
 bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu);
 bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);