
[V9,03/18] perf/x86/intel: Handle guest PEBS overflow PMI for KVM guest

Message ID 20210722054159.4459-4-lingshan.zhu@intel.com (mailing list archive)
State New, archived
Series KVM: x86/pmu: Add *basic* support to enable guest PEBS via DS

Commit Message

Zhu, Lingshan July 22, 2021, 5:41 a.m. UTC
From: Like Xu <like.xu@linux.intel.com>

With PEBS virtualization, the guest PEBS records get delivered to the
guest DS area, and the host PMI handler uses perf_guest_cbs->is_in_guest()
to distinguish whether the PMI comes from guest code, as is already done
for Intel PT.

No matter how many guest PEBS counters have overflowed, triggering one
fake event is enough. The fake event causes the KVM PMI callback to be
called, which in turn injects the PEBS overflow PMI into the guest.

KVM may inject the PMI with BUFFER_OVF set even if the guest DS is
empty. That should be harmless: the guest PEBS handler will retrieve
the correct information from its own PEBS records buffer.
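
For context, a rough sketch of the KVM-side overflow callback that the fake
event ends up in (the real callback is added elsewhere in this series, in
arch/x86/kvm/pmu.c; the exact shape below is illustrative and not taken from
this patch):

/* Illustrative sketch only -- not the code added by this series. */
static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	/*
	 * For a PEBS-enabled (precise_ip) counter, report a DS buffer
	 * overflow to the guest; otherwise latch the per-counter bit.
	 */
	if (perf_event->attr.precise_ip)
		__set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
			  (unsigned long *)&pmu->global_status);
	else
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);

	/* Have KVM inject the PMI into the guest on the next VM-entry. */
	kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}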

Originally-by: Andi Kleen <ak@linux.intel.com>
Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
---
 arch/x86/events/intel/core.c | 45 ++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

Comments

Liuxiangdong Aug. 5, 2021, 1:15 a.m. UTC | #1
On 2021/7/22 13:41, Zhu Lingshan wrote:
> From: Like Xu <like.xu@linux.intel.com>
>
> With PEBS virtualization, the guest PEBS records get delivered to the
> guest DS, and the host pmi handler uses perf_guest_cbs->is_in_guest()
> to distinguish whether the PMI comes from the guest code like Intel PT.
>
> No matter how many guest PEBS counters are overflowed, only triggering
> one fake event is enough. The fake event causes the KVM PMI callback to
> be called, thereby injecting the PEBS overflow PMI into the guest.
>
> KVM may inject the PMI with BUFFER_OVF set, even if the guest DS is
> empty. That should really be harmless. Thus guest PEBS handler would
> retrieve the correct information from its own PEBS records buffer.
>
> Originally-by: Andi Kleen <ak@linux.intel.com>
> Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> Signed-off-by: Like Xu <like.xu@linux.intel.com>
> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
> ---
>   arch/x86/events/intel/core.c | 45 ++++++++++++++++++++++++++++++++++++
>   1 file changed, 45 insertions(+)
>
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index da835f5a37e2..2eceb73cd303 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2783,6 +2783,50 @@ static void intel_pmu_reset(void)
>   }
>   
>   DECLARE_STATIC_CALL(x86_guest_handle_intel_pt_intr, *(perf_guest_cbs->handle_intel_pt_intr));
> +DECLARE_STATIC_CALL(x86_guest_state, *(perf_guest_cbs->state));
> +
> +/*
> + * We may be running with guest PEBS events created by KVM, and the
> + * PEBS records are logged into the guest's DS and invisible to host.
> + *
> + * In the case of guest PEBS overflow, we only trigger a fake event
> + * to emulate the PEBS overflow PMI for guest PBES counters in KVM.
> + * The guest will then vm-entry and check the guest DS area to read
> + * the guest PEBS records.
> + *
> + * The contents and other behavior of the guest event do not matter.
> + */
> +static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
> +				      struct perf_sample_data *data)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;

guest_pebs_idxs has been defined here.

> +	struct perf_event *event = NULL;
> +	unsigned int guest = 0;
> +	int bit;
> +
> +	guest = static_call(x86_guest_state)();
> +	if (!(guest & PERF_GUEST_ACTIVE))
> +		return;
> +
> +	if (!x86_pmu.pebs_vmx || !x86_pmu.pebs_active ||
> +	    !(cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask))
> +		return;
> +
Why not use guest_pebs_idxs?

+	if (!x86_pmu.pebs_vmx || !x86_pmu.pebs_active ||
+	    !guest_pebs_idxs)
+		return;


> +	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
> +			 INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
> +		event = cpuc->events[bit];
> +		if (!event->attr.precise_ip)
> +			continue;
> +
> +		perf_sample_data_init(data, 0, event->hw.last_period);
> +		if (perf_event_overflow(event, data, regs))
> +			x86_pmu_stop(event, 0);
> +
> +		/* Inject one fake event is enough. */
> +		break;
> +	}
> +}
>   
>   static int handle_pmi_common(struct pt_regs *regs, u64 status)
>   {
> @@ -2835,6 +2879,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
>   		u64 pebs_enabled = cpuc->pebs_enabled;
>   
>   		handled++;
> +		x86_pmu_handle_guest_pebs(regs, &data);
>   		x86_pmu.drain_pebs(regs, &data);
>   		status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
>
Zhu, Lingshan Aug. 6, 2021, 12:23 p.m. UTC | #2
On 8/5/2021 9:15 AM, Liuxiangdong wrote:
>
>
> On 2021/7/22 13:41, Zhu Lingshan wrote:
>> From: Like Xu <like.xu@linux.intel.com>
>>
>> With PEBS virtualization, the guest PEBS records get delivered to the
>> guest DS, and the host pmi handler uses perf_guest_cbs->is_in_guest()
>> to distinguish whether the PMI comes from the guest code like Intel PT.
>>
>> No matter how many guest PEBS counters are overflowed, only triggering
>> one fake event is enough. The fake event causes the KVM PMI callback to
>> be called, thereby injecting the PEBS overflow PMI into the guest.
>>
>> KVM may inject the PMI with BUFFER_OVF set, even if the guest DS is
>> empty. That should really be harmless. Thus guest PEBS handler would
>> retrieve the correct information from its own PEBS records buffer.
>>
>> Originally-by: Andi Kleen <ak@linux.intel.com>
>> Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> Signed-off-by: Like Xu <like.xu@linux.intel.com>
>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>> ---
>>   arch/x86/events/intel/core.c | 45 ++++++++++++++++++++++++++++++++++++
>>   1 file changed, 45 insertions(+)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index da835f5a37e2..2eceb73cd303 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -2783,6 +2783,50 @@ static void intel_pmu_reset(void)
>>   }
>> DECLARE_STATIC_CALL(x86_guest_handle_intel_pt_intr, *(perf_guest_cbs->handle_intel_pt_intr));
>> +DECLARE_STATIC_CALL(x86_guest_state, *(perf_guest_cbs->state));
>> +
>> +/*
>> + * We may be running with guest PEBS events created by KVM, and the
>> + * PEBS records are logged into the guest's DS and invisible to host.
>> + *
>> + * In the case of guest PEBS overflow, we only trigger a fake event
>> + * to emulate the PEBS overflow PMI for guest PBES counters in KVM.
>> + * The guest will then vm-entry and check the guest DS area to read
>> + * the guest PEBS records.
>> + *
>> + * The contents and other behavior of the guest event do not matter.
>> + */
>> +static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
>> +                      struct perf_sample_data *data)
>> +{
>> +    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> +    u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
>
> guest_pebs_idxs has been defined here.
>
>> +    struct perf_event *event = NULL;
>> +    unsigned int guest = 0;
>> +    int bit;
>> +
>> +    guest = static_call(x86_guest_state)();
>> +    if (!(guest & PERF_GUEST_ACTIVE))
>> +        return;
>> +
>> +    if (!x86_pmu.pebs_vmx || !x86_pmu.pebs_active ||
>> +        !(cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask))
>> +        return;
>> +
> Why not use guest_pebs_idxs?
>
> +    if (!x86_pmu.pebs_vmx || !x86_pmu.pebs_active ||
> +        !guest_pebs_idxs)
> +        return;
Thanks, I have applied this change in V10.

Thanks
>
>
>> + for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
>> +             INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
>> +        event = cpuc->events[bit];
>> +        if (!event->attr.precise_ip)
>> +            continue;
>> +
>> +        perf_sample_data_init(data, 0, event->hw.last_period);
>> +        if (perf_event_overflow(event, data, regs))
>> +            x86_pmu_stop(event, 0);
>> +
>> +        /* Inject one fake event is enough. */
>> +        break;
>> +    }
>> +}
>>     static int handle_pmi_common(struct pt_regs *regs, u64 status)
>>   {
>> @@ -2835,6 +2879,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
>>           u64 pebs_enabled = cpuc->pebs_enabled;
>>             handled++;
>> +        x86_pmu_handle_guest_pebs(regs, &data);
>>           x86_pmu.drain_pebs(regs, &data);
>>           status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
>

Patch

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index da835f5a37e2..2eceb73cd303 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2783,6 +2783,50 @@  static void intel_pmu_reset(void)
 }
 
 DECLARE_STATIC_CALL(x86_guest_handle_intel_pt_intr, *(perf_guest_cbs->handle_intel_pt_intr));
+DECLARE_STATIC_CALL(x86_guest_state, *(perf_guest_cbs->state));
+
+/*
+ * We may be running with guest PEBS events created by KVM, and the
+ * PEBS records are logged into the guest's DS and invisible to host.
+ *
+ * In the case of guest PEBS overflow, we only trigger a fake event
+ * to emulate the PEBS overflow PMI for guest PEBS counters in KVM.
+ * The guest will then vm-entry and check the guest DS area to read
+ * the guest PEBS records.
+ *
+ * The contents and other behavior of the guest event do not matter.
+ */
+static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
+				      struct perf_sample_data *data)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
+	struct perf_event *event = NULL;
+	unsigned int guest = 0;
+	int bit;
+
+	guest = static_call(x86_guest_state)();
+	if (!(guest & PERF_GUEST_ACTIVE))
+		return;
+
+	if (!x86_pmu.pebs_vmx || !x86_pmu.pebs_active ||
+	    !(cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask))
+		return;
+
+	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
+			 INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
+		event = cpuc->events[bit];
+		if (!event->attr.precise_ip)
+			continue;
+
+		perf_sample_data_init(data, 0, event->hw.last_period);
+		if (perf_event_overflow(event, data, regs))
+			x86_pmu_stop(event, 0);
+
+		/* Injecting one fake event is enough. */
+		break;
+	}
+}
 
 static int handle_pmi_common(struct pt_regs *regs, u64 status)
 {
@@ -2835,6 +2879,7 @@  static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		u64 pebs_enabled = cpuc->pebs_enabled;
 
 		handled++;
+		x86_pmu_handle_guest_pebs(regs, &data);
 		x86_pmu.drain_pebs(regs, &data);
 		status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
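
For reference, the x86_guest_state static call above resolves to KVM's
perf_guest_cbs->state callback, which is introduced elsewhere in this
series. A minimal sketch of what that callback is expected to report
(illustrative only; the real implementation also distinguishes guest
user/kernel context):

/* Illustrative sketch only -- not part of this diff. */
static unsigned int kvm_guest_state(void)
{
	/* The vCPU currently loaded on this physical CPU, if any. */
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (!vcpu)
		return 0;

	/* A vCPU is loaded, so the PMI may have fired in guest context. */
	return PERF_GUEST_ACTIVE;
}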