[v3,07/17] KVM: x86/pmu: Add IA32_DS_AREA MSR emulation to manage guest DS buffer

Message ID 20210104131542.495413-8-like.xu@linux.intel.com (mailing list archive)
State New, archived
Series KVM: x86/pmu: Add support to enable Guest PEBS via DS

Commit Message

Like Xu Jan. 4, 2021, 1:15 p.m. UTC
When CPUID.01H:EDX.DS[21] is set, the IA32_DS_AREA MSR exists and
points to the linear address of the first byte of the DS buffer
management area, which is used to manage the PEBS records.
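
For reference, the DS buffer management area that IA32_DS_AREA points to
has the layout sketched below (abridged from the 64-bit DS save area
format described in the SDM; the size of the pebs_event_reset[] array
varies with the PEBS format, so treat it as illustrative):

/* Sketch of the 64-bit DS save area; abridged, for illustration only. */
struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;
	u64	pebs_index;
	u64	pebs_absolute_maximum;
	u64	pebs_interrupt_threshold;
	u64	pebs_event_reset[MAX_PEBS_EVENTS];
};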

When guest PEBS is enabled and the guest value differs from the host's,
KVM adds the IA32_DS_AREA MSR to the MSR-switch list so that the guest's
DS value is loaded into the real hardware before VM-entry; the entry is
removed when guest PEBS is disabled.

A WRMSR to the IA32_DS_AREA MSR raises #GP(0) if the source register
contains a non-canonical address. Switching the IA32_DS_AREA MSR also
sets up a quiescent period so that any pending host PEBS records are
written to the host DS area rather than the guest DS area.
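
(For illustration only: the canonicality test reduces to a sign-extension
check. A minimal sketch assuming 48-bit virtual addresses, with a made-up
helper name; KVM's is_noncanonical_address() additionally accounts for
LA57 and the guest's virtual-address width.)

/* Sketch only: canonical iff bits 63:47 are copies of bit 47. */
static inline bool ds_area_is_noncanonical(u64 va)
{
	return (s64)va != ((s64)(va << 16)) >> 16;
}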

When guest PEBS is enabled, MSR_IA32_DS_AREA is added to the list
returned by perf_guest_get_msrs() and is switched during the VMX
transitions, just like the CORE_PERF_GLOBAL_CTRL MSR.
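
Each entry returned by perf_guest_get_msrs() pairs the MSR index with the
host and guest values to switch across the transition:

/* As declared in arch/x86/include/asm/perf_event.h. */
struct perf_guest_switch_msr {
	unsigned msr;
	u64 host, guest;
};

atomic_switch_perf_msrs() then adds each entry to the VMX atomic
MSR-switch lists, or clears the entry when host == guest, as shown in
the vmx.c hunk below.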

Originally-by: Andi Kleen <ak@linux.intel.com>
Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
 arch/x86/events/intel/core.c    | 13 +++++++++++++
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/pmu_intel.c    | 11 +++++++++++
 arch/x86/kvm/vmx/vmx.c          |  6 ++++++
 4 files changed, 31 insertions(+)

Comments

Sean Christopherson Jan. 5, 2021, 9:16 p.m. UTC | #1
On Mon, Jan 04, 2021, Like Xu wrote:
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 6453b8a6834a..ccddda455bec 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3690,6 +3690,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
>  {
>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>  	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
> +	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
>  
>  	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
>  	arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
> @@ -3735,6 +3736,18 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
>  		*nr = 2;
>  	}
>  
> +	if (arr[1].guest) {
> +		arr[2].msr = MSR_IA32_DS_AREA;
> +		arr[2].host = (unsigned long)ds;
> +		/* KVM will update MSR_IA32_DS_AREA with the trapped guest value. */
> +		arr[2].guest = 0ull;
> +		*nr = 3;
> +	} else if (*nr == 2) {
> +		arr[2].msr = MSR_IA32_DS_AREA;
> +		arr[2].host = arr[2].guest = 0;
> +		*nr = 3;
> +	}

Similar comments as the previous patch, please figure out a way to properly
integrate this into the PEBS logic instead of querying arr/nr.

> +
>  	return arr;
>  }
Xu, Like Jan. 8, 2021, 3:05 a.m. UTC | #2
Hi Sean,

On 2021/1/6 5:16, Sean Christopherson wrote:
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 6453b8a6834a..ccddda455bec 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -3690,6 +3690,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
>>   {
>>   	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>>   	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
>> +	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
>>   
>>   	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
>>   	arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
>> @@ -3735,6 +3736,18 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
>>   		*nr = 2;
>>   	}
>>   
>> +	if (arr[1].guest) {
>> +		arr[2].msr = MSR_IA32_DS_AREA;
>> +		arr[2].host = (unsigned long)ds;
>> +		/* KVM will update MSR_IA32_DS_AREA with the trapped guest value. */
>> +		arr[2].guest = 0ull;
>> +		*nr = 3;
>> +	} else if (*nr == 2) {
>> +		arr[2].msr = MSR_IA32_DS_AREA;
>> +		arr[2].host = arr[2].guest = 0;
>> +		*nr = 3;
>> +	}
> Similar comments as the previous patch, please figure out a way to properly
> integrate this into the PEBS logic instead of querying arr/nr.

To address your comment, could you confirm whether you are happy with
the streamlined logic of intel_guest_get_msrs() below:

static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
{
     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
     struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
     struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

     arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
     arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
     arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;

     /*
      * Disable PEBS in the guest if PEBS is used by the host; enabling PEBS
      * in both will lead to unexpected PMIs in the host and/or missed PMIs
      * in the guest.
      */
     if (cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask) {
         if (x86_pmu.flags & PMU_FL_PEBS_ALL)
             arr[0].guest &= ~cpuc->pebs_enabled;
         else
             arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
     }
     *nr = 1;

     if (x86_pmu.pebs) {
         arr[1].msr = MSR_IA32_PEBS_ENABLE;
         arr[2].msr = MSR_IA32_DS_AREA;
         if (x86_pmu.intel_cap.pebs_baseline)
             arr[3].msr = MSR_PEBS_DATA_CFG;

        /* Skip the MSR loads by stuffing guest=host (KVM will remove the entry). */
        arr[1].guest = arr[1].host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask;
        arr[2].guest = arr[2].host = (unsigned long)ds;
        if (x86_pmu.intel_cap.pebs_baseline)
            arr[3].guest = arr[3].host = cpuc->pebs_data_cfg;

         /*
          * Host and guest PEBS are mutually exclusive. Load the guest
          * value iff PEBS is disabled in the host.
          *
          * If PEBS is enabled in the host and the CPU supports PEBS isolation,
          * disabling the counters is sufficient (see commit 9b545c04abd4).
          * Without isolation, PEBS must be explicitly disabled prior to
          * VM-Enter to prevent PEBS writes from overshooting VM-Enter.
          *
          * KVM will update arr[2|3].guest with the trapped guest values
          * iff guest PEBS is allowed to be enabled.
          */
         if (!arr[1].host) {
             arr[1].guest = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
             arr[0].guest |= arr[1].guest;
         } else if (x86_pmu.pebs_no_isolation)
             arr[1].guest = 0;

         *nr = x86_pmu.intel_cap.pebs_baseline ? 4 : 3;
     }

     return arr;
}

---
thx,likexu


Patch

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 6453b8a6834a..ccddda455bec 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3690,6 +3690,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
 
 	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
 	arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
@@ -3735,6 +3736,18 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
 		*nr = 2;
 	}
 
+	if (arr[1].guest) {
+		arr[2].msr = MSR_IA32_DS_AREA;
+		arr[2].host = (unsigned long)ds;
+		/* KVM will update MSR_IA32_DS_AREA with the trapped guest value. */
+		arr[2].guest = 0ull;
+		*nr = 3;
+	} else if (*nr == 2) {
+		arr[2].msr = MSR_IA32_DS_AREA;
+		arr[2].host = arr[2].guest = 0;
+		*nr = 3;
+	}
+
 	return arr;
 }
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 88a403fa46d4..520a21af711b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -449,6 +449,7 @@ struct kvm_pmu {
 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
+	u64 ds_area;
 	u64 pebs_enable;
 	u64 pebs_enable_mask;
 
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 2f10587bda19..ff5fc405703f 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -183,6 +183,9 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 	case MSR_IA32_PEBS_ENABLE:
 		ret = vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT;
 		break;
+	case MSR_IA32_DS_AREA:
+		ret = guest_cpuid_has(vcpu, X86_FEATURE_DS);
+		break;
 	default:
 		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
 			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
@@ -227,6 +230,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_PEBS_ENABLE:
 		msr_info->data = pmu->pebs_enable;
 		return 0;
+	case MSR_IA32_DS_AREA:
+		msr_info->data = pmu->ds_area;
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -294,6 +300,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 0;
 		}
 		break;
+	case MSR_IA32_DS_AREA:
+		if (is_noncanonical_address(data, vcpu))
+			return 1;
+		pmu->ds_area = data;
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 09bc41c53cd8..42c65acc6c01 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -974,6 +974,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 			return;
 		}
 		break;
+	case MSR_IA32_DS_AREA:
 	case MSR_IA32_PEBS_ENABLE:
 		/* PEBS needs a quiescent period after being disabled (to write
 		 * a record).  Disabling PEBS through VMX MSR swapping doesn't
@@ -6522,12 +6523,17 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 {
 	int i, nr_msrs;
 	struct perf_guest_switch_msr *msrs;
+	struct kvm_vcpu *vcpu = &vmx->vcpu;
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
 	msrs = perf_guest_get_msrs(&nr_msrs);
 
 	if (!msrs)
 		return;
 
+	if (nr_msrs > 2 && msrs[1].guest)
+		msrs[2].guest = pmu->ds_area;
+
 	for (i = 0; i < nr_msrs; i++)
 		if (msrs[i].host == msrs[i].guest)
 			clear_atomic_switch_msr(vmx, msrs[i].msr);