@@ -381,7 +381,12 @@ static inline u64 vmx_get_perf_capabilities(void)
static inline u64 vmx_supported_debugctl(void)
{
- return DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF;
+ u64 debugctl = DEBUGCTLMSR_BTF;
+
+ if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
+ debugctl |= DEBUGCTLMSR_LBR;
+
+ return debugctl;
}
#endif /* __KVM_X86_VMX_CAPS_H */
@@ -235,6 +235,65 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
return pmc;
}
+static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
+{
+ struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
+
+ if (lbr_desc->event) {
+ perf_event_release_kernel(lbr_desc->event);
+ lbr_desc->event = NULL;
+ vcpu_to_pmu(vcpu)->event_count--;
+ }
+}
+
+int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
+{
+ struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
+ struct perf_event *event;
+
+ /*
+ * The perf_event_attr is constructed in the minimum efficient way:
+ * - set 'pinned = true' to make it task pinned so that if another
+ * cpu pinned event reclaims LBR, the event->oncpu will be set to -1;
+ * - set '.exclude_host = true' to record guest branches behavior;
+ *
+ * - set '.config = INTEL_FIXED_VLBR_EVENT' to indicates host perf
+ * schedule the event without a real HW counter but a fake one;
+ * check is_guest_lbr_event() and __intel_get_event_constraints();
+ *
+ * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
+ * 'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
+ * PERF_SAMPLE_BRANCH_USER' to configure it as a LBR callstack
+ * event, which helps KVM to save/restore guest LBR records
+ * during host context switches and reduces quite a lot overhead,
+ * check branch_user_callstack() and intel_pmu_lbr_sched_task();
+ */
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(attr),
+ .config = INTEL_FIXED_VLBR_EVENT,
+ .sample_type = PERF_SAMPLE_BRANCH_STACK,
+ .pinned = true,
+ .exclude_host = true,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_USER,
+ };
+
+ if (unlikely(lbr_desc->event))
+ return 0;
+
+ event = perf_event_create_kernel_counter(&attr, -1,
+ current, NULL, NULL);
+ if (IS_ERR(event)) {
+ pr_debug_ratelimited("%s: failed %ld\n",
+ __func__, PTR_ERR(event));
+ return -ENOENT;
+ }
+ lbr_desc->event = event;
+ vcpu_to_pmu(vcpu)->event_count++;
+ return 0;
+}
+
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -441,6 +500,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
vcpu->arch.perf_capabilities = guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
vmx_get_perf_capabilities() : 0;
lbr_desc->records.nr = 0;
+ lbr_desc->event = NULL;
}
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
@@ -465,6 +525,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
pmu->global_ovf_ctrl = 0;
+ intel_pmu_release_guest_lbr_event(vcpu);
}
struct kvm_pmu_ops intel_pmu_ops = {
@@ -1957,8 +1957,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
goto find_shared_msr;
case MSR_IA32_DEBUGCTLMSR:
- msr_info->data = 0;
- break;
+ msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ return 0;
default:
find_shared_msr:
msr = find_msr_entry(vmx, msr_info->index);
@@ -1982,6 +1982,16 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
return (unsigned long)data;
}
+static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
+{
+ u64 debugctl = vmx_supported_debugctl();
+
+ if (!intel_pmu_lbr_is_enabled(vcpu))
+ debugctl &= ~DEBUGCTLMSR_LBR;
+
+ return debugctl;
+}
+
/*
* Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -2037,10 +2047,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
VM_EXIT_SAVE_DEBUG_CONTROLS)
get_vmcs12(vcpu)->guest_ia32_debugctl = data;
- if (data & ~vmx_supported_debugctl())
+ if (data & ~vcpu_supported_debugctl(vcpu))
return 1;
- vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
- __func__, data);
+ if (data & DEBUGCTLMSR_BTF) {
+ vcpu_unimpl(vcpu, "%s: BTF in MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+ __func__, data);
+ data &= ~DEBUGCTLMSR_BTF;
+ }
+ vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+ if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
+ (data & DEBUGCTLMSR_LBR))
+ intel_pmu_create_guest_lbr_event(vcpu);
return 0;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
@@ -98,9 +98,19 @@ struct pt_desc {
bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu);
bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);
+int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
+
struct lbr_desc {
/* Basic info about guest LBR records. */
struct x86_pmu_lbr records;
+
+ /*
+ * Emulate LBR feature via passthrough LBR registers when the
+ * per-vcpu guest LBR event is scheduled on the current pcpu.
+ *
+ * The records may be inaccurate if the host reclaims the LBR.
+ */
+ struct perf_event *event;
};
/*