Message ID | 20220823093221.38075-7-likexu@tencent.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | x86/pmu: Corner cases fixes and optimization | expand |
On Tue, Aug 23, 2022, Like Xu wrote:
> From: Like Xu <likexu@tencent.com>
>
> There are contextual restrictions on the functions that can be called
> in the *_exit_handlers_fastpath path, for example calling
> pmc_reprogram_counter() brings up a host complaint like:

State the actual problem instead of forcing the reader to decipher that
from the stacktrace.

> [*] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
> [*] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2981888, name: CPU 15/KVM
> [*] preempt_count: 1, expected: 0
> [*] RCU nest depth: 0, expected: 0
> [*] INFO: lockdep is turned off.
> [*] irq event stamp: 0
> [*] hardirqs last enabled at (0): [<0000000000000000>] 0x0
> [*] hardirqs last disabled at (0): [<ffffffff8121222a>] copy_process+0x146a/0x62d0
> [*] softirqs last enabled at (0): [<ffffffff81212269>] copy_process+0x14a9/0x62d0
> [*] softirqs last disabled at (0): [<0000000000000000>] 0x0
> [*] Preemption disabled at:
> [*] [<ffffffffc2063fc1>] vcpu_enter_guest+0x1001/0x3dc0 [kvm]
> [*] CPU: 17 PID: 2981888 Comm: CPU 15/KVM Kdump: 5.19.0-rc1-g239111db364c-dirty #2
> [*] Call Trace:
> [*]  <TASK>
> [*]  dump_stack_lvl+0x6c/0x9b
> [*]  __might_resched.cold+0x22e/0x297
> [*]  __mutex_lock+0xc0/0x23b0
> [*]  perf_event_ctx_lock_nested+0x18f/0x340
> [*]  perf_event_pause+0x1a/0x110
> [*]  reprogram_counter+0x2af/0x1490 [kvm]
> [*]  kvm_pmu_trigger_event+0x429/0x950 [kvm]
> [*]  kvm_skip_emulated_instruction+0x48/0x90 [kvm]
> [*]  handle_fastpath_set_msr_irqoff+0x349/0x3b0 [kvm]
> [*]  vmx_vcpu_run+0x268e/0x3b80 [kvm_intel]
> [*]  vcpu_enter_guest+0x1d22/0x3dc0 [kvm]
>
> A new stale_counter field is introduced to keep this part of the semantics
> invariant.
It records the current counter value and it's used to determine
> whether to inject an emulated overflow interrupt in the later
> kvm_pmu_handle_event(), given that the internal count value from its
> perf_event has not been added to pmc->counter in time, or the guest
> will update the value of a running counter directly.

Describe what the change is at a high level, don't give a play-by-play of
the code changes.

  Defer reprogramming counters and handling overflow via KVM_REQ_PMU when
  incrementing counters.  KVM skips emulated WRMSR in the VM-Exit fastpath,
  the fastpath runs with IRQs disabled, skipping instructions can increment
  and reprogram counters, reprogramming counters can sleep, and sleeping is
  disallowed while IRQs are disabled.

  <stack trace>

  Add a field to kvm_pmc to track the previous counter value in order to
  defer overflow detection to kvm_pmu_handle_event() (reprogramming must be
  done before handling overflow).

> Opportunistically shrink sizeof(struct kvm_pmc) a bit.
>
> Suggested-by: Wanpeng Li <wanpengli@tencent.com>
> Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions")
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  5 +++--
>  arch/x86/kvm/pmu.c              | 15 ++++++++-------
>  arch/x86/kvm/svm/pmu.c          |  2 +-
>  arch/x86/kvm/vmx/pmu_intel.c    |  4 ++--
>  4 files changed, 14 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 4e568a7ef464..ffd982bf015d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -488,7 +488,10 @@ enum pmc_type {
>  struct kvm_pmc {
>  	enum pmc_type type;
>  	u8 idx;
> +	bool is_paused;
> +	bool intr;
>  	u64 counter;
> +	u64 stale_counter;

Use "prev_counter", "stale" makes it sound like a flag, e.g. "this counter
is stale".

>  	u64 eventsel;
>  	struct perf_event *perf_event;
>  	struct kvm_vcpu *vcpu;
> @@ -498,8 +501,6 @@ struct kvm_pmc {
>  	 * ctrl value for fixed counters.
>  	 */
>  	u64 current_config;
> -	bool is_paused;
> -	bool intr;
>  };
>
>  #define KVM_PMC_MAX_FIXED 3
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 6940cbeee54d..45d062cb1dd5 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -350,6 +350,12 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
>  	}
>
>  	__reprogram_counter(pmc);
> +
> +	if (pmc->stale_counter) {

This check is unnecessary.  The values are unsigned, so counter can't be
less than the previous value if the previous value was '0'.

> +		if (pmc->counter < pmc->stale_counter)
> +			__kvm_perf_overflow(pmc, false);
> +		pmc->stale_counter = 0;
> +	}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4e568a7ef464..ffd982bf015d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -488,7 +488,10 @@ enum pmc_type {
 struct kvm_pmc {
 	enum pmc_type type;
 	u8 idx;
+	bool is_paused;
+	bool intr;
 	u64 counter;
+	u64 stale_counter;
 	u64 eventsel;
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
@@ -498,8 +501,6 @@ struct kvm_pmc {
 	 * ctrl value for fixed counters.
 	 */
 	u64 current_config;
-	bool is_paused;
-	bool intr;
 };
 
 #define KVM_PMC_MAX_FIXED 3
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 6940cbeee54d..45d062cb1dd5 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -350,6 +350,12 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 	}
 
 	__reprogram_counter(pmc);
+
+	if (pmc->stale_counter) {
+		if (pmc->counter < pmc->stale_counter)
+			__kvm_perf_overflow(pmc, false);
+		pmc->stale_counter = 0;
+	}
 }
 
 /*
@@ -522,14 +528,9 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 
 static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
-	u64 prev_count;
-
-	prev_count = pmc->counter;
+	pmc->stale_counter = pmc->counter;
 	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
-
-	__reprogram_counter(pmc);
-	if (pmc->counter < prev_count)
-		__kvm_perf_overflow(pmc, false);
+	reprogram_counter(pmc);
 }
 
 static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index f24613a108c5..e9c66dd659a6 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -290,7 +290,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 		struct kvm_pmc *pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
+		pmc->counter = pmc->stale_counter = pmc->eventsel = 0;
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6242b0b81116..42b591755010 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -647,14 +647,14 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 		pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
+		pmc->counter = pmc->stale_counter = pmc->eventsel = 0;
 	}
 
 	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
 		pmc = &pmu->fixed_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = 0;
+		pmc->counter = pmc->stale_counter = 0;
 	}
 
 	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;