Message ID | 20240801045907.4010984-11-mizhang@google.com (mailing list archive)
---|---
State | New, archived
Series | Mediated Passthrough vPMU 3.0 for x86
On Thu, Aug 01, 2024 at 04:58:19AM +0000, Mingwei Zhang wrote:
> +void perf_guest_exit(void)
> +{
> +	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> +
> +	lockdep_assert_irqs_disabled();
> +
> +	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> +
> +	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
> +		goto unlock;
> +
> +	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> +	ctx_sched_in(&cpuctx->ctx, EVENT_GUEST);
> +	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> +	if (cpuctx->task_ctx) {
> +		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> +		ctx_sched_in(cpuctx->task_ctx, EVENT_GUEST);
> +		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> +	}

Does this not violate the scheduling order of events? AFAICT this will
do:

  cpu pinned
  cpu flexible
  task pinned
  task flexible

as opposed to:

  cpu pinned
  task pinned
  cpu flexible
  task flexible

We have the perf_event_sched_in() helper for this.

> +
> +	__this_cpu_write(perf_in_guest, false);
> +unlock:
> +	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> +}
> +EXPORT_SYMBOL_GPL(perf_guest_exit);
On 2024-10-14 7:20 a.m., Peter Zijlstra wrote:
> On Thu, Aug 01, 2024 at 04:58:19AM +0000, Mingwei Zhang wrote:
>> +void perf_guest_exit(void)
>> +{
>> +	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>> +
>> +	lockdep_assert_irqs_disabled();
>> +
>> +	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>> +
>> +	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
>> +		goto unlock;
>> +
>> +	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>> +	ctx_sched_in(&cpuctx->ctx, EVENT_GUEST);
>> +	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>> +	if (cpuctx->task_ctx) {
>> +		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
>> +		ctx_sched_in(cpuctx->task_ctx, EVENT_GUEST);
>> +		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>> +	}
>
> Does this not violate the scheduling order of events? AFAICT this will
> do:
>
>   cpu pinned
>   cpu flexible
>   task pinned
>   task flexible
>
> as opposed to:
>
>   cpu pinned
>   task pinned
>   cpu flexible
>   task flexible
>
> We have the perf_event_sched_in() helper for this.

Yes, we can avoid the open-coded sched_in() with the EVENT_GUEST flag and
instead invoke the perf_event_sched_in() helper to do the real scheduling.
I will do more tests to double-check.

Thanks,
Kan

>
>> +
>> +	__this_cpu_write(perf_in_guest, false);
>> +unlock:
>> +	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
>> +}
>> +EXPORT_SYMBOL_GPL(perf_guest_exit);
>
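[Editorial note: a rough sketch, not from the posted series, of the rework Kan describes above: defer the guest-exit rescheduling to perf_event_sched_in() so the cpu-pinned, task-pinned, cpu-flexible, task-flexible order is preserved. The exact perf_event_sched_in() signature and how ctx_sched_in() would be taught to restore only the EVENT_GUEST (exclude_guest) events are assumptions here.]

/*
 * Hypothetical perf_guest_exit(): disable both contexts, then let
 * perf_event_sched_in() place pinned events (cpu, then task) before any
 * flexible events.  The EVENT_GUEST filtering inside the sched-in path is
 * assumed to be adjusted accordingly, per Kan's reply.
 */
void perf_guest_exit(void)
{
	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

	lockdep_assert_irqs_disabled();

	perf_ctx_lock(cpuctx, cpuctx->task_ctx);

	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
		goto unlock;

	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
	if (cpuctx->task_ctx)
		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);

	/* cpu pinned -> task pinned -> cpu flexible -> task flexible */
	perf_event_sched_in(cpuctx, cpuctx->task_ctx);

	if (cpuctx->task_ctx)
		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);

	__this_cpu_write(perf_in_guest, false);
unlock:
	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}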
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 81a5f8399cb8..75773f9890cc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1738,6 +1738,8 @@ extern int perf_event_period(struct perf_event *event, u64 value);
 extern u64 perf_event_pause(struct perf_event *event, bool reset);
 int perf_get_mediated_pmu(void);
 void perf_put_mediated_pmu(void);
+void perf_guest_enter(void);
+void perf_guest_exit(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
@@ -1831,6 +1833,8 @@ static inline int perf_get_mediated_pmu(void)
 }
 
 static inline void perf_put_mediated_pmu(void)			{ }
+static inline void perf_guest_enter(void)			{ }
+static inline void perf_guest_exit(void)			{ }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 57648736e43e..57ff737b922b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5941,6 +5941,60 @@ void perf_put_mediated_pmu(void)
 }
 EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
 
+/* When entering a guest, schedule out all exclude_guest events. */
+void perf_guest_enter(void)
+{
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+
+	lockdep_assert_irqs_disabled();
+
+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+	if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest)))
+		goto unlock;
+
+	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
+	ctx_sched_out(&cpuctx->ctx, EVENT_GUEST);
+	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
+	if (cpuctx->task_ctx) {
+		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
+		task_ctx_sched_out(cpuctx->task_ctx, EVENT_GUEST);
+		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
+	}
+
+	__this_cpu_write(perf_in_guest, true);
+
+unlock:
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+}
+EXPORT_SYMBOL_GPL(perf_guest_enter);
+
+void perf_guest_exit(void)
+{
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+
+	lockdep_assert_irqs_disabled();
+
+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
+		goto unlock;
+
+	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
+	ctx_sched_in(&cpuctx->ctx, EVENT_GUEST);
+	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
+	if (cpuctx->task_ctx) {
+		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
+		ctx_sched_in(cpuctx->task_ctx, EVENT_GUEST);
+		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
+	}
+
+	__this_cpu_write(perf_in_guest, false);
+unlock:
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+}
+EXPORT_SYMBOL_GPL(perf_guest_exit);
+
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
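[Editorial note: for context, an illustrative sketch of the calling convention the patch implies, not part of the posted series. A hypervisor run loop would bracket the guest-entry window with the new hooks while interrupts are disabled; everything below other than perf_guest_enter()/perf_guest_exit() is a hypothetical placeholder, not a real KVM call site.]

/*
 * Illustrative only: struct kvm_vcpu_sketch and run_guest() are made-up
 * names standing in for the real vcpu structure and VM-entry path.
 */
static void vcpu_run_sketch(struct kvm_vcpu_sketch *vcpu)
{
	local_irq_disable();

	/* Schedule out exclude_guest host events before handing the PMU to the guest. */
	perf_guest_enter();

	run_guest(vcpu);		/* hypothetical VM-entry/VM-exit */

	/* Schedule the host's exclude_guest events back in after VM-exit. */
	perf_guest_exit();

	local_irq_enable();
}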