@@ -296,6 +296,55 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath,
TP_ARGS(cgrp, cpu, contended)
);

+DECLARE_EVENT_CLASS(cgroup_ongoing,
+
+ TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+ long res, unsigned int race, ktime_t ts),
+
+ TP_ARGS(cgrp, cgrp_ongoing, res, race, ts),
+
+ TP_STRUCT__entry(
+ __field( int, root )
+ __field( int, level )
+ __field( u64, id )
+ __field( u64, id_ongoing )
+ __field( ktime_t, ts )
+ __field( long, res )
+ __field( u64, race )
+ ),
+
+ TP_fast_assign(
+ __entry->root = cgrp->root->hierarchy_id;
+ __entry->id = cgroup_id(cgrp);
+ __entry->level = cgrp->level;
+ __entry->id_ongoing = cgroup_id(cgrp_ongoing);
+ __entry->res = res;
+ __entry->race = race;
+ __entry->ts = ts;
+ ),
+
+ TP_printk("root=%d id=%llu level=%d ongoing_flusher=%llu res=%ld race=%llu ts=%lld",
+ __entry->root, __entry->id, __entry->level,
+ __entry->id_ongoing, __entry->res, __entry->race, __entry->ts)
+);
+
+DEFINE_EVENT(cgroup_ongoing, cgroup_ongoing_flusher,
+
+ TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+ long res, unsigned int race, ktime_t ts),
+
+ TP_ARGS(cgrp, cgrp_ongoing, res, race, ts)
+);
+
+DEFINE_EVENT(cgroup_ongoing, cgroup_ongoing_flusher_wait,
+
+ TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+ long res, unsigned int race, ktime_t ts),
+
+ TP_ARGS(cgrp, cgrp_ongoing, res, race, ts)
+);
+
+
#endif /* _TRACE_CGROUP_H */

/* This part must be outside protection */
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -321,6 +321,7 @@ static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop)
static bool cgroup_rstat_trylock_flusher(struct cgroup *cgrp)
{
struct cgroup *ongoing;
+ unsigned int race = 0;
bool locked;

/* Check if ongoing flusher is already taking care of this, if
@@ -330,17 +331,25 @@ static bool cgroup_rstat_trylock_flusher(struct cgroup *cgrp)
retry:
ongoing = READ_ONCE(cgrp_rstat_ongoing_flusher);
if (ongoing && cgroup_is_descendant(cgrp, ongoing)) {
- wait_for_completion_interruptible_timeout(
+ ktime_t ts = ktime_get_mono_fast_ns();
+ long res = 0;
+
+ trace_cgroup_ongoing_flusher(cgrp, ongoing, 0, race, ts);
+
+ res = wait_for_completion_interruptible_timeout(
&ongoing->flush_done, MAX_WAIT);
- /* TODO: Add tracepoint here */
+ trace_cgroup_ongoing_flusher_wait(cgrp, ongoing, res, race, ts);
+
return false;
}

locked = __cgroup_rstat_trylock(cgrp, -1);
if (!locked) {
/* Contended: Handle losing race for ongoing flusher */
- if (!ongoing && READ_ONCE(cgrp_rstat_ongoing_flusher))
+ if (!ongoing && READ_ONCE(cgrp_rstat_ongoing_flusher)) {
+ race++;
goto retry;
+ }

__cgroup_rstat_lock(cgrp, -1, false);
}
I'll be using these tracepoints in production and will report back on
findings, e.g. measuring how often the race for the ongoing flusher
happens. Then we can decide if it is worth keeping/applying this patch.

Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
---
 include/trace/events/cgroup.h | 49 +++++++++++++++++++++++++++++++++++++++++
 kernel/cgroup/rstat.c         | 15 ++++++++++---
 2 files changed, 61 insertions(+), 3 deletions(-)
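For anyone decoding these events: both tracepoints record the same ts,
sampled with ktime_get_mono_fast_ns() before blocking, so the wait
latency is the _wait event's own ring-buffer timestamp minus ts, and
race > 0 marks flushers that lost the trylock race at least once before
waiting. Below is a minimal sketch of interpreting the recorded 'res'
value; flusher_wait_outcome() is a hypothetical helper for illustration,
not part of the patch, and simply follows the return convention of
wait_for_completion_interruptible_timeout():

/* Hypothetical helper, illustration only (not part of the patch):
 * classify the 'res' value recorded by cgroup_ongoing_flusher_wait.
 * wait_for_completion_interruptible_timeout() returns a positive
 * number of jiffies left if the ongoing flusher completed in time,
 * 0 if MAX_WAIT expired first, and -ERESTARTSYS if the waiter was
 * interrupted by a signal.
 */
static const char *flusher_wait_outcome(long res)
{
	if (res > 0)
		return "completed";
	if (res == 0)
		return "timeout";
	return "interrupted";	/* -ERESTARTSYS */
}

Counting trace events with race > 0 against the total event count then
directly answers how often the trylock race is lost in production.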