@@ -7,7 +7,8 @@
#ifndef _LINUX_RV_H
#define _LINUX_RV_H
-#define MAX_DA_NAME_LEN 32
+#define MAX_DA_NAME_LEN 32
+#define MAX_DA_RETRY_RACING_EVENTS 3
#ifdef CONFIG_RV
/*
@@ -82,16 +82,19 @@ static inline void da_monitor_reset_##name(struct da_monitor *da_mon) \
*/ \
static inline type da_monitor_curr_state_##name(struct da_monitor *da_mon) \
{ \
- return da_mon->curr_state; \
+ return READ_ONCE(da_mon->curr_state); \
} \
\
/* \
* da_monitor_set_state_##name - set the new current state \
+ * \
+ * Return false, leaving the state unchanged, if it was modified elsewhere \
*/ \
-static inline void \
-da_monitor_set_state_##name(struct da_monitor *da_mon, enum states_##name state) \
+static inline bool \
+da_monitor_set_state_##name(struct da_monitor *da_mon, enum states_##name prev_state, \
+ enum states_##name state) \
{ \
- da_mon->curr_state = state; \
+ return try_cmpxchg(&da_mon->curr_state, &prev_state, state); \
} \
\
/* \
@@ -150,17 +153,29 @@ static inline bool da_monitor_handling_event_##name(struct da_monitor *da_mon)
* Event handler for implicit monitors. Implicit monitor is the one which the
* handler does not need to specify which da_monitor to manipulate. Examples
* of implicit monitor are the per_cpu or the global ones.
+ *
+ * Retry in case there is a race between getting and setting the next state;
+ * return an invalid current state if we run out of retries. The monitor should
+ * be able to handle racing events in any order.
*/
#define DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \
\
static inline bool \
da_event_##name(struct da_monitor *da_mon, enum events_##name event) \
{ \
- type curr_state = da_monitor_curr_state_##name(da_mon); \
- type next_state = model_get_next_state_##name(curr_state, event); \
+ bool changed; \
+ type curr_state, next_state; \
\
- if (next_state != INVALID_STATE) { \
- da_monitor_set_state_##name(da_mon, next_state); \
+ for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \
+ curr_state = da_monitor_curr_state_##name(da_mon); \
+ next_state = model_get_next_state_##name(curr_state, event); \
+ if (next_state == INVALID_STATE) \
+ break; \
+ changed = da_monitor_set_state_##name(da_mon, curr_state, next_state); \
+ if (unlikely(!changed)) { \
+ curr_state = -1; \
+ continue; \
+ } \
\
trace_event_##name(model_get_state_name_##name(curr_state), \
model_get_event_name_##name(event), \
@@ -181,17 +196,29 @@ da_event_##name(struct da_monitor *da_mon, enum events_##name event) \
/*
* Event handler for per_task monitors.
+ *
+ * Retry in case there is a race between getting and setting the next state;
+ * return an invalid current state if we run out of retries. The monitor should
+ * be able to handle racing events in any order.
*/
#define DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \
\
static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \
enum events_##name event) \
{ \
- type curr_state = da_monitor_curr_state_##name(da_mon); \
- type next_state = model_get_next_state_##name(curr_state, event); \
- \
- if (next_state != INVALID_STATE) { \
- da_monitor_set_state_##name(da_mon, next_state); \
+ bool changed; \
+ type curr_state, next_state; \
+ \
+ for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \
+ curr_state = da_monitor_curr_state_##name(da_mon); \
+ next_state = model_get_next_state_##name(curr_state, event); \
+ if (next_state == INVALID_STATE) \
+ break; \
+ changed = da_monitor_set_state_##name(da_mon, curr_state, next_state); \
+ if (unlikely(!changed)) { \
+ curr_state = -1; \
+ continue; \
+ } \
\
trace_event_##name(tsk->pid, \
model_get_state_name_##name(curr_state), \
DA monitors can be accessed from multiple cores simultaneously. This is likely, for instance, when dealing with per-task monitors reacting to events that do not always occur on the CPU where the task is running. This can cause race conditions where two events change the next state and we see inconsistent values. E.g.: [62] event_srs: 27: sleepable x sched_wakeup -> running (final) [63] event_srs: 27: sleepable x sched_set_state_sleepable -> sleepable [63] error_srs: 27: event sched_switch_suspend not expected in the state running In this case the monitor fails because the event on CPU 62 wins against the one on CPU 63, although the correct state should have been sleepable, since the task gets suspended. Detect whether the current state was modified by using try_cmpxchg while storing the next value. If it was, retry by reading the current state again. After a maximum number of failed retries, react as if it were an error with an invalid current state (we cannot determine it). Monitors where this type of condition can occur must be able to account for racing events in any possible order, as we cannot know the winner. Cc: Ingo Molnar <mingo@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Gabriele Monaco <gmonaco@redhat.com> --- include/linux/rv.h | 3 ++- include/rv/da_monitor.h | 53 +++++++++++++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 14 deletions(-)