===================================================================
@@ -135,7 +135,8 @@ extern bool cpuidle_not_available(struct
struct cpuidle_device *dev);
extern int cpuidle_select(struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+ struct cpuidle_device *dev,
+ bool *stop_tick);
extern int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index);
extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -167,7 +168,7 @@ static inline bool cpuidle_not_available
struct cpuidle_device *dev)
{return true; }
static inline int cpuidle_select(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev, bool *stop_tick)
{return -ENODEV; }
static inline int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
@@ -250,7 +251,8 @@ struct cpuidle_governor {
struct cpuidle_device *dev);
int (*select) (struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+ struct cpuidle_device *dev,
+ bool *stop_tick);
void (*reflect) (struct cpuidle_device *dev, int index);
};
===================================================================
@@ -188,13 +188,15 @@ static void cpuidle_idle_call(void)
next_state = cpuidle_find_deepest_state(drv, dev);
call_cpuidle(drv, dev, next_state);
} else {
+ bool stop_tick = true;
+
tick_nohz_idle_stop_tick();
rcu_idle_enter();
/*
* Ask the cpuidle framework to choose a convenient idle state.
*/
- next_state = cpuidle_select(drv, dev);
+ next_state = cpuidle_select(drv, dev, &stop_tick);
entered_state = call_cpuidle(drv, dev, next_state);
/*
* Give the governor an opportunity to reflect on the outcome
===================================================================
@@ -272,12 +272,18 @@ int cpuidle_enter_state(struct cpuidle_d
*
* @drv: the cpuidle driver
* @dev: the cpuidle device
+ * @stop_tick: indication of whether or not to stop the tick
*
* Returns the index of the idle state. The return value must not be negative.
+ *
+ * 'false' is expected to be written to the memory location pointed to by
+ * @stop_tick if the scheduler tick should not be stopped before entering
+ * the returned state.
*/
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ bool *stop_tick)
{
- return cpuidle_curr_governor->select(drv, dev);
+ return cpuidle_curr_governor->select(drv, dev, stop_tick);
}
/**
===================================================================
@@ -63,9 +63,10 @@ static inline void ladder_do_selection(s
* ladder_select_state - selects the next state to enter
* @drv: cpuidle driver
* @dev: the CPU
+ * @dummy: not used
*/
static int ladder_select_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev, bool *dummy)
{
struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
struct device *device = get_cpu_device(dev->cpu);
===================================================================
@@ -279,8 +279,10 @@ again:
* menu_select - selects the next idle state to enter
* @drv: cpuidle driver containing state data
* @dev: the CPU
+ * @stop_tick: indication of whether or not to stop the tick
*/
-static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ bool *stop_tick)
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
struct device *device = get_cpu_device(dev->cpu);
@@ -303,8 +305,10 @@ static int menu_select(struct cpuidle_dr
latency_req = resume_latency;
/* Special case when user has set very strict latency requirement */
- if (unlikely(latency_req == 0))
+ if (unlikely(latency_req == 0)) {
+ *stop_tick = false;
return 0;
+ }
/* determine the expected residency time, round up */
data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
@@ -354,6 +358,7 @@ static int menu_select(struct cpuidle_dr
if (latency_req > interactivity_req)
latency_req = interactivity_req;
+ expected_interval = data->predicted_us;
/*
* Find the idle state with the lowest power while satisfying
* our constraints.
@@ -369,15 +374,30 @@ static int menu_select(struct cpuidle_dr
idx = i; /* first enabled state */
if (s->target_residency > data->predicted_us)
break;
- if (s->exit_latency > latency_req)
+ if (s->exit_latency > latency_req) {
+ /*
+ * If we break out of the loop for latency reasons, use
+ * the target residency of the selected state as the
+ * expected idle duration so that the tick is retained
+ * as long as that target residency is low enough.
+ */
+ expected_interval = drv->states[idx].target_residency;
break;
-
+ }
idx = i;
}
if (idx == -1)
idx = 0; /* No states enabled. Must use 0. */
+ /*
+ * Don't stop the tick if the selected state is a polling one or if the
+ * expected idle duration is shorter than the tick period length.
+ */
+ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
+ expected_interval < TICK_USEC)
+ *stop_tick = false;
+
data->last_state_idx = idx;
return data->last_state_idx;
@@ -396,7 +416,34 @@ static void menu_reflect(struct cpuidle_
struct menu_device *data = this_cpu_ptr(&menu_devices);
data->last_state_idx = index;
- data->needs_update = 1;
+ /*
+ * Tick wakeups occurring when the tick_nohz_get_sleep_length() return
+ * value is within the tick boundary should be treated as regular ones,
+ * as the nohz code itself doesn't stop the tick then.
+ */
+ if (tick_nohz_idle_got_tick() && data->next_timer_us > TICK_USEC) {
+ unsigned int new_factor = data->correction_factor[data->bucket];
+
+ /*
+ * Only update the correction factor, don't update the repeating
+ * pattern data to avoid polluting it with the tick period
+ * length which is a design artifact here.
+ */
+ new_factor -= new_factor / DECAY;
+ /*
+ * The nohz code said that there wouldn't be any wakeups
+ * within the tick boundary (if the tick wasn't stopped), but
+ * menu_select() had a differing opinion. Yet, the CPU was
+ * woken up by the tick, so menu_select() was not quite right.
+ * Try to make it do a better job next time by bumping up the
+ * correction factor. Use 0.75 * RESOLUTION (which is easy
+ * enough to get), as that should work fine on average.
+ */
+ new_factor += RESOLUTION / 2 + RESOLUTION / 4;
+ data->correction_factor[data->bucket] = new_factor;
+ } else {
+ data->needs_update = 1;
+ }
}
/**
===================================================================
@@ -991,6 +991,20 @@ void tick_nohz_irq_exit(void)
}
/**
+ * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
+ */
+bool tick_nohz_idle_got_tick(void)
+{
+ struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+ if (ts->inidle > 1) {
+ ts->inidle = 1;
+ return true;
+ }
+ return false;
+}
+
+/**
* tick_nohz_get_sleep_length - return the length of the current sleep
*
* Called from power state control code with interrupts disabled
@@ -1101,6 +1115,9 @@ static void tick_nohz_handler(struct clo
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
+ if (ts->inidle)
+ ts->inidle = 2;
+
dev->next_event = KTIME_MAX;
tick_sched_do_timer(now);
@@ -1198,6 +1215,9 @@ static enum hrtimer_restart tick_sched_t
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
+ if (ts->inidle)
+ ts->inidle = 2;
+
tick_sched_do_timer(now);
/*
===================================================================
@@ -119,6 +119,7 @@ extern void tick_nohz_idle_restart_tick(
extern void tick_nohz_idle_enter(void);
extern void tick_nohz_idle_exit(void);
extern void tick_nohz_irq_exit(void);
+extern bool tick_nohz_idle_got_tick(void);
extern ktime_t tick_nohz_get_sleep_length(void);
extern unsigned long tick_nohz_get_idle_calls(void);
extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
@@ -139,6 +140,7 @@ static inline void tick_nohz_idle_stop_t
static inline void tick_nohz_idle_restart_tick(void) { }
static inline void tick_nohz_idle_enter(void) { }
static inline void tick_nohz_idle_exit(void) { }
+static inline bool tick_nohz_idle_got_tick(void) { return false; }
static inline ktime_t tick_nohz_get_sleep_length(void)
{