@@ -40,9 +40,7 @@ static DEFINE_MUTEX(round_robin_lock);
static unsigned long power_saving_mwait_eax;
static unsigned char tsc_detected_unstable;
-static unsigned char tsc_marked_unstable;
static unsigned char lapic_detected_unstable;
-static unsigned char lapic_marked_unstable;
static void power_saving_mwait_init(void)
{
@@ -152,10 +150,9 @@ static int power_saving_thread(void *dat
unsigned int tsk_index = (unsigned long)data;
u64 last_jiffies = 0;
-	sched_setscheduler(current, SCHED_RR, &param);
+	sched_setscheduler(current, SCHED_FIFO, &param);
while (!kthread_should_stop()) {
- int cpu;
u64 expire_time;
try_to_freeze();
@@ -170,41 +167,7 @@ static int power_saving_thread(void *dat
expire_time = jiffies + HZ * (100 - idle_pct) / 100;
- while (!need_resched()) {
- if (tsc_detected_unstable && !tsc_marked_unstable) {
- /* TSC could halt in idle, so notify users */
- mark_tsc_unstable("TSC halts in idle");
- tsc_marked_unstable = 1;
- }
- if (lapic_detected_unstable && !lapic_marked_unstable) {
- int i;
- /* LAPIC could halt in idle, so notify users */
- for_each_online_cpu(i)
- clockevents_notify(
- CLOCK_EVT_NOTIFY_BROADCAST_ON,
- &i);
- lapic_marked_unstable = 1;
- }
- local_irq_disable();
- cpu = smp_processor_id();
- if (lapic_marked_unstable)
- clockevents_notify(
- CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
- stop_critical_timings();
-
- mwait_idle_with_hints(power_saving_mwait_eax, 1);
-
- start_critical_timings();
- if (lapic_marked_unstable)
- clockevents_notify(
- CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
- local_irq_enable();
-
- if (jiffies > expire_time) {
- do_sleep = 1;
- break;
- }
- }
+	play_idle(expire_time - jiffies);
/*
* current sched_rt has threshold for rt task running time.
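
Note: the acpi_pad conversion above keeps the driver's duty-cycle
bookkeeping and delegates the idle period itself to play_idle(). A
minimal sketch of the resulting pattern, assuming a kthread that was
created with kthread_create_on_node() and pinned with kthread_bind()
(identifiers below are illustrative, not part of the patch):

	static int idle_inject_fn(void *unused)
	{
		static const struct sched_param param = {
			.sched_priority = 1,
		};

		/* play_idle() WARNs unless the caller is SCHED_FIFO */
		sched_setscheduler(current, SCHED_FIFO, &param);

		while (!kthread_should_stop()) {
			play_idle(HZ / 10);	/* ~100ms of forced idle */
			/* then stay runnable/sleeping for ~100ms */
			schedule_timeout_interruptible(HZ / 10);
		}
		return 0;
	}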
@@ -256,11 +256,6 @@ static u64 pkg_state_counter(void)
return count;
}
-static void noop_timer(unsigned long foo)
-{
- /* empty... just the fact that we get the interrupt wakes us up */
-}
-
static unsigned int get_compensation(int ratio)
{
unsigned int comp = 0;
@@ -365,7 +360,6 @@ static bool powerclamp_adjust_controls(u
static int clamp_thread(void *arg)
{
int cpunr = (unsigned long)arg;
- DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
static const struct sched_param param = {
.sched_priority = MAX_USER_RT_PRIO/2,
};
@@ -374,11 +368,9 @@ static int clamp_thread(void *arg)
set_bit(cpunr, cpu_clamping_mask);
set_freezable();
- init_timer_on_stack(&wakeup_timer);
	sched_setscheduler(current, SCHED_FIFO, &param);
- while (true == clamping && !kthread_should_stop() &&
- cpu_online(cpunr)) {
+ while (clamping && !kthread_should_stop() && cpu_online(cpunr)) {
int sleeptime;
unsigned long target_jiffies;
unsigned int guard;
@@ -426,35 +418,11 @@ static int clamp_thread(void *arg)
if (should_skip)
continue;
- target_jiffies = jiffies + duration_jiffies;
- mod_timer(&wakeup_timer, target_jiffies);
if (unlikely(local_softirq_pending()))
continue;
- /*
- * stop tick sched during idle time, interrupts are still
- * allowed. thus jiffies are updated properly.
- */
- preempt_disable();
- tick_nohz_idle_enter();
- /* mwait until target jiffies is reached */
- while (time_before(jiffies, target_jiffies)) {
- unsigned long ecx = 1;
- unsigned long eax = target_mwait;
-
- /*
- * REVISIT: may call enter_idle() to notify drivers who
- * can save power during cpu idle. same for exit_idle()
- */
- local_touch_nmi();
- stop_critical_timings();
- mwait_idle_with_hints(eax, ecx);
- start_critical_timings();
- atomic_inc(&idle_wakeup_counter);
- }
- tick_nohz_idle_exit();
- preempt_enable();
+
+ play_idle(duration_jiffies);
}
- del_timer_sync(&wakeup_timer);
clear_bit(cpunr, cpu_clamping_mask);
return 0;
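
Note: play_idle()'s WARN_ON_ONCE() checks only hold because the clamping
threads are pinned kthreads. powerclamp already creates them that way
(outside this hunk); sketched here for context using the stock kthread
API (variable names assumed):

	struct task_struct *thread;

	thread = kthread_create_on_node(clamp_thread,
					(void *)(unsigned long)cpu,
					cpu_to_node(cpu),
					"kidle_inject/%ld", cpu);
	if (!IS_ERR(thread)) {
		/* kthread_bind() pins the thread to one CPU
		 * (nr_cpus_allowed == 1) and sets PF_NO_SETAFFINITY */
		kthread_bind(thread, cpu);
		wake_up_process(thread);
	}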
@@ -255,6 +255,8 @@ enum cpuhp_state {
CPUHP_ONLINE,
};
+void play_idle(unsigned long duration);
+
void cpu_startup_entry(enum cpuhp_state state);
void cpu_idle(void);
@@ -1892,6 +1892,7 @@ extern void thread_group_cputime_adjuste
/*
* Per process flags
*/
+#define PF_IDLE 0x00000002 /* I am an IDLE thread */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
@@ -2204,7 +2205,7 @@ extern struct task_struct *idle_task(int
*/
static inline bool is_idle_task(const struct task_struct *p)
{
- return p->pid == 0;
+ return !!(p->flags & PF_IDLE);
}
extern struct task_struct *curr_task(int cpu);
extern void set_curr_task(int cpu, struct task_struct *p);
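
Note: the pid test could never match an injection kthread, which has an
ordinary pid; only the per-CPU swapper tasks have pid 0. The flag-based
predicate behaves as follows (illustrative summary, not patch content):

	/*
	 * is_idle_task(rq->idle)          -> true (PF_IDLE set in init_idle())
	 * is_idle_task(injection kthread) -> true, but only while PF_IDLE is
	 *                                    set inside play_idle()
	 * is_idle_task(any other task)    -> false
	 */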
@@ -4537,6 +4537,7 @@ void init_idle(struct task_struct *idle,
__sched_fork(0, idle);
idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
+ idle->flags |= PF_IDLE;
do_set_cpus_allowed(idle, cpumask_of(cpu));
/*
@@ -184,72 +184,102 @@ static void cpuidle_idle_call(void)
*
* Called with polling cleared.
*/
-static void cpu_idle_loop(void)
+static void do_idle(void)
{
- while (1) {
- /*
- * If the arch has a polling bit, we maintain an invariant:
- *
- * Our polling bit is clear if we're not scheduled (i.e. if
- * rq->curr != rq->idle). This means that, if rq->idle has
- * the polling bit set, then setting need_resched is
- * guaranteed to cause the cpu to reschedule.
- */
+ /*
+ * If the arch has a polling bit, we maintain an invariant:
+ *
+ * Our polling bit is clear if we're not scheduled (i.e. if
+ * rq->curr != rq->idle). This means that, if rq->idle has
+ * the polling bit set, then setting need_resched is
+ * guaranteed to cause the cpu to reschedule.
+ */
- __current_set_polling();
- tick_nohz_idle_enter();
+ __current_set_polling();
+ tick_nohz_idle_enter();
- while (!need_resched()) {
- check_pgt_cache();
- rmb();
-
- if (cpu_is_offline(smp_processor_id()))
- arch_cpu_idle_dead();
-
- local_irq_disable();
- arch_cpu_idle_enter();
-
- /*
- * In poll mode we reenable interrupts and spin.
- *
- * Also if we detected in the wakeup from idle
- * path that the tick broadcast device expired
- * for us, we don't want to go deep idle as we
- * know that the IPI is going to arrive right
- * away
- */
- if (cpu_idle_force_poll || tick_check_broadcast_expired())
- cpu_idle_poll();
- else
- cpuidle_idle_call();
+ while (!need_resched()) {
+ check_pgt_cache();
+ rmb();
- arch_cpu_idle_exit();
- }
+ if (cpu_is_offline(smp_processor_id()))
+ arch_cpu_idle_dead();
- /*
- * Since we fell out of the loop above, we know
- * TIF_NEED_RESCHED must be set, propagate it into
- * PREEMPT_NEED_RESCHED.
- *
- * This is required because for polling idle loops we will
- * not have had an IPI to fold the state for us.
- */
- preempt_set_need_resched();
- tick_nohz_idle_exit();
- __current_clr_polling();
+ local_irq_disable();
+ arch_cpu_idle_enter();
/*
- * We promise to call sched_ttwu_pending and reschedule
- * if need_resched is set while polling is set. That
- * means that clearing polling needs to be visible
- * before doing these things.
+ * In poll mode we reenable interrupts and spin.
+ *
+ * Also if we detected in the wakeup from idle
+ * path that the tick broadcast device expired
+ * for us, we don't want to go deep idle as we
+ * know that the IPI is going to arrive right
+ * away
*/
- smp_mb__after_atomic();
+ if (cpu_idle_force_poll || tick_check_broadcast_expired())
+ cpu_idle_poll();
+ else
+ cpuidle_idle_call();
- sched_ttwu_pending();
- schedule_preempt_disabled();
+ arch_cpu_idle_exit();
}
+
+ /*
+ * Since we fell out of the loop above, we know
+ * TIF_NEED_RESCHED must be set, propagate it into
+ * PREEMPT_NEED_RESCHED.
+ *
+ * This is required because for polling idle loops we will
+ * not have had an IPI to fold the state for us.
+ */
+ preempt_set_need_resched();
+ tick_nohz_idle_exit();
+ __current_clr_polling();
+
+ /*
+ * We promise to call sched_ttwu_pending and reschedule
+ * if need_resched is set while polling is set. That
+ * means that clearing polling needs to be visible
+ * before doing these things.
+ */
+ smp_mb__after_atomic();
+
+ sched_ttwu_pending();
+ schedule_preempt_disabled();
+}
+
+static void play_idle_timer(unsigned long foo)
+{
+ set_tsk_need_resched(current);
+}
+
+void play_idle(unsigned long duration)
+{
+ DEFINE_TIMER(wakeup_timer, play_idle_timer, 0, 0);
+
+ /*
+ * Only FIFO tasks can disable the tick since they don't need the forced
+ * preemption.
+ */
+ WARN_ON_ONCE(current->policy != SCHED_FIFO);
+ WARN_ON_ONCE(current->nr_cpus_allowed != 1);
+ WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
+ WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
+ rcu_sleep_check();
+
+ init_timer_on_stack(&wakeup_timer);
+ mod_timer_pinned(&wakeup_timer, jiffies + duration);
+
+ preempt_disable();
+ current->flags |= PF_IDLE;
+ do_idle();
+ current->flags &= ~PF_IDLE;
+	del_timer_sync(&wakeup_timer);
+	destroy_timer_on_stack(&wakeup_timer);
+ preempt_fold_need_resched();
+ preempt_enable();
}
+EXPORT_SYMBOL_GPL(play_idle);
void cpu_startup_entry(enum cpuhp_state state)
{
@@ -269,5 +299,6 @@ void cpu_startup_entry(enum cpuhp_state
boot_init_stack_canary();
#endif
arch_cpu_idle_prepare();
- cpu_idle_loop();
+ while (1)
+ do_idle();
}
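
Note on the wakeup mechanism in play_idle() above: mod_timer_pinned()
keeps the timer on the local CPU, and the injecting task remains current
while it spins in do_idle()'s inner loop, so when the timer softirq runs,
set_tsk_need_resched(current) marks the injector and terminates its
while (!need_resched()) loop. A hypothetical call site, assuming a bound
SCHED_FIFO kthread as in the driver hunks above:

	play_idle(4);	/* force roughly four jiffies of idle on this CPU */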
@@ -807,7 +807,6 @@ void tick_nohz_idle_enter(void)
local_irq_enable();
}
-EXPORT_SYMBOL_GPL(tick_nohz_idle_enter);
/**
* tick_nohz_irq_exit - update next tick event from interrupt exit
@@ -934,7 +933,6 @@ void tick_nohz_idle_exit(void)
local_irq_enable();
}
-EXPORT_SYMBOL_GPL(tick_nohz_idle_exit);
static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
{
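
Note: removing these two exports closes the path the old drivers used;
with play_idle() available, nothing outside the core idle loop needs to
start or stop the tick directly. Illustrative comment, not patch content:

	/*
	 * before: a driver could open-code an idle section:
	 *	tick_nohz_idle_enter(); ... mwait ... tick_nohz_idle_exit();
	 * after: the only supported entry point for modules is:
	 *	play_idle(duration_in_jiffies);
	 */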