From patchwork Tue Mar 30 22:55:05 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Greg Kroah-Hartman X-Patchwork-Id: 89734 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o2V0EenQ005705 for ; Wed, 31 Mar 2010 00:14:40 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932161Ab0CaAOL (ORCPT ); Tue, 30 Mar 2010 20:14:11 -0400 Received: from kroah.org ([198.145.64.141]:51861 "EHLO coco.kroah.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932135Ab0C3XRo (ORCPT ); Tue, 30 Mar 2010 19:17:44 -0400 Received: from localhost (c-67-168-196-28.hsd1.or.comcast.net [67.168.196.28]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by coco.kroah.org (Postfix) with ESMTPSA id C32AB48DCE; Tue, 30 Mar 2010 16:17:43 -0700 (PDT) X-Mailbox-Line: From linux@linux.site Tue Mar 30 15:56:15 2010 Message-Id: <20100330225615.121275326@linux.site> User-Agent: quilt/0.47-14.9 Date: Tue, 30 Mar 2010 15:55:05 -0700 From: Greg KH To: linux-kernel@vger.kernel.org, stable@kernel.org Cc: stable-review@kernel.org, torvalds@linux-foundation.org, akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk, Thomas Gleixner , kvm@vger.kernel.org, Jeremy Fitzhardinge , Greg Kroah-Hartman Subject: [027/116] hrtimer: Tune hrtimer_interrupt hang logic In-Reply-To: <20100330230600.GA28802@kroah.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Wed, 31 Mar 2010 00:14:40 +0000 (UTC) --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -162,10 +162,11 @@ struct hrtimer_clock_base { * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode - * @check_clocks: Indictator, when set evaluate time source and clock - * event devices whether high resolution mode can be - * activated. - * @nr_events: Total number of timer interrupt events + * @hang_detected: The last hrtimer interrupt detected a hang + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt */ struct hrtimer_cpu_base { spinlock_t lock; @@ -173,7 +174,11 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; + int hang_detected; unsigned long nr_events; + unsigned long nr_retries; + unsigned long nr_hangs; + ktime_t max_hang_time; #endif }; --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_c static int hrtimer_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { - ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); int res; @@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrti if (expires.tv64 < 0) return -ETIME; - if (expires.tv64 >= expires_next->tv64) + if (expires.tv64 >= cpu_base->expires_next.tv64) + return 0; + + /* + * If a hang was detected in the last timer interrupt then we + * do not schedule a timer which is earlier than the expiry + * which we enforced in the hang detection. We want the system + * to make progress. + */ + if (cpu_base->hang_detected) return 0; /* @@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrti */ res = tick_program_event(expires, 0); if (!IS_ERR_VALUE(res)) - *expires_next = expires; + cpu_base->expires_next = expires; return res; } @@ -1217,29 +1226,6 @@ static void __run_hrtimer(struct hrtimer #ifdef CONFIG_HIGH_RES_TIMERS -static int force_clock_reprogram; - -/* - * After 5 iteration's attempts, we consider that hrtimer_interrupt() - * is hanging, which could happen with something that slows the interrupt - * such as the tracing. Then we force the clock reprogramming for each future - * hrtimer interrupts to avoid infinite loops and use the min_delta_ns - * threshold that we will overwrite. - * The next tick event will be scheduled to 3 times we currently spend on - * hrtimer_interrupt(). This gives a good compromise, the cpus will spend - * 1/4 of their time to process the hrtimer interrupts. This is enough to - * let it running without serious starvation. - */ - -static inline void -hrtimer_interrupt_hanging(struct clock_event_device *dev, - ktime_t try_time) -{ - force_clock_reprogram = 1; - dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; - printk(KERN_WARNING "hrtimer: interrupt too slow, " - "forcing clock min delta to %lu ns\n", dev->min_delta_ns); -} /* * High resolution timer interrupt * Called with interrupts disabled @@ -1248,21 +1234,15 @@ void hrtimer_interrupt(struct clock_even { struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; - ktime_t expires_next, now; - int nr_retries = 0; - int i; + ktime_t expires_next, now, entry_time, delta; + int i, retries = 0; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event.tv64 = KTIME_MAX; - retry: - /* 5 retries is enough to notice a hang */ - if (!(++nr_retries % 5)) - hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); - - now = ktime_get(); - + entry_time = now = ktime_get(); +retry: expires_next.tv64 = KTIME_MAX; spin_lock(&cpu_base->lock); @@ -1324,10 +1304,48 @@ void hrtimer_interrupt(struct clock_even spin_unlock(&cpu_base->lock); /* Reprogramming necessary ? */ - if (expires_next.tv64 != KTIME_MAX) { - if (tick_program_event(expires_next, force_clock_reprogram)) - goto retry; + if (expires_next.tv64 == KTIME_MAX || + !tick_program_event(expires_next, 0)) { + cpu_base->hang_detected = 0; + return; } + + /* + * The next timer was already expired due to: + * - tracing + * - long lasting callbacks + * - being scheduled away when running in a VM + * + * We need to prevent that we loop forever in the hrtimer + * interrupt routine. We give it 3 attempts to avoid + * overreacting on some spurious event. + */ + now = ktime_get(); + cpu_base->nr_retries++; + if (++retries < 3) + goto retry; + /* + * Give the system a chance to do something else than looping + * here. We stored the entry time, so we know exactly how long + * we spent here. We schedule the next event this amount of + * time away. + */ + cpu_base->nr_hangs++; + cpu_base->hang_detected = 1; + delta = ktime_sub(now, entry_time); + if (delta.tv64 > cpu_base->max_hang_time.tv64) + cpu_base->max_hang_time = delta; + /* + * Limit it to a sensible value as we enforce a longer + * delay. Give the CPU at least 100ms to catch up. + */ + if (delta.tv64 > 100 * NSEC_PER_MSEC) + expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); + else + expires_next = ktime_add(now, delta); + tick_program_event(expires_next, 1); + printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n", + ktime_to_ns(delta)); } /* --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m P_ns(expires_next); P(hres_active); P(nr_events); + P(nr_retries); + P(nr_hangs); + P_ns(max_hang_time); #endif #undef P #undef P_ns @@ -252,7 +255,7 @@ static int timer_list_show(struct seq_fi u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.4\n"); + SEQ_printf(m, "Timer List Version: v0.5\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);