diff mbox

[PATCHv3,01/10] clocksource: add generic dummy timer driver

Message ID 1363198676-30417-2-git-send-email-sboyd@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stephen Boyd March 13, 2013, 6:17 p.m. UTC
From: Mark Rutland <mark.rutland@arm.com>

Several architectures have a dummy timer driver tightly coupled with
their broadcast code to support machines without cpu-local timers (or
where there is a lack of driver support).

Since 12ad100046: "clockevents: Add generic timer broadcast function"
it's been possible to write broadcast-capable timer drivers decoupled
from the broadcast mechanism. We can use this functionality to implement
a generic dummy timer driver that can be shared by all architectures
with generic tick broadcast (ARCH_HAS_TICK_BROADCAST).

This patch implements a generic dummy timer using this facility.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[sboyd: Make percpu data static and use __this_cpu_ptr()]
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clocksource/Makefile      |  1 +
 drivers/clocksource/dummy_timer.c | 67 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 drivers/clocksource/dummy_timer.c

Comments

Mark Rutland March 21, 2013, 6:09 p.m. UTC | #1
Hi Stephen,

I've just been trying to test the dummy timer, and realised it's broken, as it
registers a cpu notifier from a device_initcall (after SMP's been brought up),
and doesn't ensure all active CPUs have been set up. Evidently no-one else has
attempted to test it thus far, and I'm not able to throughly test it at the
moment.

For now I'd recommend going back to the rearranging the existing arm
implementation as you had in v1. We seem to have ironed out the kinks with
having a simultaneous arch_timer and dummy, so I don't imagine we'll have major
issues taking that route.

Thanks,
Mark.

On Wed, Mar 13, 2013 at 06:17:47PM +0000, Stephen Boyd wrote:
> From: Mark Rutland <mark.rutland@arm.com>
> 
> Several architectures have a dummy timer driver tightly coupled with
> their broadcast code to support machines without cpu-local timers (or
> where there is a lack of driver support).
> 
> Since 12ad100046: "clockevents: Add generic timer broadcast function"
> it's been possible to write broadcast-capable timer drivers decoupled
> from the broadcast mechanism. We can use this functionality to implement
> a generic dummy timer driver that can be shared by all architectures
> with generic tick broadcast (ARCH_HAS_TICK_BROADCAST).
> 
> This patch implements a generic dummy timer using this facility.
> 
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Cc: John Stultz <john.stultz@linaro.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> [sboyd: Make percpu data static and use __this_cpu_ptr()]
> Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
> ---
>  drivers/clocksource/Makefile      |  1 +
>  drivers/clocksource/dummy_timer.c | 67 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 68 insertions(+)
>  create mode 100644 drivers/clocksource/dummy_timer.c
> 
> diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
> index 4d8283a..655d718 100644
> --- a/drivers/clocksource/Makefile
> +++ b/drivers/clocksource/Makefile
> @@ -22,3 +22,4 @@ obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
>  
>  obj-$(CONFIG_ARM_ARCH_TIMER)		+= arm_arch_timer.o
>  obj-$(CONFIG_CLKSRC_METAG_GENERIC)	+= metag_generic.o
> +obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST)	+= dummy_timer.o
> diff --git a/drivers/clocksource/dummy_timer.c b/drivers/clocksource/dummy_timer.c
> new file mode 100644
> index 0000000..7e1ce45
> --- /dev/null
> +++ b/drivers/clocksource/dummy_timer.c
> @@ -0,0 +1,67 @@
> +/*
> + *  linux/drivers/clocksource/dummy_timer.c
> + *
> + *  Copyright (C) 2013 ARM Ltd.
> + *  All Rights Reserved
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#include <linux/clockchips.h>
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <linux/percpu.h>
> +
> +static DEFINE_PER_CPU(struct clock_event_device, dummy_timer_evt);
> +
> +static void dummy_timer_set_mode(enum clock_event_mode mode,
> +			   struct clock_event_device *evt)
> +{
> +	/*
> +	 * Core clockevents code will call this when exchanging timer devices.
> +	 * We don't need to do anything here.
> +	 */
> +}
> +
> +static void __cpuinit dummy_timer_setup(void)
> +{
> +	int cpu = smp_processor_id();
> +	struct clock_event_device *evt = __this_cpu_ptr(&dummy_timer_evt);
> +
> +	evt->name	= "dummy_timer";
> +	evt->features	= CLOCK_EVT_FEAT_PERIODIC |
> +			  CLOCK_EVT_FEAT_ONESHOT |
> +			  CLOCK_EVT_FEAT_DUMMY;
> +	evt->rating	= 100;
> +	evt->set_mode	= dummy_timer_set_mode;
> +	evt->cpumask	= cpumask_of(cpu);
> +
> +	clockevents_register_device(evt);
> +}
> +
> +static int __cpuinit dummy_timer_cpu_notify(struct notifier_block *self,
> +				      unsigned long action, void *hcpu)
> +{
> +	if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING)
> +		dummy_timer_setup();
> +
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block dummy_timer_cpu_nb __cpuinitdata = {
> +	.notifier_call = dummy_timer_cpu_notify,
> +};
> +
> +static int __init dummy_timer_register(void)
> +{
> +	int err = register_cpu_notifier(&dummy_timer_cpu_nb);
> +	if (err)
> +		return err;
> +
> +	/* We won't get a call on the boot CPU, so register immediately */
> +	dummy_timer_setup();
> +
> +	return 0;
> +}
> +device_initcall(dummy_timer_register);
> -- 
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation
> 
>
Stephen Boyd March 21, 2013, 6:13 p.m. UTC | #2
On 03/21/13 11:09, Mark Rutland wrote:
> Hi Stephen,
>
> I've just been trying to test the dummy timer, and realised it's broken, as it
> registers a cpu notifier from a device_initcall (after SMP's been brought up),
> and doesn't ensure all active CPUs have been set up. Evidently no-one else has
> attempted to test it thus far, and I'm not able to throughly test it at the
> moment.

Would it be sufficient to register as a pre-smp initcall?
Mark Rutland March 22, 2013, 6:03 p.m. UTC | #3
On Thu, Mar 21, 2013 at 06:13:17PM +0000, Stephen Boyd wrote:
> On 03/21/13 11:09, Mark Rutland wrote:
> > Hi Stephen,
> >
> > I've just been trying to test the dummy timer, and realised it's broken, as it
> > registers a cpu notifier from a device_initcall (after SMP's been brought up),
> > and doesn't ensure all active CPUs have been set up. Evidently no-one else has
> > attempted to test it thus far, and I'm not able to throughly test it at the
> > moment.
> 
> Would it be sufficient to register as a pre-smp initcall?

I've looked a bit further into the problem, and I believe using early_initcall
will make it work as well as the current arm-specific dummy timers work with a
rating of 100 (this means the recent patch lowering the rating broke tc2 as
explained below).

I've spent the last few hours trying to get the dummy_timer driver working on
tc2 with the sp804 as the broadcast source (with architected timer support
disabled). It turns out that having dummy timer's rating so low means that it
won't be selected as the tick device on cpu0 in preference to the sp804, and
thus won't push the sp804 out of that spot (allowing it to become the broadcast
source). This leads to boot stalling.

Jumping the dummy_timer's rating up would fix this, but that doesn't seem
great. Registering the dummy before all other clocks would also fix this (I
tried calling dummy_timer_register from time_init), but I can't see a way to do
that while keeping the driver self-contained.

Thanks,
Mark.
Stephen Boyd March 25, 2013, 4:49 p.m. UTC | #4
On 03/22/13 11:03, Mark Rutland wrote:
> On Thu, Mar 21, 2013 at 06:13:17PM +0000, Stephen Boyd wrote:
>> On 03/21/13 11:09, Mark Rutland wrote:
>>> Hi Stephen,
>>>
>>> I've just been trying to test the dummy timer, and realised it's broken, as it
>>> registers a cpu notifier from a device_initcall (after SMP's been brought up),
>>> and doesn't ensure all active CPUs have been set up. Evidently no-one else has
>>> attempted to test it thus far, and I'm not able to throughly test it at the
>>> moment.
>> Would it be sufficient to register as a pre-smp initcall?
> I've looked a bit further into the problem, and I believe using early_initcall
> will make it work as well as the current arm-specific dummy timers work with a
> rating of 100 (this means the recent patch lowering the rating broke tc2 as
> explained below).

Yes, moving to early_initcall() should make the dummy timer equivalent
to the code that is already in the arm directory. It sounds like there
is a problem with mainline though?

>
> I've spent the last few hours trying to get the dummy_timer driver working on
> tc2 with the sp804 as the broadcast source (with architected timer support
> disabled). It turns out that having dummy timer's rating so low means that it
> won't be selected as the tick device on cpu0 in preference to the sp804, and
> thus won't push the sp804 out of that spot (allowing it to become the broadcast
> source). This leads to boot stalling.

I'm not following here. Why would we want to remove sp804 from the tick
duty?

>
> Jumping the dummy_timer's rating up would fix this, but that doesn't seem
> great. Registering the dummy before all other clocks would also fix this (I
> tried calling dummy_timer_register from time_init), but I can't see a way to do
> that while keeping the driver self-contained.
Mark Rutland March 25, 2013, 6 p.m. UTC | #5
On Mon, Mar 25, 2013 at 04:49:18PM +0000, Stephen Boyd wrote:
> On 03/22/13 11:03, Mark Rutland wrote:
> > On Thu, Mar 21, 2013 at 06:13:17PM +0000, Stephen Boyd wrote:
> >> On 03/21/13 11:09, Mark Rutland wrote:
> >>> Hi Stephen,
> >>>
> >>> I've just been trying to test the dummy timer, and realised it's broken, as it
> >>> registers a cpu notifier from a device_initcall (after SMP's been brought up),
> >>> and doesn't ensure all active CPUs have been set up. Evidently no-one else has
> >>> attempted to test it thus far, and I'm not able to throughly test it at the
> >>> moment.
> >> Would it be sufficient to register as a pre-smp initcall?
> > I've looked a bit further into the problem, and I believe using early_initcall
> > will make it work as well as the current arm-specific dummy timers work with a
> > rating of 100 (this means the recent patch lowering the rating broke tc2 as
> > explained below).
> 
> Yes, moving to early_initcall() should make the dummy timer equivalent
> to the code that is already in the arm directory. It sounds like there
> is a problem with mainline though?

Sorry, I rushed writing my earlier email in an attempt to get something on the
list, I should've spent some time making it clearer.

Using early_initcall will make it equivalent to the current code.

Unfortunately the existing code's current rating of 100 (as of Santosh's patch
[1]) breaks running an SMP system without local timers (at least using the
sp804 as a broadcast source on tc2 with architected timer support disabled).

It seems my patch making the timer core always reject dummy timers as broadcast
sources hasn't gotten to mainline yet, so we can't revert the patch without
breaking the platform Santosh noticed the problem on.

Thomas, do you have an idea of if/when tip/timers/urgent will hit mainline?

> 
> >
> > I've spent the last few hours trying to get the dummy_timer driver working on
> > tc2 with the sp804 as the broadcast source (with architected timer support
> > disabled). It turns out that having dummy timer's rating so low means that it
> > won't be selected as the tick device on cpu0 in preference to the sp804, and
> > thus won't push the sp804 out of that spot (allowing it to become the broadcast
> > source). This leads to boot stalling.
> 
> I'm not following here. Why would we want to remove sp804 from the tick
> duty?

To run an SMP system without local timers, we need the sp804 to be the
broadcast timer. Unfortunately the tick device and broadcast timer are mutually
exclusive positions, so we need to have a dummy timer knock the sp804 out of
tick device duty to enable broadcast.

When the dummy timer's rating was 400 (against the sp804's 350), this worked.
The sp804 would be registered, and would become cpu0's tick device. Later the
dummy would get registered, knocking the sp804 out (whereupon it would get
cycled back through tick_check_new_device and become the broadcast timer).

With the dummy timer's rating lower, the sp804 stays as cpu0's tick device, all
other cpus get dummy timers, but there's no broadcast source, so the system
locks up waiting for secondary cpus.

Thanks,
Mark.

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2013-March/154582.html
Thomas Gleixner March 25, 2013, 8:47 p.m. UTC | #6
On Mon, 25 Mar 2013, Mark Rutland wrote:
> Thomas, do you have an idea of if/when tip/timers/urgent will hit mainline?

Thanks for the reminder. I just sent a pull request Linuswards.

Thanks,

	tglx
Stephen Boyd March 26, 2013, 2:14 a.m. UTC | #7
On 03/25/13 11:00, Mark Rutland wrote:
>
>>> I've spent the last few hours trying to get the dummy_timer driver working on
>>> tc2 with the sp804 as the broadcast source (with architected timer support
>>> disabled). It turns out that having dummy timer's rating so low means that it
>>> won't be selected as the tick device on cpu0 in preference to the sp804, and
>>> thus won't push the sp804 out of that spot (allowing it to become the broadcast
>>> source). This leads to boot stalling.
>> I'm not following here. Why would we want to remove sp804 from the tick
>> duty?
> To run an SMP system without local timers, we need the sp804 to be the
> broadcast timer. Unfortunately the tick device and broadcast timer are mutually
> exclusive positions, so we need to have a dummy timer knock the sp804 out of
> tick device duty to enable broadcast.
>
> When the dummy timer's rating was 400 (against the sp804's 350), this worked.
> The sp804 would be registered, and would become cpu0's tick device. Later the
> dummy would get registered, knocking the sp804 out (whereupon it would get
> cycled back through tick_check_new_device and become the broadcast timer).
>
> With the dummy timer's rating lower, the sp804 stays as cpu0's tick device, all
> other cpus get dummy timers, but there's no broadcast source, so the system
> locks up waiting for secondary cpus.

Ok. Thanks for clearing up my confusion.

Like you say, increasing the dummy timer rating seems like a hack. But
it also sounds like you want to keep the dummy timer driver fully self
contained. I'm not opposed to calling dummy_timer_register() at the
bottom of tick_init() if we have to, but it sounds like you don't like that.

An alternative would be to push the dummy timer logic into the core
clockevents layer under the ifdef for arch has broadcast. This is
probably the correct approach because most devices don't want to have a
dummy timer sitting around unused. I might be able to take a look at
this tomorrow.

One final question, if you remove all other CPUs besides the CPU that is
servicing the sp804 interrupt do we end up in a situation where the
sp804 is broadcasting to the dummy tick device? I haven't read through
all the code yet for that one. I would think tick_switch_to_oneshot()
would complain on your device?
Mark Rutland March 26, 2013, 11:28 a.m. UTC | #8
On Tue, Mar 26, 2013 at 02:14:53AM +0000, Stephen Boyd wrote:
> On 03/25/13 11:00, Mark Rutland wrote:
> >
> >>> I've spent the last few hours trying to get the dummy_timer driver working on
> >>> tc2 with the sp804 as the broadcast source (with architected timer support
> >>> disabled). It turns out that having dummy timer's rating so low means that it
> >>> won't be selected as the tick device on cpu0 in preference to the sp804, and
> >>> thus won't push the sp804 out of that spot (allowing it to become the broadcast
> >>> source). This leads to boot stalling.
> >> I'm not following here. Why would we want to remove sp804 from the tick
> >> duty?
> > To run an SMP system without local timers, we need the sp804 to be the
> > broadcast timer. Unfortunately the tick device and broadcast timer are mutually
> > exclusive positions, so we need to have a dummy timer knock the sp804 out of
> > tick device duty to enable broadcast.
> >
> > When the dummy timer's rating was 400 (against the sp804's 350), this worked.
> > The sp804 would be registered, and would become cpu0's tick device. Later the
> > dummy would get registered, knocking the sp804 out (whereupon it would get
> > cycled back through tick_check_new_device and become the broadcast timer).
> >
> > With the dummy timer's rating lower, the sp804 stays as cpu0's tick device, all
> > other cpus get dummy timers, but there's no broadcast source, so the system
> > locks up waiting for secondary cpus.
> 
> Ok. Thanks for clearing up my confusion.
> 
> Like you say, increasing the dummy timer rating seems like a hack. But
> it also sounds like you want to keep the dummy timer driver fully self
> contained. I'm not opposed to calling dummy_timer_register() at the
> bottom of tick_init() if we have to, but it sounds like you don't like that.

I'd like to keep the dummy timer driver self-contained if we can, but if it
makes it more robust and/or easier to deal with by having an external call to
register the driver, then I'm not opposed to that.

> 
> An alternative would be to push the dummy timer logic into the core
> clockevents layer under the ifdef for arch has broadcast. This is
> probably the correct approach because most devices don't want to have a
> dummy timer sitting around unused. I might be able to take a look at
> this tomorrow.

I'm also not opposed to this idea.

> 
> One final question, if you remove all other CPUs besides the CPU that is
> servicing the sp804 interrupt do we end up in a situation where the
> sp804 is broadcasting to the dummy tick device? I haven't read through
> all the code yet for that one. I would think tick_switch_to_oneshot()
> would complain on your device?

The current code in arch/arm/kernel/smp.c only registers the local timer on
cpu0 if we have more than one CPU, so the sp804 stays as cpu0's tick device.

With the generic dummy timer (with a rating bumped up to 400, using an
early_initcall, and the dummy timers rejected as broadcast sources [1]), even
when booting only with one cpu described in the dt the sp804 is relegated to
broadcast timer duty, broadcasting to the dummy timer on cpu0. Echoing 'q' to
/proc/sysrq-trigger gives me:

Tick Device: mode:     0
Broadcast device
Clock Event Device: v2m-timer0
 max_delta_ns:   4294966591000
 min_delta_ns:   14999
 mult:           2147484
 shift:          31
 mode:           2
 next_event:     9223372036854775807 nsecs
 set_next_event: sp804_set_next_event
 set_mode:       sp804_set_mode
 event_handler:  tick_handle_periodic_broadcast
 retries:        0
tick_broadcast_mask: 00000001
tick_broadcast_oneshot_mask: 00000000


Tick Device: mode:     0
Per CPU device: 0
Clock Event Device: dummy_timer
 max_delta_ns:   0
 min_delta_ns:   0
 mult:           0
 shift:          0
 mode:           1
 next_event:     9223372036854775807 nsecs
 set_next_event: <00000000>
 set_mode:       dummy_timer_set_mode
 event_handler:  tick_handle_periodic
 retries:        0

Though "broadcasting" to the same cpu is special-cased in tick_do_broadcast,
which will call the tick device's event_handler directly rather than having the
cpu attempt to IPI to itself.

As you suggest, tick_switch_to_oneshot does complain:

Clockevents: could not switch to one-shot mode: dummy_timer is not functional.
Could not switch to high resolution mode on CPU 0

To handle this case we could check cpu_possible_mask when initialising the
dummy and only register it if more than 1 cpu is possible. That would be
roughly in line with how we handle this case in smp.c.

Thanks,
Mark.

[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=a7dc19b8652c862d5b7c4d2339bd3c428bd29c4a
Mark Rutland March 26, 2013, 3:26 p.m. UTC | #9
On Mon, Mar 25, 2013 at 08:47:42PM +0000, Thomas Gleixner wrote:
> On Mon, 25 Mar 2013, Mark Rutland wrote:
> > Thomas, do you have an idea of if/when tip/timers/urgent will hit mainline?
> 
> Thanks for the reminder. I just sent a pull request Linuswards.

Cheers!

Mark.
diff mbox

Patch

diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 4d8283a..655d718 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -22,3 +22,4 @@  obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)		+= arm_arch_timer.o
 obj-$(CONFIG_CLKSRC_METAG_GENERIC)	+= metag_generic.o
+obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST)	+= dummy_timer.o
diff --git a/drivers/clocksource/dummy_timer.c b/drivers/clocksource/dummy_timer.c
new file mode 100644
index 0000000..7e1ce45
--- /dev/null
+++ b/drivers/clocksource/dummy_timer.c
@@ -0,0 +1,67 @@ 
+/*
+ *  linux/drivers/clocksource/dummy_timer.c
+ *
+ *  Copyright (C) 2013 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clockchips.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+
+static DEFINE_PER_CPU(struct clock_event_device, dummy_timer_evt);
+
+static void dummy_timer_set_mode(enum clock_event_mode mode,
+			   struct clock_event_device *evt)
+{
+	/*
+	 * Core clockevents code will call this when exchanging timer devices.
+	 * We don't need to do anything here.
+	 */
+}
+
+static void __cpuinit dummy_timer_setup(void)
+{
+	int cpu = smp_processor_id();
+	struct clock_event_device *evt = __this_cpu_ptr(&dummy_timer_evt);
+
+	evt->name	= "dummy_timer";
+	evt->features	= CLOCK_EVT_FEAT_PERIODIC |
+			  CLOCK_EVT_FEAT_ONESHOT |
+			  CLOCK_EVT_FEAT_DUMMY;
+	evt->rating	= 100;
+	evt->set_mode	= dummy_timer_set_mode;
+	evt->cpumask	= cpumask_of(cpu);
+
+	clockevents_register_device(evt);
+}
+
+static int __cpuinit dummy_timer_cpu_notify(struct notifier_block *self,
+				      unsigned long action, void *hcpu)
+{
+	if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING)
+		dummy_timer_setup();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block dummy_timer_cpu_nb __cpuinitdata = {
+	.notifier_call = dummy_timer_cpu_notify,
+};
+
+static int __init dummy_timer_register(void)
+{
+	int err = register_cpu_notifier(&dummy_timer_cpu_nb);
+	if (err)
+		return err;
+
+	/* We won't get a call on the boot CPU, so register immediately */
+	dummy_timer_setup();
+
+	return 0;
+}
+device_initcall(dummy_timer_register);