
[PATCHv3,1/3] sched_clock: Add support for >32 bit sched_clock

Message ID 1370476485-468-2-git-send-email-sboyd@codeaurora.org (mailing list archive)
State New, archived

Commit Message

Stephen Boyd June 5, 2013, 11:54 p.m. UTC
The ARM architected system counter has at least 56 usable bits.
Add support for counters with more than 32 bits to the generic
sched_clock implementation so we can avoid the complexity of
dealing with wrap-around on these devices while benefiting from
the irqtime accounting and suspend/resume handling that the
generic sched_clock code already has.

All users should switch over to the 64-bit read function so we can
deprecate setup_sched_clock() in favor of sched_clock_setup().

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---

I've noticed that we probably need to update the mult/shift
calculation similar to how clocksources are done. Should we
just copy/paste the maxsec calculation code here or do something
smarter?

 include/linux/sched_clock.h |  1 +
 kernel/time/sched_clock.c   | 41 +++++++++++++++++++++++++++--------------
 2 files changed, 28 insertions(+), 14 deletions(-)

Comments

John Stultz June 6, 2013, 12:38 a.m. UTC | #1
On 06/05/2013 04:54 PM, Stephen Boyd wrote:
> The ARM architected system counter has at least 56 usable bits.
> Add support for counters with more than 32 bits to the generic
> sched_clock implementation so we can avoid the complexity of
> dealing with wrap-around on these devices while benefiting from
> the irqtime accounting and suspend/resume handling that the
> generic sched_clock code already has.
>
> All users should switch over to the 64-bit read function so we can
> deprecate setup_sched_clock() in favor of sched_clock_setup().

Minor nits below.

>
> Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
> ---
>
> I've noticed that we probably need to update the mult/shift
> calculation similar to how clocksources are done. Should we
> just copy/paste the maxsec calculation code here or do something
> smarter?

So, the clocksource calculation has an extra variable it has to balance, 
which is the granularity of ntp adjustments being made (since with 
higher shift values, we can make relatively smaller changes by +1 or -1 
from mult).

sched_clock doesn't have to deal with frequency adjustments, so the 
shift value just needs to be high enough to be able to accurately 
express the desired counter frequency.  Too high and you risk 
multiplication overflows if there are large gaps between updates; too 
low, though, and you run into possible accuracy issues (though I hope 
there isn't much that's using sched_clock for long-term timing where 
slight accuracy issues would be problematic).

So I think it's OK if the sched_clock code uses its own logic for 
calculating the mult/shift pair, since the constraints are different 
from what we expect from timekeeping.
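
As a rough illustration of the trade-off (a standalone userspace
sketch; the 19.2MHz rate below is just an example, not something from
this patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t rate = 19200000;	/* example counter rate in Hz */
	uint32_t shift = 24;		/* hand-picked for the example */
	/* mult such that (cyc * mult) >> shift ~= cyc * 1e9 / rate */
	uint32_t mult = (uint32_t)((1000000000ULL << shift) / rate);
	uint64_t cyc = rate;		/* one second worth of cycles */

	printf("1s of cycles -> %llu ns (mult=%u, shift=%u)\n",
	       (unsigned long long)((cyc * mult) >> shift), mult, shift);

	/* raising shift improves precision but shrinks this bound */
	uint64_t max_s = (UINT64_MAX / mult) / rate;
	printf("cyc * mult overflows after ~%llu s without an update\n",
	       (unsigned long long)max_s);
	return 0;
}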


>
>   include/linux/sched_clock.h |  1 +
>   kernel/time/sched_clock.c   | 41 +++++++++++++++++++++++++++--------------
>   2 files changed, 28 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
> index fa7922c..81baaef 100644
> --- a/include/linux/sched_clock.h
> +++ b/include/linux/sched_clock.h
> @@ -15,6 +15,7 @@ static inline void sched_clock_postinit(void) { }
>   #endif
>   
>   extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
> +extern void sched_clock_setup(u64 (*read)(void), int bits, unsigned long rate);

Eww. This sort of word-swizzled function name makes patch reviewing a pain.

I know you're trying to deprecate the old function and provide a smooth 
transition, but would you also consider including follow-on 
patch/patches with this set that convert the existing setup_sched_clock 
usage (at least the ones in drivers/clocksource?) so it doesn't 
stick around forever?

And if not, at least add a clear comment here, and maybe some build 
warnings to the old function so the driver owners know to make the 
conversion happen quickly.
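
For the build warning, annotating the old declaration with the
kernel's __deprecated attribute would be one way (a sketch, not part
of this patch):

extern void setup_sched_clock(u32 (*read)(void), int bits,
			      unsigned long rate) __deprecated;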



>   extern unsigned long long (*sched_clock_func)(void);
>   
> diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
> index aad1ae6..3478b6d 100644
> --- a/kernel/time/sched_clock.c
> +++ b/kernel/time/sched_clock.c
> @@ -14,11 +14,12 @@
>   #include <linux/syscore_ops.h>
>   #include <linux/timer.h>
>   #include <linux/sched_clock.h>
> +#include <linux/bitops.h>
>   
>   struct clock_data {
>   	u64 epoch_ns;
> -	u32 epoch_cyc;
> -	u32 epoch_cyc_copy;
> +	u64 epoch_cyc;
> +	u64 epoch_cyc_copy;
>   	unsigned long rate;
>   	u32 mult;
>   	u32 shift;
> @@ -35,24 +36,31 @@ static struct clock_data cd = {
>   	.mult	= NSEC_PER_SEC / HZ,
>   };
>   
> -static u32 __read_mostly sched_clock_mask = 0xffffffff;
> +static u64 __read_mostly sched_clock_mask;
>   
> -static u32 notrace jiffy_sched_clock_read(void)
> +static u64 notrace jiffy_sched_clock_read(void)
>   {
> -	return (u32)(jiffies - INITIAL_JIFFIES);
> +	return (u64)(jiffies - INITIAL_JIFFIES);
>   }

Also, you might add a comment noting you register jiffies with 
BITS_PER_LONG, to clarify that we don't have to use jiffies_64 here on 
32-bit systems (despite the u64 cast)?
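
Something along these lines, say (a sketch of the comment being asked
for, not wording from the patch):

/*
 * We register jiffy_sched_clock_read() with BITS_PER_LONG bits, so
 * plain jiffies (an unsigned long) is wide enough here; the u64
 * return type does not mean jiffies_64 is needed on 32-bit systems.
 */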


thanks
-john
Stephen Boyd June 6, 2013, 1:43 a.m. UTC | #2
On 06/05, John Stultz wrote:
> On 06/05/2013 04:54 PM, Stephen Boyd wrote:
> >
> >I've noticed that we probably need to update the mult/shift
> >calculation similar to how clocksources are done. Should we
> >just copy/paste the maxsec calculation code here or do something
> >smarter?
> 
> So, the clocksource calculation has an extra variable it has to
> balance, which is the granularity of ntp adjustments being made
> (since with higher shift values, we can make relatively smaller
> changes by +1 or -1 from mult).
> 
> sched_clock doesn't have to deal with frequency adjustments, so the
> shift value just needs to be high enough to be able to accurately
> express the desired counter frequency.  Too high and you risk
> multiplication overflows if there are large gaps between updates;
> too low, though, and you run into possible accuracy issues (though I
> hope there isn't much that's using sched_clock for long-term timing
> where slight accuracy issues would be problematic).
> 
> So I think it's OK if the sched_clock code uses its own logic for
> calculating the mult/shift pair, since the constraints are different
> from what we expect from timekeeping.
> 

I was thinking perhaps we could do the (1 << bits) / rate
calculation but not limit it to 600 seconds, and instead let it be
as big as it actually is. Right now it's actually better to
register as a 32-bit clock, because the wraparound comes out to be
larger when maxsec is 0.
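
Something like this, reusing the clocksource helper (a rough sketch;
the UINT_MAX clamp is my addition, since wrap / rate can exceed the
helper's u32 maxsec for wide counters at low rates):

	u64 wrap = sched_clock_mask;	/* (1ULL << bits) - 1 */
	u64 maxsec = wrap;

	do_div(maxsec, rate);		/* seconds until the counter wraps */
	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC,
			       min_t(u64, maxsec, UINT_MAX));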

> 
> >
> >  include/linux/sched_clock.h |  1 +
> >  kernel/time/sched_clock.c   | 41 +++++++++++++++++++++++++++--------------
> >  2 files changed, 28 insertions(+), 14 deletions(-)
> >
> >diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
> >index fa7922c..81baaef 100644
> >--- a/include/linux/sched_clock.h
> >+++ b/include/linux/sched_clock.h
> >@@ -15,6 +15,7 @@ static inline void sched_clock_postinit(void) { }
> >  #endif
> >  extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
> >+extern void sched_clock_setup(u64 (*read)(void), int bits, unsigned long rate);
> 
> Eww. This sort of word-swizzled function name makes patch reviewing a pain.

How about sched_clock_register() or register_sched_clock()?
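
i.e., keeping the signature from this patch (just to make the naming
suggestion concrete):

extern void sched_clock_register(u64 (*read)(void), int bits,
				 unsigned long rate);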

> 
> I know you're trying to deprecate the old function and provide a
> smooth transition, but would you also consider including follow-on
> patch/patches with this set that convert the existing
> setup_sched_clock usage (at least the ones in
> drivers/clocksource?) so it doesn't stick around forever?
> 
> And if not, at least add a clear comment here, and maybe some build
> warnings to the old function so the driver owners know to make the
> conversion happen quickly.

Yes, I plan to send out the conversion patches and deprecate the
function if this is acceptable. Then we can remove the function
after the merge window is over and all stragglers are converted.

> 
> 
> 
> >  extern unsigned long long (*sched_clock_func)(void);
> >diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
> >index aad1ae6..3478b6d 100644
> >--- a/kernel/time/sched_clock.c
> >+++ b/kernel/time/sched_clock.c
> >@@ -35,24 +36,31 @@ static struct clock_data cd = {
> >  	.mult	= NSEC_PER_SEC / HZ,
> >  };
> >-static u32 __read_mostly sched_clock_mask = 0xffffffff;
> >+static u64 __read_mostly sched_clock_mask;
> >-static u32 notrace jiffy_sched_clock_read(void)
> >+static u64 notrace jiffy_sched_clock_read(void)
> >  {
> >-	return (u32)(jiffies - INITIAL_JIFFIES);
> >+	return (u64)(jiffies - INITIAL_JIFFIES);
> >  }
> 
> Also, you might add a comment noting you register jiffies with
> BITS_PER_LONG, to clarify that we don't have to use jiffies_64 here
> on 32-bit systems (despite the u64 cast)?

Sure. Perhaps it is clearer if we don't have the u64 cast here at
all?
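
i.e. (same behaviour, since jiffies is an unsigned long and the
promotion to u64 is implicit):

static u64 notrace jiffy_sched_clock_read(void)
{
	return jiffies - INITIAL_JIFFIES;
}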

Patch

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index fa7922c..81baaef 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -15,6 +15,7 @@  static inline void sched_clock_postinit(void) { }
 #endif
 
 extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
+extern void sched_clock_setup(u64 (*read)(void), int bits, unsigned long rate);
 
 extern unsigned long long (*sched_clock_func)(void);
 
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index aad1ae6..3478b6d 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -14,11 +14,12 @@ 
 #include <linux/syscore_ops.h>
 #include <linux/timer.h>
 #include <linux/sched_clock.h>
+#include <linux/bitops.h>
 
 struct clock_data {
 	u64 epoch_ns;
-	u32 epoch_cyc;
-	u32 epoch_cyc_copy;
+	u64 epoch_cyc;
+	u64 epoch_cyc_copy;
 	unsigned long rate;
 	u32 mult;
 	u32 shift;
@@ -35,24 +36,31 @@  static struct clock_data cd = {
 	.mult	= NSEC_PER_SEC / HZ,
 };
 
-static u32 __read_mostly sched_clock_mask = 0xffffffff;
+static u64 __read_mostly sched_clock_mask;
 
-static u32 notrace jiffy_sched_clock_read(void)
+static u64 notrace jiffy_sched_clock_read(void)
 {
-	return (u32)(jiffies - INITIAL_JIFFIES);
+	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static u32 __read_mostly (*read_sched_clock_32)(void);
+
+static u64 notrace read_sched_clock_32_wrapper(void)
+{
+	return read_sched_clock_32();
+}
+
+static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
 	return (cyc * mult) >> shift;
 }
 
-static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask)
+static unsigned long long notrace cyc_to_sched_clock(u64 cyc, u64 mask)
 {
 	u64 epoch_ns;
-	u32 epoch_cyc;
+	u64 epoch_cyc;
 
 	/*
 	 * Load the epoch_cyc and epoch_ns atomically.  We do this by
@@ -77,7 +85,7 @@  static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask)
 static void notrace update_sched_clock(void)
 {
 	unsigned long flags;
-	u32 cyc;
+	u64 cyc;
 	u64 ns;
 
 	cyc = read_sched_clock();
@@ -103,7 +111,7 @@  static void sched_clock_poll(unsigned long wrap_ticks)
 	update_sched_clock();
 }
 
-void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
+void __init sched_clock_setup(u64 (*read)(void), int bits, unsigned long rate)
 {
 	unsigned long r, w;
 	u64 res, wrap;
@@ -112,10 +120,9 @@  void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	if (cd.rate > rate)
 		return;
 
-	BUG_ON(bits > 32);
 	WARN_ON(!irqs_disabled());
 	read_sched_clock = read;
-	sched_clock_mask = (1 << bits) - 1;
+	sched_clock_mask = (1ULL << bits) - 1;
 	cd.rate = rate;
 
 	/* calculate the mult/shift to convert counter ticks to ns. */
@@ -160,9 +167,15 @@  void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
+void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
+{
+	read_sched_clock_32 = read;
+	sched_clock_setup(read_sched_clock_32_wrapper, bits, rate);
+}
+
 static unsigned long long notrace sched_clock_32(void)
 {
-	u32 cyc = read_sched_clock();
+	u64 cyc = read_sched_clock();
 	return cyc_to_sched_clock(cyc, sched_clock_mask);
 }
 
@@ -183,7 +196,7 @@  void __init sched_clock_postinit(void)
 	 * make it the final one.
 	 */
 	if (read_sched_clock == jiffy_sched_clock_read)
-		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
+		sched_clock_setup(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
 	sched_clock_poll(sched_clock_timer.data);
 }