diff mbox

sched: Support current clocksource handling in fallback sched_clock().

Message ID 20090528110902.GA27884@linux-sh.org (mailing list archive)
State Superseded
Headers show

Commit Message

Paul Mundt May 28, 2009, 11:09 a.m. UTC
On Thu, May 28, 2009 at 11:34:41AM +0200, Peter Zijlstra wrote:
> That's a single assignment, vs two reads on use. Should we be worried
> about the SMP race where we observe two different sched_clocksource
> pointers on read?
> 
> 
> I would suggest we write it as:
> 
> u64 __weak sched_clock(void)
> {
> 	struct clocksource *clock = ACCESS_ONCE(sched_clocksource);
> 
> 	return cyc2ns(clock, clocksource_read(clock));
> }

Here's an updated version, also with an added sanity check in the
unregister path..

--

 include/linux/clocksource.h |    4 +++-
 kernel/sched_clock.c        |    6 ++++--
 kernel/time/clocksource.c   |   13 +++++++++++++
 kernel/time/jiffies.c       |    2 +-
 4 files changed, 21 insertions(+), 4 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Thomas Gleixner May 28, 2009, 12:22 p.m. UTC | #1
On Thu, 28 May 2009, Paul Mundt wrote:
> @@ -437,10 +441,19 @@ void clocksource_unregister(struct clocksource *cs)
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&clocksource_lock, flags);
> +
> +	if (sched_clocksource == cs) {
> +		printk(KERN_WARNING "Refusing to unregister "
> +		       "scheduler clocksource %s", cs->name);

  Hmm, isn't that dangerous ? The clocksource might be in a module or
  in kmalloced memory which is going to be freed.

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra May 28, 2009, 12:40 p.m. UTC | #2
On Thu, 2009-05-28 at 14:22 +0200, Thomas Gleixner wrote:
> On Thu, 28 May 2009, Paul Mundt wrote:
> > @@ -437,10 +441,19 @@ void clocksource_unregister(struct clocksource *cs)
> >  	unsigned long flags;
> >  
> >  	spin_lock_irqsave(&clocksource_lock, flags);
> > +
> > +	if (sched_clocksource == cs) {
> > +		printk(KERN_WARNING "Refusing to unregister "
> > +		       "scheduler clocksource %s", cs->name);
> 
>   Hmm, isn't that dangerous ? The clocksource might be in a module or
>   in kmalloced memory which is going to be freed.

One could play dirty tricks with 'leaking' module ref counts and such so
as to avoid that. The alternative is making sched_clock() look something
like:

u64 __weak sched_clock(void)
{
  struct clocksource *clock;
  u64 time;

  rcu_read_lock();
  clock = rcu_dereference(sched_clocksource);
  time = cyc2ns(clock, read_clocksource(clock));
  rcu_read_unlock;

  return time;
}

and make the module unload do a sync_rcu -- but this might be a little
overkill

--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c56457c..2109940 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -202,7 +202,8 @@  struct clocksource {
 #endif
 };
 
-extern struct clocksource *clock;	/* current clocksource */
+extern struct clocksource *clock;		/* current clocksource */
+extern struct clocksource *sched_clocksource;	/* sched_clock() clocksource */
 
 /*
  * Clock source flags bits::
@@ -212,6 +213,7 @@  extern struct clocksource *clock;	/* current clocksource */
 
 #define CLOCK_SOURCE_WATCHDOG			0x10
 #define CLOCK_SOURCE_VALID_FOR_HRES		0x20
+#define CLOCK_SOURCE_USE_FOR_SCHED_CLOCK	0x40
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index e1d16c9..b91190e 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -30,6 +30,7 @@ 
 #include <linux/percpu.h>
 #include <linux/ktime.h>
 #include <linux/sched.h>
+#include <linux/clocksource.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -38,8 +39,9 @@ 
  */
 unsigned long long __attribute__((weak)) sched_clock(void)
 {
-	return (unsigned long long)(jiffies - INITIAL_JIFFIES)
-					* (NSEC_PER_SEC / HZ);
+	struct clocksource *clock = ACCESS_ONCE(sched_clocksource);
+
+	return cyc2ns(clock, clocksource_read(clock));
 }
 
 static __read_mostly int sched_clock_running;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 80189f6..57b011a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -109,6 +109,7 @@  EXPORT_SYMBOL(timecounter_cyc2time);
 
 /* XXX - Would like a better way for initializing curr_clocksource */
 extern struct clocksource clocksource_jiffies;
+struct clocksource *sched_clocksource = &clocksource_jiffies;
 
 /*[Clocksource internal variables]---------
  * curr_clocksource:
@@ -362,6 +363,9 @@  static struct clocksource *select_clocksource(void)
 	if (next == curr_clocksource)
 		return NULL;
 
+	if (next->flags & CLOCK_SOURCE_USE_FOR_SCHED_CLOCK)
+		sched_clocksource = next;
+
 	return next;
 }
 
@@ -437,10 +441,19 @@  void clocksource_unregister(struct clocksource *cs)
 	unsigned long flags;
 
 	spin_lock_irqsave(&clocksource_lock, flags);
+
+	if (sched_clocksource == cs) {
+		printk(KERN_WARNING "Refusing to unregister "
+		       "scheduler clocksource %s", cs->name);
+		goto out;
+	}
+
 	list_del(&cs->list);
 	if (clocksource_override == cs)
 		clocksource_override = NULL;
 	next_clocksource = select_clocksource();
+
+out:
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index c3f6c30..727d881 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -52,7 +52,7 @@ 
 
 static cycle_t jiffies_read(struct clocksource *cs)
 {
-	return (cycle_t) jiffies;
+	return (cycle_t) (jiffies - INITIAL_JIFFIES);
 }
 
 struct clocksource clocksource_jiffies = {