diff mbox

[04/45] CPU hotplug: Add infrastructure to check lacking hotplug synchronization

Message ID 20130623133841.19094.69631.stgit@srivatsabhat.in.ibm.com (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Srivatsa S. Bhat June 23, 2013, 1:38 p.m. UTC
Add a debugging infrastructure to warn if an atomic hotplug reader has not
invoked get_online_cpus_atomic() before traversing/accessing the
cpu_online_mask. Encapsulate these checks under a new debug config option
DEBUG_HOTPLUG_CPU.

This debugging infrastructure proves useful in the tree-wide conversion
of atomic hotplug readers from preempt_disable() to the new APIs, and
help us catch the places we missed, much before we actually get rid of
stop_machine(). We can perhaps remove the debugging checks later on.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Alex Shi <alex.shi@intel.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 include/linux/cpumask.h |   12 ++++++++
 kernel/cpu.c            |   75 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)


--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Steven Rostedt June 24, 2013, 11:26 p.m. UTC | #1
On Sun, 2013-06-23 at 19:08 +0530, Srivatsa S. Bhat wrote:


Just to make the code a little cleaner, can you add:

> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 860f51a..e90d9d7 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -63,6 +63,72 @@ static struct {
>  	.refcount = 0,
>  };
>  
> +#ifdef CONFIG_DEBUG_HOTPLUG_CPU
> +
> +static DEFINE_PER_CPU(unsigned long, atomic_reader_refcnt);
> +
> +static int current_is_hotplug_safe(const struct cpumask *mask)
> +{
> +
> +	/* If we are not dealing with cpu_online_mask, don't complain. */
> +	if (mask != cpu_online_mask)
> +		return 1;
> +
> +	/* If this is the task doing hotplug, don't complain. */
> +	if (unlikely(current == cpu_hotplug.active_writer))
> +		return 1;
> +
> +	/* If we are in early boot, don't complain. */
> +	if (system_state != SYSTEM_RUNNING)
> +		return 1;
> +
> +	/*
> +	 * Check if the current task is in atomic context and it has
> +	 * invoked get_online_cpus_atomic() to synchronize with
> +	 * CPU Hotplug.
> +	 */
> +	if (preempt_count() || irqs_disabled())
> +		return this_cpu_read(atomic_reader_refcnt);
> +	else
> +		return 1; /* No checks for non-atomic contexts for now */
> +}
> +
> +static inline void warn_hotplug_unsafe(void)
> +{
> +	WARN_ONCE(1, "Must use get/put_online_cpus_atomic() to synchronize"
> +		     " with CPU hotplug\n");
> +}
> +
> +/*
> + * Check if the task (executing in atomic context) has the required protection
> + * against CPU hotplug, while accessing the specified cpumask.
> + */
> +void check_hotplug_safe_cpumask(const struct cpumask *mask)
> +{
> +	if (!current_is_hotplug_safe(mask))
> +		warn_hotplug_unsafe();
> +}
> +EXPORT_SYMBOL_GPL(check_hotplug_safe_cpumask);
> +
> +/*
> + * Similar to check_hotplug_safe_cpumask(), except that we don't complain
> + * if the task (executing in atomic context) is testing whether the CPU it
> + * is executing on is online or not.
> + *
> + * (A task executing with preemption disabled on a CPU, automatically prevents
> + *  offlining that CPU, irrespective of the actual implementation of CPU
> + *  offline. So we don't enforce holding of get_online_cpus_atomic() for that
> + *  case).
> + */
> +void check_hotplug_safe_cpu(unsigned int cpu, const struct cpumask *mask)
> +{
> +	if(!current_is_hotplug_safe(mask) && cpu != smp_processor_id())
> +		warn_hotplug_unsafe();
> +}
> +EXPORT_SYMBOL_GPL(check_hotplug_safe_cpu);
> +

static inline void atomic_reader_refcnt_inc(void)
{
	this_cpu_inc(atomic_reader_refcnt);
}
static inline void atomic_reader_refcnt_dec(void)
{
	this_cpu_dec(atomic_reader_refcnt);
}

#else
static inline void atomic_reader_refcnt_inc(void)
{
}
static inline void atomic_reader_refcnt_dec(void)
{
}
#endif

> +#endif
> +
>  void get_online_cpus(void)
>  {
>  	might_sleep();
> @@ -189,13 +255,22 @@ unsigned int get_online_cpus_atomic(void)
>  	 * from going offline.
>  	 */
>  	preempt_disable();
> +
> +#ifdef CONFIG_DEBUG_HOTPLUG_CPU
> +	this_cpu_inc(atomic_reader_refcnt);
> +#endif

Replace the #ifdef with just:

	atomic_reader_refcnt_inc();

>  	return smp_processor_id();
>  }
>  EXPORT_SYMBOL_GPL(get_online_cpus_atomic);
>  
>  void put_online_cpus_atomic(void)
>  {
> +
> +#ifdef CONFIG_DEBUG_HOTPLUG_CPU
> +	this_cpu_dec(atomic_reader_refcnt);
> +#endif

And

	atomic_reader_refcnt_dec();

-- Steve

>  	preempt_enable();
> +
>  }
>  EXPORT_SYMBOL_GPL(put_online_cpus_atomic);
>  


--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Srivatsa S. Bhat June 25, 2013, 6:49 p.m. UTC | #2
On 06/25/2013 04:56 AM, Steven Rostedt wrote:
> On Sun, 2013-06-23 at 19:08 +0530, Srivatsa S. Bhat wrote:
> 
> 
> Just to make the code a little cleaner, can you add:
> 
>> diff --git a/kernel/cpu.c b/kernel/cpu.c
>> index 860f51a..e90d9d7 100644
>> --- a/kernel/cpu.c
>> +++ b/kernel/cpu.c
>> @@ -63,6 +63,72 @@ static struct {
>>  	.refcount = 0,
>>  };
>>  
>> +#ifdef CONFIG_DEBUG_HOTPLUG_CPU
>> +
[..]
> 
> static inline void atomic_reader_refcnt_inc(void)
> {
> 	this_cpu_inc(atomic_reader_refcnt);
> }
> static inline void atomic_reader_refcnt_dec(void)
> {
> 	this_cpu_dec(atomic_reader_refcnt);
> }
> 
> #else
> static inline void atomic_reader_refcnt_inc(void)
> {
> }
> static inline void atomic_reader_refcnt_dec(void)
> {
> }
> #endif
> 
>> +#endif
>> +
>>  void get_online_cpus(void)
>>  {
>>  	might_sleep();
>> @@ -189,13 +255,22 @@ unsigned int get_online_cpus_atomic(void)
>>  	 * from going offline.
>>  	 */
>>  	preempt_disable();
>> +
>> +#ifdef CONFIG_DEBUG_HOTPLUG_CPU
>> +	this_cpu_inc(atomic_reader_refcnt);
>> +#endif
> 
> Replace the #ifdef with just:
> 
> 	atomic_reader_refcnt_inc();
> 
>>  	return smp_processor_id();
>>  }
>>  EXPORT_SYMBOL_GPL(get_online_cpus_atomic);
>>  
>>  void put_online_cpus_atomic(void)
>>  {
>> +
>> +#ifdef CONFIG_DEBUG_HOTPLUG_CPU
>> +	this_cpu_dec(atomic_reader_refcnt);
>> +#endif
> 
> And
> 
> 	atomic_reader_refcnt_dec();
> 

This makes the code look much better. Thank you!
I'll make that change in my v2.

Regards,
Srivatsa S. Bhat

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2..9197ca4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -101,6 +101,18 @@  extern const struct cpumask *const cpu_active_mask;
 #define cpu_active(cpu)		((cpu) == 0)
 #endif
 
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU
+extern void check_hotplug_safe_cpumask(const struct cpumask *mask);
+extern void check_hotplug_safe_cpu(unsigned int cpu,
+				   const struct cpumask *mask);
+#else
+static inline void check_hotplug_safe_cpumask(const struct cpumask *mask) { }
+static inline void check_hotplug_safe_cpu(unsigned int cpu,
+					  const struct cpumask *mask)
+{
+}
+#endif
+
 /* verify cpu argument to cpumask_* operators */
 static inline unsigned int cpumask_check(unsigned int cpu)
 {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 860f51a..e90d9d7 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -63,6 +63,72 @@  static struct {
 	.refcount = 0,
 };
 
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU
+
+static DEFINE_PER_CPU(unsigned long, atomic_reader_refcnt);
+
+static int current_is_hotplug_safe(const struct cpumask *mask)
+{
+
+	/* If we are not dealing with cpu_online_mask, don't complain. */
+	if (mask != cpu_online_mask)
+		return 1;
+
+	/* If this is the task doing hotplug, don't complain. */
+	if (unlikely(current == cpu_hotplug.active_writer))
+		return 1;
+
+	/* If we are in early boot, don't complain. */
+	if (system_state != SYSTEM_RUNNING)
+		return 1;
+
+	/*
+	 * Check if the current task is in atomic context and it has
+	 * invoked get_online_cpus_atomic() to synchronize with
+	 * CPU Hotplug.
+	 */
+	if (preempt_count() || irqs_disabled())
+		return this_cpu_read(atomic_reader_refcnt);
+	else
+		return 1; /* No checks for non-atomic contexts for now */
+}
+
+static inline void warn_hotplug_unsafe(void)
+{
+	WARN_ONCE(1, "Must use get/put_online_cpus_atomic() to synchronize"
+		     " with CPU hotplug\n");
+}
+
+/*
+ * Check if the task (executing in atomic context) has the required protection
+ * against CPU hotplug, while accessing the specified cpumask.
+ */
+void check_hotplug_safe_cpumask(const struct cpumask *mask)
+{
+	if (!current_is_hotplug_safe(mask))
+		warn_hotplug_unsafe();
+}
+EXPORT_SYMBOL_GPL(check_hotplug_safe_cpumask);
+
+/*
+ * Similar to check_hotplug_safe_cpumask(), except that we don't complain
+ * if the task (executing in atomic context) is testing whether the CPU it
+ * is executing on is online or not.
+ *
+ * (A task executing with preemption disabled on a CPU, automatically prevents
+ *  offlining that CPU, irrespective of the actual implementation of CPU
+ *  offline. So we don't enforce holding of get_online_cpus_atomic() for that
+ *  case).
+ */
+void check_hotplug_safe_cpu(unsigned int cpu, const struct cpumask *mask)
+{
+	if(!current_is_hotplug_safe(mask) && cpu != smp_processor_id())
+		warn_hotplug_unsafe();
+}
+EXPORT_SYMBOL_GPL(check_hotplug_safe_cpu);
+
+#endif
+
 void get_online_cpus(void)
 {
 	might_sleep();
@@ -189,13 +255,22 @@  unsigned int get_online_cpus_atomic(void)
 	 * from going offline.
 	 */
 	preempt_disable();
+
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU
+	this_cpu_inc(atomic_reader_refcnt);
+#endif
 	return smp_processor_id();
 }
 EXPORT_SYMBOL_GPL(get_online_cpus_atomic);
 
 void put_online_cpus_atomic(void)
 {
+
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU
+	this_cpu_dec(atomic_reader_refcnt);
+#endif
 	preempt_enable();
+
 }
 EXPORT_SYMBOL_GPL(put_online_cpus_atomic);