diff mbox series

[2/3] rcu: Stop stall warning from dumping stacks if grace period ends

Message ID 20241009180509.778133-2-paulmck@kernel.org (mailing list archive)
State Superseded
Commit 5aa92eac422f9c1f14662949eb2ba1ce4d7c45d3
Headers show
Series RCU CPU stall-warning changes for v6.13 | expand

Commit Message

Paul E. McKenney Oct. 9, 2024, 6:05 p.m. UTC
Currently, once an RCU CPU stall warning decides to dump the stalling
CPUs' stacks, the rcu_dump_cpu_stacks() function persists until it
has gone through the full list.  Unfortunately, if the stalled grace
periods ends midway through, this function will be dumping stacks of
innocent-bystander CPUs that happen to be blocking not the old grace
period, but instead the new one.  This can cause serious confusion.

This commit therefore stops dumping stacks if and when the stalled grace
period ends.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_stall.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

Comments

Joel Fernandes Oct. 15, 2024, 6:48 p.m. UTC | #1
On Wed, Oct 09, 2024 at 11:05:08AM -0700, Paul E. McKenney wrote:
> Currently, once an RCU CPU stall warning decides to dump the stalling
> CPUs' stacks, the rcu_dump_cpu_stacks() function persists until it
> has gone through the full list.  Unfortunately, if the stalled grace
> periods ends midway through, this function will be dumping stacks of
> innocent-bystander CPUs that happen to be blocking not the old grace
> period, but instead the new one.  This can cause serious confusion.
> 
> This commit therefore stops dumping stacks if and when the stalled grace
> period ends.
> 
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> ---
>  kernel/rcu/tree_stall.h | 17 +++++++++++------
>  1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
> index d7cdd535e50b1..9d79133377ff6 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -335,13 +335,17 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
>   * that don't support NMI-based stack dumps.  The NMI-triggered stack
>   * traces are more accurate because they are printed by the target CPU.
>   */
> -static void rcu_dump_cpu_stacks(void)
> +static void rcu_dump_cpu_stacks(unsigned long gp_seq)
>  {
>  	int cpu;
>  	unsigned long flags;
>  	struct rcu_node *rnp;
>  
>  	rcu_for_each_leaf_node(rnp) {
> +		if (gp_seq != rcu_state.gp_seq) {
> +			pr_err("INFO: Stall ended during stack backtracing.\n");
> +			return;
> +		}

small nit, this also needs data_race() like you did in next patch? Although
you did delete this code in the next patch.

thanks,

 - Joel


>  		printk_deferred_enter();
>  		raw_spin_lock_irqsave_rcu_node(rnp, flags);
>  		for_each_leaf_node_possible_cpu(rnp, cpu)
> @@ -608,7 +612,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
>  	       (long)rcu_seq_current(&rcu_state.gp_seq), totqlen,
>  	       data_race(rcu_state.n_online_cpus)); // Diagnostic read
>  	if (ndetected) {
> -		rcu_dump_cpu_stacks();
> +		rcu_dump_cpu_stacks(gp_seq);
>  
>  		/* Complain about tasks blocking the grace period. */
>  		rcu_for_each_leaf_node(rnp)
> @@ -640,7 +644,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
>  	rcu_force_quiescent_state();  /* Kick them all. */
>  }
>  
> -static void print_cpu_stall(unsigned long gps)
> +static void print_cpu_stall(unsigned long gp_seq, unsigned long gps)
>  {
>  	int cpu;
>  	unsigned long flags;
> @@ -677,7 +681,7 @@ static void print_cpu_stall(unsigned long gps)
>  	rcu_check_gp_kthread_expired_fqs_timer();
>  	rcu_check_gp_kthread_starvation();
>  
> -	rcu_dump_cpu_stacks();
> +	rcu_dump_cpu_stacks(gp_seq);
>  
>  	raw_spin_lock_irqsave_rcu_node(rnp, flags);
>  	/* Rewrite if needed in case of slow consoles. */
> @@ -759,7 +763,8 @@ static void check_cpu_stall(struct rcu_data *rdp)
>  	gs2 = READ_ONCE(rcu_state.gp_seq);
>  	if (gs1 != gs2 ||
>  	    ULONG_CMP_LT(j, js) ||
> -	    ULONG_CMP_GE(gps, js))
> +	    ULONG_CMP_GE(gps, js) ||
> +	    !rcu_seq_state(gs2))
>  		return; /* No stall or GP completed since entering function. */
>  	rnp = rdp->mynode;
>  	jn = jiffies + ULONG_MAX / 2;
> @@ -780,7 +785,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
>  			pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
>  		} else if (self_detected) {
>  			/* We haven't checked in, so go dump stack. */
> -			print_cpu_stall(gps);
> +			print_cpu_stall(gs2, gps);
>  		} else {
>  			/* They had a few time units to dump stack, so complain. */
>  			print_other_cpu_stall(gs2, gps);
> -- 
> 2.40.1
> 
>
diff mbox series

Patch

diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index d7cdd535e50b1..9d79133377ff6 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -335,13 +335,17 @@  static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
  * that don't support NMI-based stack dumps.  The NMI-triggered stack
  * traces are more accurate because they are printed by the target CPU.
  */
-static void rcu_dump_cpu_stacks(void)
+static void rcu_dump_cpu_stacks(unsigned long gp_seq)
 {
 	int cpu;
 	unsigned long flags;
 	struct rcu_node *rnp;
 
 	rcu_for_each_leaf_node(rnp) {
+		if (gp_seq != rcu_state.gp_seq) {
+			pr_err("INFO: Stall ended during stack backtracing.\n");
+			return;
+		}
 		printk_deferred_enter();
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		for_each_leaf_node_possible_cpu(rnp, cpu)
@@ -608,7 +612,7 @@  static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
 	       (long)rcu_seq_current(&rcu_state.gp_seq), totqlen,
 	       data_race(rcu_state.n_online_cpus)); // Diagnostic read
 	if (ndetected) {
-		rcu_dump_cpu_stacks();
+		rcu_dump_cpu_stacks(gp_seq);
 
 		/* Complain about tasks blocking the grace period. */
 		rcu_for_each_leaf_node(rnp)
@@ -640,7 +644,7 @@  static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
 	rcu_force_quiescent_state();  /* Kick them all. */
 }
 
-static void print_cpu_stall(unsigned long gps)
+static void print_cpu_stall(unsigned long gp_seq, unsigned long gps)
 {
 	int cpu;
 	unsigned long flags;
@@ -677,7 +681,7 @@  static void print_cpu_stall(unsigned long gps)
 	rcu_check_gp_kthread_expired_fqs_timer();
 	rcu_check_gp_kthread_starvation();
 
-	rcu_dump_cpu_stacks();
+	rcu_dump_cpu_stacks(gp_seq);
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	/* Rewrite if needed in case of slow consoles. */
@@ -759,7 +763,8 @@  static void check_cpu_stall(struct rcu_data *rdp)
 	gs2 = READ_ONCE(rcu_state.gp_seq);
 	if (gs1 != gs2 ||
 	    ULONG_CMP_LT(j, js) ||
-	    ULONG_CMP_GE(gps, js))
+	    ULONG_CMP_GE(gps, js) ||
+	    !rcu_seq_state(gs2))
 		return; /* No stall or GP completed since entering function. */
 	rnp = rdp->mynode;
 	jn = jiffies + ULONG_MAX / 2;
@@ -780,7 +785,7 @@  static void check_cpu_stall(struct rcu_data *rdp)
 			pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
 		} else if (self_detected) {
 			/* We haven't checked in, so go dump stack. */
-			print_cpu_stall(gps);
+			print_cpu_stall(gs2, gps);
 		} else {
 			/* They had a few time units to dump stack, so complain. */
 			print_other_cpu_stall(gs2, gps);