diff mbox series

rcutorture: Replace schedule_timeout*() 1 jiffie waits with HZ/20

Message ID 20230816204913.450457-1-joel@joelfernandes.org (mailing list archive)
State Accepted
Commit 21491fe2d07cb8378f76d2d567c957340c85d6de
Headers show
Series rcutorture: Replace schedule_timeout*() 1 jiffie waits with HZ/20 | expand

Commit Message

Joel Fernandes Aug. 16, 2023, 8:49 p.m. UTC
In the past, we have seen that spinning on schedule_timeout*() with a wait
of 1 jiffy can hang the kernel. See d52d3a2bf408 ("torture: Fix hang during
kthread shutdown phase").

Recently it showed up again in torture's stutter code as well. The behavior is
that the function may return instantly and never go to sleep, preempting whatever
was running under it.

To prevent future issues, apply the same fix mentioned in the above
commit d52d3a2bf408 to more places.

I took care to only apply it to places where I thought it made sense.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/rcutorture.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Comments

Paul E. McKenney Aug. 23, 2023, 9:07 p.m. UTC | #1
On Wed, Aug 16, 2023 at 08:49:12PM +0000, Joel Fernandes (Google) wrote:
> In the past, we see that spinning on schedule_timeout* with a wait of 1
> jiffie can hang the kernel. See d52d3a2bf408 ("torture: Fix hang during
> kthread shutdown phase").
> 
> Recently again it showed up in torture's stutter code as well. The behavior is
> the the function may instantly return and never go to sleep preempting whatever
> was running under it.
> 
> To prevent future issues, apply the same fix mentioned in the above
> commit d52d3a2bf408 to more places.
> 
> I took care to only apply it to places where I thought it made sense.
> 
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

Hearing no objections, I applied this for further review and testing.

In the future, should some of these become hrtimers?

						Thanx, Paul

> ---
>  kernel/rcu/rcutorture.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> index 8dd52ea78b52..a31297f32a2a 100644
> --- a/kernel/rcu/rcutorture.c
> +++ b/kernel/rcu/rcutorture.c
> @@ -1153,7 +1153,7 @@ static int rcu_torture_boost(void *arg)
>  				mutex_unlock(&boost_mutex);
>  				break;
>  			}
> -			schedule_timeout_uninterruptible(1);
> +			schedule_timeout_uninterruptible(HZ / 20);
>  		}
>  
>  		/* Go do the stutter. */
> @@ -1164,7 +1164,7 @@ checkwait:	if (stutter_wait("rcu_torture_boost"))
>  	/* Clean up and exit. */
>  	while (!kthread_should_stop()) {
>  		torture_shutdown_absorb("rcu_torture_boost");
> -		schedule_timeout_uninterruptible(1);
> +		schedule_timeout_uninterruptible(HZ / 20);
>  	}
>  	torture_kthread_stopping("rcu_torture_boost");
>  	return 0;
> @@ -1187,7 +1187,7 @@ rcu_torture_fqs(void *arg)
>  		fqs_resume_time = jiffies + fqs_stutter * HZ;
>  		while (time_before(jiffies, fqs_resume_time) &&
>  		       !kthread_should_stop()) {
> -			schedule_timeout_interruptible(1);
> +			schedule_timeout_interruptible(HZ / 20);
>  		}
>  		fqs_burst_remaining = fqs_duration;
>  		while (fqs_burst_remaining > 0 &&
> @@ -2903,7 +2903,7 @@ static int rcu_torture_fwd_prog(void *args)
>  			WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
>  		} else {
>  			while (READ_ONCE(rcu_fwd_seq) == oldseq && !torture_must_stop())
> -				schedule_timeout_interruptible(1);
> +				schedule_timeout_interruptible(HZ / 20);
>  			oldseq = READ_ONCE(rcu_fwd_seq);
>  		}
>  		pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
> @@ -3204,7 +3204,7 @@ static int rcu_torture_read_exit_child(void *trsp_in)
>  	set_user_nice(current, MAX_NICE);
>  	// Minimize time between reading and exiting.
>  	while (!kthread_should_stop())
> -		schedule_timeout_uninterruptible(1);
> +		schedule_timeout_uninterruptible(HZ / 20);
>  	(void)rcu_torture_one_read(trsp, -1);
>  	return 0;
>  }
> @@ -3252,7 +3252,7 @@ static int rcu_torture_read_exit(void *unused)
>  	smp_mb(); // Store before wakeup.
>  	wake_up(&read_exit_wq);
>  	while (!torture_must_stop())
> -		schedule_timeout_uninterruptible(1);
> +		schedule_timeout_uninterruptible(HZ / 20);
>  	torture_kthread_stopping("rcu_torture_read_exit");
>  	return 0;
>  }
> -- 
> 2.41.0.694.ge786442a9b-goog
>
Joel Fernandes Aug. 25, 2023, 2:55 a.m. UTC | #2
On Wed, Aug 23, 2023 at 02:07:55PM -0700, Paul E. McKenney wrote:
> On Wed, Aug 16, 2023 at 08:49:12PM +0000, Joel Fernandes (Google) wrote:
> > In the past, we see that spinning on schedule_timeout* with a wait of 1
> > jiffie can hang the kernel. See d52d3a2bf408 ("torture: Fix hang during
> > kthread shutdown phase").
> > 
> > Recently again it showed up in torture's stutter code as well. The behavior is
> > the the function may instantly return and never go to sleep preempting whatever
> > was running under it.
> > 
> > To prevent future issues, apply the same fix mentioned in the above
> > commit d52d3a2bf408 to more places.
> > 
> > I took care to only apply it to places where I thought it made sense.
> > 
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> 
> Hearing no objections, I applied this for further review and testing.
> 
> In the future, should some of these become hrtimers?

Yes they certainly could be but we don't need these ones to be high
resolution AFAICS and the jiffie granularity should be good.

thanks,

 - Joel
Paul E. McKenney Aug. 26, 2023, 1:31 a.m. UTC | #3
On Fri, Aug 25, 2023 at 02:55:38AM +0000, Joel Fernandes wrote:
> On Wed, Aug 23, 2023 at 02:07:55PM -0700, Paul E. McKenney wrote:
> > On Wed, Aug 16, 2023 at 08:49:12PM +0000, Joel Fernandes (Google) wrote:
> > > In the past, we see that spinning on schedule_timeout* with a wait of 1
> > > jiffie can hang the kernel. See d52d3a2bf408 ("torture: Fix hang during
> > > kthread shutdown phase").
> > > 
> > > Recently again it showed up in torture's stutter code as well. The behavior is
> > > the the function may instantly return and never go to sleep preempting whatever
> > > was running under it.
> > > 
> > > To prevent future issues, apply the same fix mentioned in the above
> > > commit d52d3a2bf408 to more places.
> > > 
> > > I took care to only apply it to places where I thought it made sense.
> > > 
> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > 
> > Hearing no objections, I applied this for further review and testing.
> > 
> > In the future, should some of these become hrtimers?
> 
> Yes they certainly could be but we don't need these ones to be high
> resolution AFAICS and the jiffie granularity should be good.

True, but wouldn't avoiding the repeated wakeups, even at the lower
frequency, be of some value?

							Thanx, Paul
diff mbox series

Patch

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 8dd52ea78b52..a31297f32a2a 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1153,7 +1153,7 @@  static int rcu_torture_boost(void *arg)
 				mutex_unlock(&boost_mutex);
 				break;
 			}
-			schedule_timeout_uninterruptible(1);
+			schedule_timeout_uninterruptible(HZ / 20);
 		}
 
 		/* Go do the stutter. */
@@ -1164,7 +1164,7 @@  checkwait:	if (stutter_wait("rcu_torture_boost"))
 	/* Clean up and exit. */
 	while (!kthread_should_stop()) {
 		torture_shutdown_absorb("rcu_torture_boost");
-		schedule_timeout_uninterruptible(1);
+		schedule_timeout_uninterruptible(HZ / 20);
 	}
 	torture_kthread_stopping("rcu_torture_boost");
 	return 0;
@@ -1187,7 +1187,7 @@  rcu_torture_fqs(void *arg)
 		fqs_resume_time = jiffies + fqs_stutter * HZ;
 		while (time_before(jiffies, fqs_resume_time) &&
 		       !kthread_should_stop()) {
-			schedule_timeout_interruptible(1);
+			schedule_timeout_interruptible(HZ / 20);
 		}
 		fqs_burst_remaining = fqs_duration;
 		while (fqs_burst_remaining > 0 &&
@@ -2903,7 +2903,7 @@  static int rcu_torture_fwd_prog(void *args)
 			WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
 		} else {
 			while (READ_ONCE(rcu_fwd_seq) == oldseq && !torture_must_stop())
-				schedule_timeout_interruptible(1);
+				schedule_timeout_interruptible(HZ / 20);
 			oldseq = READ_ONCE(rcu_fwd_seq);
 		}
 		pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
@@ -3204,7 +3204,7 @@  static int rcu_torture_read_exit_child(void *trsp_in)
 	set_user_nice(current, MAX_NICE);
 	// Minimize time between reading and exiting.
 	while (!kthread_should_stop())
-		schedule_timeout_uninterruptible(1);
+		schedule_timeout_uninterruptible(HZ / 20);
 	(void)rcu_torture_one_read(trsp, -1);
 	return 0;
 }
@@ -3252,7 +3252,7 @@  static int rcu_torture_read_exit(void *unused)
 	smp_mb(); // Store before wakeup.
 	wake_up(&read_exit_wq);
 	while (!torture_must_stop())
-		schedule_timeout_uninterruptible(1);
+		schedule_timeout_uninterruptible(HZ / 20);
 	torture_kthread_stopping("rcu_torture_read_exit");
 	return 0;
 }