diff mbox series

[12/12] srcu: Block less aggressively for expedited grace periods

Message ID 20220620222032.3839547-12-paulmck@kernel.org (mailing list archive)
State Accepted
Commit 640a7d37c3f42ce70d5ebb216a4e0c70b8f23d97
Headers show
Series Miscellaneous fixes for v5.20 | expand

Commit Message

Paul E. McKenney June 20, 2022, 10:20 p.m. UTC
Commit 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
from consuming CPU") fixed a problem where a long-running expedited SRCU
grace period could block kernel live patching.  It did so by giving up
on expediting once a given SRCU expedited grace period grew too old.

Unfortunately, this added excessive delays to boots of embedded systems
running on qemu that use the ARM IORT RMR feature.  This commit therefore
makes the transition away from expediting less aggressive, increasing
the per-grace-period phase number of non-sleeping polls of readers from
one to three and increasing the required grace-period age from one jiffy
(actually from zero to one jiffies) to two jiffies (actually from one
to two jiffies).

Fixes: 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers from consuming CPU")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reported-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reported-by: chenxiang (M)" <chenxiang66@hisilicon.com>
Cc: Shameerali Kolothum Thodi  <shameerali.kolothum.thodi@huawei.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro.org/
---
 kernel/rcu/srcutree.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

Comments

Zhangfei Gao June 21, 2022, 2 a.m. UTC | #1
On 2022/6/21 上午6:20, Paul E. McKenney wrote:
> Commit 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
> from consuming CPU") fixed a problem where a long-running expedited SRCU
> grace period could block kernel live patching.  It did so by giving up
> on expediting once a given SRCU expedited grace period grew too old.
>
> Unfortunately, this added excessive delays to boots of embedded systems
> running on qemu that use the ARM IORT RMR feature.  This commit therefore
> makes the transition away from expediting less aggressive, increasing
> the per-grace-period phase number of non-sleeping polls of readers from
> one to three and increasing the required grace-period age from one jiffy
> (actually from zero to one jiffies) to two jiffies (actually from one
> to two jiffies).
>
> Fixes: 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers from consuming CPU")
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Reported-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reported-by: chenxiang (M)" <chenxiang66@hisilicon.com>
> Cc: Shameerali Kolothum Thodi  <shameerali.kolothum.thodi@huawei.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Link: https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro.org/

Test on 5.19-rc1 with this patch with qemu boot with -bios 
QEMU_EFI-2022.fd, seems not work, same as rc1.

real    2m42.948s
user    0m2.843s
sys     0m1.170s

qemu: stable-6.1

build/aarch64-softmmu/qemu-system-aarch64 -machine 
virt,gic-version=3,iommu=smmuv3 \
-enable-kvm -cpu host -m 1024 \
-kernel /home/linaro/Image -initrd /home/linaro/tmp/ramdisk-new.img 
-nographic -append \
"rdinit=init console=ttyAMA0 earlycon=pl011,0x9000000 kpti=off acpi=force" \
-bios QEMU_EFI-2022.fd

Thanks
> ---
>   kernel/rcu/srcutree.c | 20 +++++++++++++-------
>   1 file changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 50ba70f019dea..0db7873f4e95b 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -513,7 +513,7 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
>   
>   #define SRCU_INTERVAL		1	// Base delay if no expedited GPs pending.
>   #define SRCU_MAX_INTERVAL	10	// Maximum incremental delay from slow readers.
> -#define SRCU_MAX_NODELAY_PHASE	1	// Maximum per-GP-phase consecutive no-delay instances.
> +#define SRCU_MAX_NODELAY_PHASE	3	// Maximum per-GP-phase consecutive no-delay instances.
>   #define SRCU_MAX_NODELAY	100	// Maximum consecutive no-delay instances.
>   
>   /*
> @@ -522,16 +522,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
>    */
>   static unsigned long srcu_get_delay(struct srcu_struct *ssp)
>   {
> +	unsigned long gpstart;
> +	unsigned long j;
>   	unsigned long jbase = SRCU_INTERVAL;
>   
>   	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
>   		jbase = 0;
> -	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
> -		jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
> -	if (!jbase) {
> -		WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> -		if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
> -			jbase = 1;
> +	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
> +		j = jiffies - 1;
> +		gpstart = READ_ONCE(ssp->srcu_gp_start);
> +		if (time_after(j, gpstart))
> +			jbase += j - gpstart;
> +		if (!jbase) {
> +			WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> +			if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
> +				jbase = 1;
> +		}
>   	}
>   	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
>   }
Paul E. McKenney June 21, 2022, 3:15 a.m. UTC | #2
On Tue, Jun 21, 2022 at 10:00:07AM +0800, Zhangfei Gao wrote:
> 
> 
> On 2022/6/21 上午6:20, Paul E. McKenney wrote:
> > Commit 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
> > from consuming CPU") fixed a problem where a long-running expedited SRCU
> > grace period could block kernel live patching.  It did so by giving up
> > on expediting once a given SRCU expedited grace period grew too old.
> > 
> > Unfortunately, this added excessive delays to boots of embedded systems
> > running on qemu that use the ARM IORT RMR feature.  This commit therefore
> > makes the transition away from expediting less aggressive, increasing
> > the per-grace-period phase number of non-sleeping polls of readers from
> > one to three and increasing the required grace-period age from one jiffy
> > (actually from zero to one jiffies) to two jiffies (actually from one
> > to two jiffies).
> > 
> > Fixes: 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers from consuming CPU")
> > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > Reported-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> > Reported-by: chenxiang (M)" <chenxiang66@hisilicon.com>
> > Cc: Shameerali Kolothum Thodi  <shameerali.kolothum.thodi@huawei.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Link: https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro.org/
> 
> Test on 5.19-rc1 with this patch with qemu boot with -bios QEMU_EFI-2022.fd,
> seems not work, same as rc1.
> 
> real    2m42.948s
> user    0m2.843s
> sys     0m1.170s
> 
> qemu: stable-6.1
> 
> build/aarch64-softmmu/qemu-system-aarch64 -machine
> virt,gic-version=3,iommu=smmuv3 \
> -enable-kvm -cpu host -m 1024 \
> -kernel /home/linaro/Image -initrd /home/linaro/tmp/ramdisk-new.img
> -nographic -append \
> "rdinit=init console=ttyAMA0 earlycon=pl011,0x9000000 kpti=off acpi=force" \
> -bios QEMU_EFI-2022.fd

Understood.  This patch fixes some cases, but not your case.  Which is
why you guys are experimenting with additional changes.  In the meantime,
this patch helps at least some people.  I look forward to you guys have
an appropriate solution that I can pull in on top of this one.

Or, if the solution shows up quickly enough, I can replace this patch
with your guys' solution.

							Thanx, Paul

> Thanks
> > ---
> >   kernel/rcu/srcutree.c | 20 +++++++++++++-------
> >   1 file changed, 13 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index 50ba70f019dea..0db7873f4e95b 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -513,7 +513,7 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
> >   #define SRCU_INTERVAL		1	// Base delay if no expedited GPs pending.
> >   #define SRCU_MAX_INTERVAL	10	// Maximum incremental delay from slow readers.
> > -#define SRCU_MAX_NODELAY_PHASE	1	// Maximum per-GP-phase consecutive no-delay instances.
> > +#define SRCU_MAX_NODELAY_PHASE	3	// Maximum per-GP-phase consecutive no-delay instances.
> >   #define SRCU_MAX_NODELAY	100	// Maximum consecutive no-delay instances.
> >   /*
> > @@ -522,16 +522,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
> >    */
> >   static unsigned long srcu_get_delay(struct srcu_struct *ssp)
> >   {
> > +	unsigned long gpstart;
> > +	unsigned long j;
> >   	unsigned long jbase = SRCU_INTERVAL;
> >   	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
> >   		jbase = 0;
> > -	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
> > -		jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
> > -	if (!jbase) {
> > -		WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> > -		if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
> > -			jbase = 1;
> > +	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
> > +		j = jiffies - 1;
> > +		gpstart = READ_ONCE(ssp->srcu_gp_start);
> > +		if (time_after(j, gpstart))
> > +			jbase += j - gpstart;
> > +		if (!jbase) {
> > +			WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> > +			if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
> > +				jbase = 1;
> > +		}
> >   	}
> >   	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
> >   }
>
Shameerali Kolothum Thodi June 21, 2022, 7:43 a.m. UTC | #3
> -----Original Message-----
> From: Paul E. McKenney [mailto:paulmck@kernel.org]
> Sent: 20 June 2022 23:21
> To: rcu@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org; kernel-team@fb.com;
> rostedt@goodmis.org; Paul E. McKenney <paulmck@kernel.org>; Zhangfei
> Gao <zhangfei.gao@linaro.org>; Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; Paolo Bonzini
> <pbonzini@redhat.com>
> Subject: [PATCH rcu 12/12] srcu: Block less aggressively for expedited grace
> periods
> 
> Commit 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
> from consuming CPU") fixed a problem where a long-running expedited
> SRCU
> grace period could block kernel live patching.  It did so by giving up
> on expediting once a given SRCU expedited grace period grew too old.
> 
> Unfortunately, this added excessive delays to boots of embedded systems
> running on qemu that use the ARM IORT RMR feature. 

As pointed out here[0]/[1], this delay has nothing to do with ARM IORT RMR
feature. The delay is due to the "-bios QEMU_EFI.fd" line which emulates flash
devices and requires excessive memslot ops during boot.

Thanks,
Shameer

[0] https://lore.kernel.org/all/110bbec5cee74efba0aad64360069a12@huawei.com/
[1] https://lore.kernel.org/all/8735g649ew.wl-maz@kernel.org/


 This commit
> therefore
> makes the transition away from expediting less aggressive, increasing
> the per-grace-period phase number of non-sleeping polls of readers from
> one to three and increasing the required grace-period age from one jiffy
> (actually from zero to one jiffies) to two jiffies (actually from one
> to two jiffies).
> 
> Fixes: 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
> from consuming CPU")
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Reported-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reported-by: chenxiang (M)" <chenxiang66@hisilicon.com>
> Cc: Shameerali Kolothum Thodi  <shameerali.kolothum.thodi@huawei.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Link:
> https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro
> .org/
> ---
>  kernel/rcu/srcutree.c | 20 +++++++++++++-------
>  1 file changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 50ba70f019dea..0db7873f4e95b 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -513,7 +513,7 @@ static bool srcu_readers_active(struct srcu_struct
> *ssp)
> 
>  #define SRCU_INTERVAL		1	// Base delay if no expedited GPs
> pending.
>  #define SRCU_MAX_INTERVAL	10	// Maximum incremental delay from
> slow readers.
> -#define SRCU_MAX_NODELAY_PHASE	1	// Maximum per-GP-phase
> consecutive no-delay instances.
> +#define SRCU_MAX_NODELAY_PHASE	3	// Maximum per-GP-phase
> consecutive no-delay instances.
>  #define SRCU_MAX_NODELAY	100	// Maximum consecutive no-delay
> instances.
> 
>  /*
> @@ -522,16 +522,22 @@ static bool srcu_readers_active(struct srcu_struct
> *ssp)
>   */
>  static unsigned long srcu_get_delay(struct srcu_struct *ssp)
>  {
> +	unsigned long gpstart;
> +	unsigned long j;
>  	unsigned long jbase = SRCU_INTERVAL;
> 
>  	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq),
> READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
>  		jbase = 0;
> -	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
> -		jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
> -	if (!jbase) {
> -		WRITE_ONCE(ssp->srcu_n_exp_nodelay,
> READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> -		if (READ_ONCE(ssp->srcu_n_exp_nodelay) >
> SRCU_MAX_NODELAY_PHASE)
> -			jbase = 1;
> +	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
> +		j = jiffies - 1;
> +		gpstart = READ_ONCE(ssp->srcu_gp_start);
> +		if (time_after(j, gpstart))
> +			jbase += j - gpstart;
> +		if (!jbase) {
> +			WRITE_ONCE(ssp->srcu_n_exp_nodelay,
> READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> +			if (READ_ONCE(ssp->srcu_n_exp_nodelay) >
> SRCU_MAX_NODELAY_PHASE)
> +				jbase = 1;
> +		}
>  	}
>  	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
>  }
> --
> 2.31.1.189.g2e36527f23
Neeraj Upadhyay June 21, 2022, 10:13 a.m. UTC | #4
On 6/21/2022 3:50 AM, Paul E. McKenney wrote:
> Commit 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
> from consuming CPU") fixed a problem where a long-running expedited SRCU
> grace period could block kernel live patching.  It did so by giving up
> on expediting once a given SRCU expedited grace period grew too old.
> 
> Unfortunately, this added excessive delays to boots of embedded systems
> running on qemu that use the ARM IORT RMR feature.  This commit therefore
> makes the transition away from expediting less aggressive, increasing
> the per-grace-period phase number of non-sleeping polls of readers from
> one to three and increasing the required grace-period age from one jiffy
> (actually from zero to one jiffies) to two jiffies (actually from one
> to two jiffies).
> 
> Fixes: 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers from consuming CPU")
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Reported-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reported-by: chenxiang (M)" <chenxiang66@hisilicon.com>
> Cc: Shameerali Kolothum Thodi  <shameerali.kolothum.thodi@huawei.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Link: https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro.org/
> ---


Reviewed-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>


Thanks
Neeraj

>   kernel/rcu/srcutree.c | 20 +++++++++++++-------
>   1 file changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 50ba70f019dea..0db7873f4e95b 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -513,7 +513,7 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
>   
>   #define SRCU_INTERVAL		1	// Base delay if no expedited GPs pending.
>   #define SRCU_MAX_INTERVAL	10	// Maximum incremental delay from slow readers.
> -#define SRCU_MAX_NODELAY_PHASE	1	// Maximum per-GP-phase consecutive no-delay instances.
> +#define SRCU_MAX_NODELAY_PHASE	3	// Maximum per-GP-phase consecutive no-delay instances.
>   #define SRCU_MAX_NODELAY	100	// Maximum consecutive no-delay instances.
>   
>   /*
> @@ -522,16 +522,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
>    */
>   static unsigned long srcu_get_delay(struct srcu_struct *ssp)
>   {
> +	unsigned long gpstart;
> +	unsigned long j;
>   	unsigned long jbase = SRCU_INTERVAL;
>   
>   	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
>   		jbase = 0;
> -	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
> -		jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
> -	if (!jbase) {
> -		WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> -		if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
> -			jbase = 1;
> +	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
> +		j = jiffies - 1;
> +		gpstart = READ_ONCE(ssp->srcu_gp_start);
> +		if (time_after(j, gpstart))
> +			jbase += j - gpstart;
> +		if (!jbase) {
> +			WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> +			if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
> +				jbase = 1;
> +		}
>   	}
>   	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
>   }
Paul E. McKenney June 21, 2022, 7:36 p.m. UTC | #5
On Tue, Jun 21, 2022 at 07:43:08AM +0000, Shameerali Kolothum Thodi wrote:
> 
> 
> > -----Original Message-----
> > From: Paul E. McKenney [mailto:paulmck@kernel.org]
> > Sent: 20 June 2022 23:21
> > To: rcu@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org; kernel-team@fb.com;
> > rostedt@goodmis.org; Paul E. McKenney <paulmck@kernel.org>; Zhangfei
> > Gao <zhangfei.gao@linaro.org>; Shameerali Kolothum Thodi
> > <shameerali.kolothum.thodi@huawei.com>; Paolo Bonzini
> > <pbonzini@redhat.com>
> > Subject: [PATCH rcu 12/12] srcu: Block less aggressively for expedited grace
> > periods
> > 
> > Commit 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
> > from consuming CPU") fixed a problem where a long-running expedited
> > SRCU
> > grace period could block kernel live patching.  It did so by giving up
> > on expediting once a given SRCU expedited grace period grew too old.
> > 
> > Unfortunately, this added excessive delays to boots of embedded systems
> > running on qemu that use the ARM IORT RMR feature. 
> 
> As pointed out here[0]/[1], this delay has nothing to do with ARM IORT RMR
> feature. The delay is due to the "-bios QEMU_EFI.fd" line which emulates flash
> devices and requires excessive memslot ops during boot.

Good eyes, and I will update.

Are we stuck with the excessive memslot ops?  If not, great.

If we are stuck with them, there probably needs to be a way of adjusting
the SRCU delay parameters.  Making "-bios QEMU_EFI.fd" go quickly seems
to require quite a bit of spinning, more than is likely to be helpful
in general.

							Thanx, Paul

> Thanks,
> Shameer
> 
> [0] https://lore.kernel.org/all/110bbec5cee74efba0aad64360069a12@huawei.com/
> [1] https://lore.kernel.org/all/8735g649ew.wl-maz@kernel.org/
> 
> 
>  This commit
> > therefore
> > makes the transition away from expediting less aggressive, increasing
> > the per-grace-period phase number of non-sleeping polls of readers from
> > one to three and increasing the required grace-period age from one jiffy
> > (actually from zero to one jiffies) to two jiffies (actually from one
> > to two jiffies).
> > 
> > Fixes: 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
> > from consuming CPU")
> > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > Reported-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> > Reported-by: chenxiang (M)" <chenxiang66@hisilicon.com>
> > Cc: Shameerali Kolothum Thodi  <shameerali.kolothum.thodi@huawei.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Link:
> > https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro
> > .org/
> > ---
> >  kernel/rcu/srcutree.c | 20 +++++++++++++-------
> >  1 file changed, 13 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index 50ba70f019dea..0db7873f4e95b 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -513,7 +513,7 @@ static bool srcu_readers_active(struct srcu_struct
> > *ssp)
> > 
> >  #define SRCU_INTERVAL		1	// Base delay if no expedited GPs
> > pending.
> >  #define SRCU_MAX_INTERVAL	10	// Maximum incremental delay from
> > slow readers.
> > -#define SRCU_MAX_NODELAY_PHASE	1	// Maximum per-GP-phase
> > consecutive no-delay instances.
> > +#define SRCU_MAX_NODELAY_PHASE	3	// Maximum per-GP-phase
> > consecutive no-delay instances.
> >  #define SRCU_MAX_NODELAY	100	// Maximum consecutive no-delay
> > instances.
> > 
> >  /*
> > @@ -522,16 +522,22 @@ static bool srcu_readers_active(struct srcu_struct
> > *ssp)
> >   */
> >  static unsigned long srcu_get_delay(struct srcu_struct *ssp)
> >  {
> > +	unsigned long gpstart;
> > +	unsigned long j;
> >  	unsigned long jbase = SRCU_INTERVAL;
> > 
> >  	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq),
> > READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
> >  		jbase = 0;
> > -	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
> > -		jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
> > -	if (!jbase) {
> > -		WRITE_ONCE(ssp->srcu_n_exp_nodelay,
> > READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> > -		if (READ_ONCE(ssp->srcu_n_exp_nodelay) >
> > SRCU_MAX_NODELAY_PHASE)
> > -			jbase = 1;
> > +	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
> > +		j = jiffies - 1;
> > +		gpstart = READ_ONCE(ssp->srcu_gp_start);
> > +		if (time_after(j, gpstart))
> > +			jbase += j - gpstart;
> > +		if (!jbase) {
> > +			WRITE_ONCE(ssp->srcu_n_exp_nodelay,
> > READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> > +			if (READ_ONCE(ssp->srcu_n_exp_nodelay) >
> > SRCU_MAX_NODELAY_PHASE)
> > +				jbase = 1;
> > +		}
> >  	}
> >  	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
> >  }
> > --
> > 2.31.1.189.g2e36527f23
>
Paul E. McKenney June 21, 2022, 10:25 p.m. UTC | #6
On Tue, Jun 21, 2022 at 03:43:30PM +0530, Neeraj Upadhyay wrote:
> 
> 
> On 6/21/2022 3:50 AM, Paul E. McKenney wrote:
> > Commit 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers
> > from consuming CPU") fixed a problem where a long-running expedited SRCU
> > grace period could block kernel live patching.  It did so by giving up
> > on expediting once a given SRCU expedited grace period grew too old.
> > 
> > Unfortunately, this added excessive delays to boots of embedded systems
> > running on qemu that use the ARM IORT RMR feature.  This commit therefore
> > makes the transition away from expediting less aggressive, increasing
> > the per-grace-period phase number of non-sleeping polls of readers from
> > one to three and increasing the required grace-period age from one jiffy
> > (actually from zero to one jiffies) to two jiffies (actually from one
> > to two jiffies).
> > 
> > Fixes: 282d8998e997 ("srcu: Prevent expedited GPs and blocking readers from consuming CPU")
> > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > Reported-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> > Reported-by: chenxiang (M)" <chenxiang66@hisilicon.com>
> > Cc: Shameerali Kolothum Thodi  <shameerali.kolothum.thodi@huawei.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Link: https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro.org/
> > ---
> 
> 
> Reviewed-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>

And I applied your Reviewed-by to the other commits you gave it for,
and thank you very much!  Much nicer to fix the bugs now than to have
to do separate patches for them later.  ;-)

							Thanx, Paul

> Thanks
> Neeraj
> 
> >   kernel/rcu/srcutree.c | 20 +++++++++++++-------
> >   1 file changed, 13 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index 50ba70f019dea..0db7873f4e95b 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -513,7 +513,7 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
> >   #define SRCU_INTERVAL		1	// Base delay if no expedited GPs pending.
> >   #define SRCU_MAX_INTERVAL	10	// Maximum incremental delay from slow readers.
> > -#define SRCU_MAX_NODELAY_PHASE	1	// Maximum per-GP-phase consecutive no-delay instances.
> > +#define SRCU_MAX_NODELAY_PHASE	3	// Maximum per-GP-phase consecutive no-delay instances.
> >   #define SRCU_MAX_NODELAY	100	// Maximum consecutive no-delay instances.
> >   /*
> > @@ -522,16 +522,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
> >    */
> >   static unsigned long srcu_get_delay(struct srcu_struct *ssp)
> >   {
> > +	unsigned long gpstart;
> > +	unsigned long j;
> >   	unsigned long jbase = SRCU_INTERVAL;
> >   	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
> >   		jbase = 0;
> > -	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
> > -		jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
> > -	if (!jbase) {
> > -		WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> > -		if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
> > -			jbase = 1;
> > +	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
> > +		j = jiffies - 1;
> > +		gpstart = READ_ONCE(ssp->srcu_gp_start);
> > +		if (time_after(j, gpstart))
> > +			jbase += j - gpstart;
> > +		if (!jbase) {
> > +			WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
> > +			if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
> > +				jbase = 1;
> > +		}
> >   	}
> >   	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
> >   }
diff mbox series

Patch

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 50ba70f019dea..0db7873f4e95b 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -513,7 +513,7 @@  static bool srcu_readers_active(struct srcu_struct *ssp)
 
 #define SRCU_INTERVAL		1	// Base delay if no expedited GPs pending.
 #define SRCU_MAX_INTERVAL	10	// Maximum incremental delay from slow readers.
-#define SRCU_MAX_NODELAY_PHASE	1	// Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_MAX_NODELAY_PHASE	3	// Maximum per-GP-phase consecutive no-delay instances.
 #define SRCU_MAX_NODELAY	100	// Maximum consecutive no-delay instances.
 
 /*
@@ -522,16 +522,22 @@  static bool srcu_readers_active(struct srcu_struct *ssp)
  */
 static unsigned long srcu_get_delay(struct srcu_struct *ssp)
 {
+	unsigned long gpstart;
+	unsigned long j;
 	unsigned long jbase = SRCU_INTERVAL;
 
 	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
 		jbase = 0;
-	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
-		jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
-	if (!jbase) {
-		WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
-		if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
-			jbase = 1;
+	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
+		j = jiffies - 1;
+		gpstart = READ_ONCE(ssp->srcu_gp_start);
+		if (time_after(j, gpstart))
+			jbase += j - gpstart;
+		if (!jbase) {
+			WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+			if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
+				jbase = 1;
+		}
 	}
 	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
 }