--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -308,7 +308,7 @@ config TASKS_TRACE_RCU_READ_MB
config RCU_LAZY
bool "RCU callback lazy invocation functionality"
- depends on RCU_NOCB_CPU
+ depends on TREE_RCU
default n
help
To save power, batch RCU callbacks and flush after delay, memory
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -104,6 +104,15 @@ static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rscl
return false;
}
+static inline bool rcu_segcblist_nocb_transitioning(struct rcu_segcblist *rsclp)
+{
+ if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
+ rcu_segcblist_test_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_RCU_CORE))
+ return true;
+
+ return false;
+}
+
static inline bool rcu_segcblist_next_is_lazy(struct rcu_segcblist *rsclp)
{
if (IS_ENABLED(CONFIG_RCU_LAZY) &&
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -669,8 +669,19 @@ NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick);
*/
int rcu_needs_cpu(void)
{
- return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
- !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
+ if (rcu_segcblist_empty(&rdp->cblist))
+ return false;
+
+ if (rcu_rdp_is_offloaded(rdp))
+ return false;
+
+ if (IS_ENABLED(CONFIG_RCU_LAZY) &&
+ rcu_segcblist_n_cbs_lazy(&rdp->cblist) == rcu_segcblist_n_cbs(&rdp->cblist))
+ return false;
+
+ return true;
}
/*
@@ -1086,7 +1097,7 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
gp_seq_req = rcu_seq_snap(&rcu_state.gp_seq);
if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req)) {
/* The RCU_NEXT_TAIL has been flushed, reset the lazy bit accordingly */
- if (IS_ENABLED(CONFIG_RCU_LAZY) && qhimark_lazy && rcu_segcblist_completely_offloaded(&rdp->cblist))
+ if (IS_ENABLED(CONFIG_RCU_LAZY) && qhimark_lazy && !rcu_segcblist_nocb_transitioning(&rdp->cblist))
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_NEXT_TAIL_LAZY);
ret = rcu_start_this_gp(rnp, rdp, gp_seq_req);
}
@@ -1121,7 +1132,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
/* Old request still live, so mark recent callbacks. */
if (rcu_segcblist_accelerate(&rdp->cblist, c)) {
/* The RCU_NEXT_TAIL has been flushed, reset the lazy bit accordingly */
- if (IS_ENABLED(CONFIG_RCU_LAZY) && qhimark_lazy && rcu_segcblist_completely_offloaded(&rdp->cblist))
+ if (IS_ENABLED(CONFIG_RCU_LAZY) && qhimark_lazy && !rcu_segcblist_nocb_transitioning(&rdp->cblist))
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_NEXT_TAIL_LAZY);
}
return;
@@ -2556,6 +2567,14 @@ static int __init rcu_spawn_core_kthreads(void)
static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
unsigned long flags)
{
+#ifdef CONFIG_RCU_LAZY
+ if (rcu_segcblist_n_cbs_lazy(&rdp->cblist) == 1) {
+ if (!timer_pending(&rdp->lazy_timer)) {
+ rdp->lazy_timer.expires = jiffies + jiffies_lazy_flush;
+ add_timer_on(&rdp->lazy_timer, smp_processor_id());
+ }
+ }
+#endif
/*
* If called from an extended quiescent state, invoke the RCU
* core in order to force a re-evaluation of RCU's idleness.
@@ -2577,6 +2596,7 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
rdp->qlen_last_fqs_check + qhimark)) {
+ rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_NEXT_TAIL_LAZY);
/* Are we ignoring a completed grace period? */
note_gp_changes(rdp);
@@ -2644,6 +2664,110 @@ static void check_cb_ovld(struct rcu_data *rdp)
raw_spin_unlock_rcu_node(rnp);
}
+#ifdef CONFIG_RCU_LAZY
+static unsigned long
+lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu;
+ unsigned long count = 0;
+
+ /* Snapshot count of all CPUs */
+ for_each_possible_cpu(cpu) {
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ count += rcu_segcblist_n_cbs_lazy(&rdp->cblist);
+ }
+
+ return count ? count : SHRINK_EMPTY;
+}
+
+static unsigned long
+lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu;
+ unsigned long flags;
+ unsigned long count = 0;
+
+ /* Protect against concurrent (de-)offloading. */
+ if (!mutex_trylock(&rcu_state.barrier_mutex)) {
+ /*
+ * But really don't insist if barrier_mutex is contended since we
+ * can't guarantee that it will never engage in a dependency
+ * chain involving memory allocation. The lock is seldom contended
+ * anyway.
+ */
+ return 0;
+ }
+
+ /* Snapshot count of all CPUs */
+ for_each_possible_cpu(cpu) {
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ int _count;
+
+ if (!rcu_segcblist_n_cbs_lazy(&rdp->cblist))
+ continue;
+
+ rcu_nocb_lock_irqsave(rdp, flags);
+ _count = rcu_segcblist_n_cbs_lazy(&rdp->cblist);
+ if (!_count) {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ continue;
+ }
+ rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_NEXT_TAIL_LAZY);
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+
+ if (rcu_rdp_is_offloaded(rdp))
+ wake_nocb_gp(rdp, false);
+ sc->nr_to_scan -= _count;
+ count += _count;
+ if (sc->nr_to_scan <= 0)
+ break;
+ }
+
+ mutex_unlock(&rcu_state.barrier_mutex);
+
+ return count ? count : SHRINK_STOP;
+}
+
+static struct shrinker lazy_rcu_shrinker = {
+ .count_objects = lazy_rcu_shrink_count,
+ .scan_objects = lazy_rcu_shrink_scan,
+ .batch = 0,
+ .seeks = DEFAULT_SEEKS,
+};
+
+/* Lazy timer expiration callback for non-offloaded rdp */
+static void rcu_lazy_timer(struct timer_list *timer)
+{
+ unsigned long flags;
+ struct rcu_data *rdp = container_of(timer, struct rcu_data, lazy_timer);
+ unsigned long delta;
+ unsigned long jiff;
+
+ WARN_ON_ONCE(rdp->cpu != smp_processor_id());
+ /*
+ * Protect against concurrent (de-)offloading on -RT where softirqs
+ * are preemptible.
+ */
+ local_irq_save(flags);
+ if (rcu_rdp_is_offloaded(rdp))
+ goto out;
+
+ if (!rcu_segcblist_n_cbs_lazy(&rdp->cblist))
+ goto out;
+
+ jiff = READ_ONCE(jiffies);
+ delta = jiff - rdp->lazy_firstq;
+
+ if (delta >= LAZY_FLUSH_JIFFIES)
+ rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_NEXT_TAIL_LAZY);
+ else
+ mod_timer(timer, jiff + (LAZY_FLUSH_JIFFIES - delta));
+out:
+ local_irq_restore(flags);
+}
+#endif
+
/*
* Handle lazy callbacks. Return true if no further handling is needed (unlocks nocb then).
* Return false if further treatment is needed (wake rcuog kthread, set the nocb timer, etc...).
@@ -2667,7 +2791,11 @@ static bool __call_rcu_lazy(struct rcu_data *rdp, bool was_pending, bool lazy, u
/* First lazy callback on an empty queue, set the timer if necessary */
if (lazy_len == 1) {
WRITE_ONCE(rdp->lazy_firstq, jiffies);
- if (!was_pending)
+ /*
+ * nocb_gp_wait() will set the timer for us if it is already tracking
+ * pending callbacks.
+ */
+ if (!rcu_rdp_is_offloaded(rdp) || !was_pending)
return false;
else
goto out;
@@ -3958,7 +4086,8 @@ static int rcu_pending(int user)
/* Has RCU gone idle with this CPU needing another grace period? */
if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
!rcu_rdp_is_offloaded(rdp) &&
- !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
+ !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL) &&
+ !rcu_segcblist_next_is_lazy(&rdp->cblist))
return 1;
/* Have RCU grace period completed or started? */
@@ -4363,6 +4492,9 @@ rcu_boot_init_percpu_data(int cpu)
rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
rdp->last_sched_clock = jiffies;
rdp->cpu = cpu;
+#ifdef CONFIG_RCU_LAZY
+ timer_setup(&rdp->lazy_timer, rcu_lazy_timer, TIMER_PINNED);
+#endif
rcu_boot_init_nocb_percpu_data(rdp);
}
@@ -4588,6 +4720,9 @@ void rcu_report_dead(unsigned int cpu)
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
arch_spin_unlock(&rcu_state.ofl_lock);
+#ifdef CONFIG_RCU_LAZY
+ del_timer(&rdp->lazy_timer);
+#endif
rdp->cpu_started = false;
}
@@ -5098,6 +5233,12 @@ void __init rcu_init(void)
(void)start_poll_synchronize_rcu_expedited();
rcu_test_sync_prims();
+
+#ifdef CONFIG_RCU_LAZY
+ if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy"))
+ pr_err("Failed to register lazy_rcu shrinker!\n");
+#endif // #ifdef CONFIG_RCU_LAZY
+
}
#include "tree_stall.h"
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -282,6 +282,7 @@ struct rcu_data {
struct rcu_snap_record snap_record; /* Snapshot of core stats at half of */
/* the first RCU stall timeout */
unsigned long lazy_firstq;
+ struct timer_list lazy_timer;
int cpu;
};
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1055,6 +1055,9 @@ static int rdp_offload_toggle(struct rcu_data *rdp,
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
bool wake_gp = false;
+ /* Unlazy pending callbacks (don't bother arming the right lazy timer) */
+ rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_NEXT_TAIL_LAZY);
+
rcu_segcblist_offload(cblist, offload);
if (rdp->nocb_cb_sleep)
@@ -1116,9 +1119,6 @@ static long rcu_nocb_rdp_deoffload(void *arg)
*/
rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
invoke_rcu_core();
- /* Deoffloaded doesn't support lazyness yet */
- rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_NEXT_TAIL_LAZY);
-
wake_gp = rdp_offload_toggle(rdp, false, flags);
mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
@@ -1259,6 +1259,12 @@ static long rcu_nocb_rdp_offload(void *arg)
rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
rcu_nocb_unlock_irqrestore(rdp, flags);
+ /*
+ * The lazy timer is protected against concurrent (de-)offloading.
+ * Still, no need to keep it around.
+ */
+ del_timer(&rdp->lazy_timer);
+
return 0;
}
@@ -1286,99 +1292,6 @@ int rcu_nocb_cpu_offload(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
-#ifdef CONFIG_RCU_LAZY
-static unsigned long
-lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
-{
- int cpu;
- unsigned long count = 0;
-
- if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask)))
- return 0;
-
- /* Protect rcu_nocb_mask against concurrent (de-)offloading. */
- if (!mutex_trylock(&rcu_state.barrier_mutex))
- return 0;
-
- /* Snapshot count of all CPUs */
- for_each_cpu(cpu, rcu_nocb_mask) {
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
-
- count += rcu_segcblist_n_cbs_lazy(&rdp->cblist);
- }
-
- mutex_unlock(&rcu_state.barrier_mutex);
-
- return count ? count : SHRINK_EMPTY;
-}
-
-static unsigned long
-lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
-{
- int cpu;
- unsigned long flags;
- unsigned long count = 0;
-
- if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask)))
- return 0;
- /*
- * Protect against concurrent (de-)offloading. Otherwise nocb locking
- * may be ignored or imbalanced.
- */
- if (!mutex_trylock(&rcu_state.barrier_mutex)) {
- /*
- * But really don't insist if barrier_mutex is contended since we
- * can't guarantee that it will never engage in a dependency
- * chain involving memory allocation. The lock is seldom contended
- * anyway.
- */
- return 0;
- }
-
- /* Snapshot count of all CPUs */
- for_each_cpu(cpu, rcu_nocb_mask) {
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
- int _count;
-
- if (WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)))
- continue;
-
- if (!rcu_segcblist_n_cbs_lazy(&rdp->cblist))
- continue;
-
- rcu_nocb_lock_irqsave(rdp, flags);
- /*
- * Recheck under the nocb lock. Since we are not holding the bypass
- * lock we may still race with increments from the enqueuer but still
- * we know for sure if there is at least one lazy callback.
- */
- _count = rcu_segcblist_n_cbs_lazy(&rdp->cblist);
- if (!_count) {
- rcu_nocb_unlock_irqrestore(rdp, flags);
- continue;
- }
- rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_NEXT_TAIL_LAZY);
- rcu_nocb_unlock_irqrestore(rdp, flags);
- wake_nocb_gp(rdp, false);
- sc->nr_to_scan -= _count;
- count += _count;
- if (sc->nr_to_scan <= 0)
- break;
- }
-
- mutex_unlock(&rcu_state.barrier_mutex);
-
- return count ? count : SHRINK_STOP;
-}
-
-static struct shrinker lazy_rcu_shrinker = {
- .count_objects = lazy_rcu_shrink_count,
- .scan_objects = lazy_rcu_shrink_scan,
- .batch = 0,
- .seeks = DEFAULT_SEEKS,
-};
-#endif // #ifdef CONFIG_RCU_LAZY
-
void __init rcu_init_nohz(void)
{
int cpu;
@@ -1409,11 +1322,6 @@ void __init rcu_init_nohz(void)
if (!rcu_state.nocb_is_setup)
return;
-#ifdef CONFIG_RCU_LAZY
- if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy"))
- pr_err("Failed to register lazy_rcu shrinker!\n");
-#endif // #ifdef CONFIG_RCU_LAZY
-
if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
cpumask_and(rcu_nocb_mask, cpu_possible_mask,
Support the lazy queue with CONFIG_RCU_NOCB_CPU=n or CONFIG_RCU_NOCB_CPU=y
with non-offloaded rdp. This reuses most of the lazy infrastructure. The
major difference is the addition of a dedicated per-CPU timer which runs as
long as the queue is lazy, to make sure that lazy callbacks eventually expire.

It's worth noting that, for performance reasons, the timer is not cancelled
when the lazy queue is accelerated (reset). It may therefore fire spuriously,
though the delay is long enough (10 seconds) for that to go mostly unnoticed.
Nohz_full CPUs shouldn't suffer from this since they rely on the NOCB
implementation.

Some interesting numbers have been observed on a mostly idle system. The test
runs "sleep 10" 100 times on an 8-CPU machine and computes the average idle
time spent in each C-state across all CPUs, before and after this patch. The
following shows the improvement:

Before the patch:

    POLL:   0.000006
    C1:     0.001064
    C1E:    0.000777
    C3:     0.000457
    C6:     2.711224
    C7s:   47.484802
    Total: 50.198330

After the patch:

    POLL:   0.000011
    C1:     0.001088
    C1E:    0.000874
    C3:     0.000545
    C6:     3.234707
    C7s:   53.101949
    Total: 56.339175

Diff:

    POLL:  +0.000005 (+43.73%)
    C1:    +0.000024  (+2.25%)
    C1E:   +0.000097 (+11.11%)
    C3:    +0.000088 (+16.16%)
    C6:    +0.523482 (+16.18%)
    C7s:   +5.617148 (+10.58%)
    Total: +6.140844 (+10.90%)

It's worth noting that the above may depend on the idle load (here an idle
ssh connection is probably the source of periodic lazy callbacks that get
batched, hence the improvement). More importantly, further testing is
mandatory to ensure that this doesn't introduce a performance regression
under busy loads.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 kernel/rcu/Kconfig         |   2 +-
 kernel/rcu/rcu_segcblist.h |   9 +++
 kernel/rcu/tree.c          | 153 +++++++++++++++++++++++++++++++++++--
 kernel/rcu/tree.h          |   1 +
 kernel/rcu/tree_nocb.h     | 110 +++----------------------
 5 files changed, 167 insertions(+), 108 deletions(-)
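
For reference, here is a minimal user-space sketch of the C-state measurement
described above. It assumes the residency counters are read from the cpuidle
sysfs interface (/sys/devices/system/cpu/cpuN/cpuidle/stateM/time and
.../name); the numbers above may well have been gathered with a different
tool, and the helper names (read_ull(), snapshot()) are made up for
illustration. Averaging over the 100 runs is left to a wrapper loop.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_CPUS	64
#define MAX_STATES	16

/* Read a single unsigned long long from a sysfs file; return 0 on success. */
static int read_ull(const char *path, unsigned long long *val)
{
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	if (fscanf(f, "%llu", val) != 1)
		*val = 0;
	fclose(f);
	return 0;
}

/* Sum each C-state's residency (usecs) over all CPUs and record its name. */
static void snapshot(unsigned long long total[MAX_STATES],
		     char names[MAX_STATES][32])
{
	char path[256];
	unsigned long long t;
	int cpu, state;

	memset(total, 0, MAX_STATES * sizeof(*total));
	for (cpu = 0; cpu < MAX_CPUS; cpu++) {
		for (state = 0; state < MAX_STATES; state++) {
			FILE *f;

			snprintf(path, sizeof(path),
				 "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/time",
				 cpu, state);
			if (read_ull(path, &t))
				break; /* no such CPU or C-state */
			total[state] += t;

			snprintf(path, sizeof(path),
				 "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
				 cpu, state);
			f = fopen(path, "r");
			if (f) {
				if (fgets(names[state], 32, f))
					names[state][strcspn(names[state], "\n")] = '\0';
				fclose(f);
			}
		}
	}
}

int main(void)
{
	unsigned long long before[MAX_STATES], after[MAX_STATES];
	char names[MAX_STATES][32] = { "" };
	int s;

	snapshot(before, names);
	if (system("sleep 10"))
		return 1;
	snapshot(after, names);

	for (s = 0; s < MAX_STATES && names[s][0]; s++)
		printf("%-6s %llu usecs idle\n", names[s], after[s] - before[s]);
	return 0;
}

The state labels (POLL, C1, C1E, ...) come straight from the cpuidle sysfs
"name" files, which is presumably where the labels in the table above
originate as well.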