[4/4] rcu/tree: Use schedule_delayed_work() instead of WQ_HIGHPRI queue

Message ID 20200918194817.48921-5-urezki@gmail.com (mailing list archive)
State New, archived
Series kvfree_rcu() and _LOCK_NESTING/_PREEMPT_RT

Commit Message

Uladzislau Rezki Sept. 18, 2020, 7:48 p.m. UTC
Recently a separate worker thread was introduced to maintain the
local page cache from regular kernel context, instead of from the
kvfree_rcu() contexts. That was done because k[v]free_rcu() can be
called from any context type, which is a problem from the allocation
point of view.

On the other hand, a lock-less way of obtaining a page has since
been introduced and injected directly into the k[v]free_rcu() path.

Therefore it is no longer important to use a high-priority workqueue
for the external job that refills the page cache as soon as it
becomes empty.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 kernel/rcu/tree.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)
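
To make the shape of the change concrete, below is a minimal userspace
sketch of the scheme the commit message describes, not the kernel code:
the fast path only pops a pre-filled block from a small cache under a
short lock, while a worker refills the cache later from regular context,
where allocation is always safe, so the refill job no longer needs a
high-priority workqueue. The names get_cached_page() and refill_cache()
and the pthread mutex are stand-ins, not kernel APIs.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

#define CACHE_SIZE 5			/* a small fixed-size cache */

static void *cache[CACHE_SIZE];
static int nr_cached;
static bool refill_queued;		/* mimics krcp->work_in_progress */
static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

/* Fast path: never calls the allocator, so it is safe in any context. */
static void *get_cached_page(void)
{
	void *page = NULL;

	pthread_mutex_lock(&cache_lock);
	if (nr_cached > 0)
		page = cache[--nr_cached];
	else if (!refill_queued)
		refill_queued = true;	/* ask the worker to refill */
	pthread_mutex_unlock(&cache_lock);

	return page;
}

/* Worker: runs later from ordinary context, where allocation may sleep. */
static void refill_cache(void)
{
	while (1) {
		void *page = malloc(4096);	/* stands in for a real page */

		if (!page)
			break;

		pthread_mutex_lock(&cache_lock);
		if (nr_cached == CACHE_SIZE) {
			pthread_mutex_unlock(&cache_lock);
			free(page);
			break;
		}
		cache[nr_cached++] = page;
		pthread_mutex_unlock(&cache_lock);
	}

	pthread_mutex_lock(&cache_lock);
	refill_queued = false;
	pthread_mutex_unlock(&cache_lock);
}

int main(void)
{
	void *p = get_cached_page();	/* cache empty: NULL, refill asked */

	refill_cache();			/* the worker runs later */
	p = get_cached_page();		/* now served straight from the cache */
	free(p);

	return 0;
}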

Comments

Paul E. McKenney Sept. 20, 2020, 3:06 p.m. UTC | #1
On Fri, Sep 18, 2020 at 09:48:17PM +0200, Uladzislau Rezki (Sony) wrote:
> Recently a separate worker thread was introduced to maintain the
> local page cache from regular kernel context, instead of from the
> kvfree_rcu() contexts. That was done because k[v]free_rcu() can be
> called from any context type, which is a problem from the allocation
> point of view.
> 
> On the other hand, a lock-less way of obtaining a page has since
> been introduced and injected directly into the k[v]free_rcu() path.
> 
> Therefore it is no longer important to use a high-priority workqueue
> for the external job that refills the page cache as soon as it
> becomes empty.
> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>

And I needed to apply the patch below to make this one pass rcutorture
scenarios SRCU-P and TREE05.  Repeat by:

tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 3 --configs "SRCU-P TREE05" --trust-make

Without the patch below, the system hangs very early in boot.

Please let me know if some other fix would be better.

						Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8ce1ea4..2424e2a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3481,7 +3481,8 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
 	if (!success) {
 		// Use delayed work, so we do not deadlock with rq->lock.
-		if (!atomic_xchg(&krcp->work_in_progress, 1))
+		if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
+		    !atomic_xchg(&krcp->work_in_progress, 1))
 			schedule_delayed_work(&krcp->page_cache_work, 1);
 
 		if (head == NULL)
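
The guard added above matters because schedule_delayed_work() needs the
scheduler and timers to be up; if the cache-refill path runs early
enough in boot, queueing the work wedges the system, which is what the
rcutorture scenarios tripped over. A minimal userspace sketch of the
pattern, with hypothetical stand-ins (queue_refill_work(),
scheduler_state) for the kernel state and APIs:

#include <stdatomic.h>
#include <stdio.h>

enum { SCHED_INACTIVE, SCHED_RUNNING };

static int scheduler_state = SCHED_INACTIVE;	/* mimics rcu_scheduler_active */
static atomic_int work_in_progress;		/* mimics krcp->work_in_progress */

static void queue_refill_work(void)	/* stand-in for schedule_delayed_work() */
{
	printf("refill work queued\n");
}

static void maybe_queue_refill(void)
{
	/*
	 * Before the scheduler runs there is nothing to execute deferred
	 * work, so queueing it would wedge early boot; skip instead. A
	 * later caller queues the work once the scheduler is up, and the
	 * atomic exchange keeps at most one instance in flight.
	 */
	if (scheduler_state == SCHED_RUNNING &&
	    !atomic_exchange(&work_in_progress, 1))
		queue_refill_work();
}

int main(void)
{
	maybe_queue_refill();		/* "early boot": silently skipped */

	scheduler_state = SCHED_RUNNING;
	maybe_queue_refill();		/* queued exactly once */

	return 0;
}
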
Uladzislau Rezki Sept. 21, 2020, 1:27 p.m. UTC | #2
On Sun, Sep 20, 2020 at 08:06:38AM -0700, Paul E. McKenney wrote:
> On Fri, Sep 18, 2020 at 09:48:17PM +0200, Uladzislau Rezki (Sony) wrote:
> > Recently a separate worker thread was introduced to maintain the
> > local page cache from regular kernel context, instead of from the
> > kvfree_rcu() contexts. That was done because k[v]free_rcu() can be
> > called from any context type, which is a problem from the allocation
> > point of view.
> > 
> > On the other hand, a lock-less way of obtaining a page has since
> > been introduced and injected directly into the k[v]free_rcu() path.
> > 
> > Therefore it is no longer important to use a high-priority workqueue
> > for the external job that refills the page cache as soon as it
> > becomes empty.
> > 
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> 
> And I needed to apply the patch below to make this one pass rcutorture
> scenarios SRCU-P and TREE05.  Repeat by:
> 
> tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 3 --configs "SRCU-P TREE05" --trust-make
> 
> Without the patch below, the system hangs very early in boot.
> 
> Please let me know if some other fix would be better.
> 
> 						Thanx, Paul
> 
> ------------------------------------------------------------------------
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 8ce1ea4..2424e2a 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3481,7 +3481,8 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  	success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
>  	if (!success) {
>  		// Use delayed work, so we do not deadlock with rq->lock.
> -		if (!atomic_xchg(&krcp->work_in_progress, 1))
> +		if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
> +		    !atomic_xchg(&krcp->work_in_progress, 1))
>  			schedule_delayed_work(&krcp->page_cache_work, 1);
>  
>  		if (head == NULL)
I will double-check!

--
Vlad Rezki

Patch

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d51209343029..f2b4215631f7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3100,7 +3100,7 @@ struct kfree_rcu_cpu {
 	 * lockless an access has to be protected by the
 	 * per-cpu lock.
 	 */
-	struct work_struct page_cache_work;
+	struct delayed_work page_cache_work;
 	atomic_t work_in_progress;
 	struct llist_head bkvcache;
 	int nr_bkv_objs;
@@ -3354,7 +3354,7 @@ static void fill_page_cache_func(struct work_struct *work)
 	struct kvfree_rcu_bulk_data *bnode;
 	struct kfree_rcu_cpu *krcp =
 		container_of(work, struct kfree_rcu_cpu,
-			page_cache_work);
+			page_cache_work.work);
 	unsigned long flags;
 	bool pushed;
 	int i;
@@ -3440,7 +3440,6 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
 	unsigned long flags;
 	struct kfree_rcu_cpu *krcp;
-	bool irq_disabled = irqs_disabled();
 	bool success;
 	void *ptr;
 
@@ -3473,9 +3472,9 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 
 	success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
 	if (!success) {
-		// TODO: schedule the work from the hrtimer.
-		if (!irq_disabled && !atomic_xchg(&krcp->work_in_progress, 1))
-			queue_work(system_highpri_wq, &krcp->page_cache_work);
+		// Use delayed work, so we do not deadlock with rq->lock.
+		if (!atomic_xchg(&krcp->work_in_progress, 1))
+			schedule_delayed_work(&krcp->page_cache_work, 1);
 
 		if (head == NULL)
 			// Inline if kvfree_rcu(one_arg) call.
@@ -4475,7 +4474,7 @@ static void __init kfree_rcu_batch_init(void)
 		}
 
 		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
-		INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
+		INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
 		krcp->initialized = true;
 	}
 	if (register_shrinker(&kfree_rcu_shrinker))
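
One detail of the conversion is easy to miss: a delayed_work embeds a
plain work item, so the callback still receives a struct work_struct
pointer, and fill_page_cache_func() must now point container_of() at
page_cache_work.work to recover the enclosing structure. A minimal
userspace sketch with simplified stand-in types (kfree_rcu_cpu_sketch
is hypothetical):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };

struct delayed_work {			/* simplified: work plus a timer */
	struct work_struct work;
	unsigned long expires;
};

struct kfree_rcu_cpu_sketch {		/* hypothetical mini version */
	int nr_bkv_objs;
	struct delayed_work page_cache_work;
};

static void fill_page_cache_func(struct work_struct *work)
{
	/* The workqueue hands us &dw->work, so step over both members. */
	struct kfree_rcu_cpu_sketch *krcp =
		container_of(work, struct kfree_rcu_cpu_sketch,
			     page_cache_work.work);

	printf("nr_bkv_objs = %d\n", krcp->nr_bkv_objs);
}

int main(void)
{
	struct kfree_rcu_cpu_sketch krcp = { .nr_bkv_objs = 3 };

	fill_page_cache_func(&krcp.page_cache_work.work);
	return 0;
}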