
[1/2] mm: protect local lock sections with rcu_read_lock (on RT)

Message ID 20220222144907.023121407@redhat.com
State New
Series replace work queue synchronization with synchronize_rcu

Commit Message

Marcelo Tosatti Feb. 22, 2022, 2:47 p.m. UTC
For the per-CPU LRU page vectors, augment the local lock protected
code sections with rcu_read_lock.

This makes it possible to replace the queueing of work items on all 
CPUs by synchronize_rcu (which is necessary to run FIFO:1 applications
uninterrupted on isolated CPUs).
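
For illustration only (the function name below is invented; the actual
replacement lives in patch 2/2): once every pagevec section runs under
rcu_read_lock, a caller that must wait for all concurrent per-CPU
pagevec users can do so with a grace period instead of queueing and
flushing a drain work item on every CPU:

/* Illustrative sketch, not the 2/2 change itself. */
static void wait_for_lru_pvec_users(void)
{
	/*
	 * Old scheme (simplified): for_each_online_cpu(cpu)
	 *	queue_work_on(cpu, mm_percpu_wq, work), then flush each item,
	 * which interrupts isolated CPUs running SCHED_FIFO tasks.
	 *
	 * New scheme: every lru_local_lock() section below is an RCU
	 * read-side critical section, so a grace period guarantees they
	 * have all completed and that later ones observe our updated state.
	 */
	synchronize_rcu();
}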

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Comments

Nicolas Saenz Julienne Feb. 22, 2022, 3:21 p.m. UTC | #1
On Tue, 2022-02-22 at 11:47 -0300, Marcelo Tosatti wrote:
> For the per-CPU LRU page vectors, augment the local lock protected
> code sections with rcu_read_lock.
> 
> This makes it possible to replace the queueing of work items on all 
> CPUs by synchronize_rcu (which is necessary to run FIFO:1 applications
> uninterrupted on isolated CPUs).

I don't think this is needed. In RT local_locks use a spinlock. See
kernel/locking/spinlock_rt.c:

"The RT [spinlock] substitutions explicitly disable migration and take
rcu_read_lock() across the lock held section."

Regards,
Marcelo Tosatti Feb. 22, 2022, 3:51 p.m. UTC | #2
On Tue, Feb 22, 2022 at 04:21:26PM +0100, Nicolas Saenz Julienne wrote:
> On Tue, 2022-02-22 at 11:47 -0300, Marcelo Tosatti wrote:
> > For the per-CPU LRU page vectors, augment the local lock protected
> > code sections with rcu_read_lock.
> > 
> > This makes it possible to replace the queueing of work items on all 
> > CPUs by synchronize_rcu (which is necessary to run FIFO:1 applications
> > uninterrupted on isolated CPUs).
> 
> I don't think this is needed. In RT local_locks use a spinlock. See
> kernel/locking/spinlock_rt.c:
> 
> "The RT [spinlock] substitutions explicitly disable migration and take
> rcu_read_lock() across the lock held section."

Nice! Then the migrate_disable from __local_lock and friends seems unnecessary as
well

#define __local_lock(__lock)                                    \
        do {                                                    \
                migrate_disable();                              \
                spin_lock(this_cpu_ptr((__lock)));              \
        } while (0)

Since:

static __always_inline void __rt_spin_lock(spinlock_t *lock)
{
        rtlock_might_resched();
        rtlock_lock(&lock->lock); 
        rcu_read_lock();
        migrate_disable();
}

Will resend -v2.
Nicolas Saenz Julienne Feb. 22, 2022, 4:16 p.m. UTC | #3
On Tue, 2022-02-22 at 12:51 -0300, Marcelo Tosatti wrote:
> On Tue, Feb 22, 2022 at 04:21:26PM +0100, Nicolas Saenz Julienne wrote:
> > On Tue, 2022-02-22 at 11:47 -0300, Marcelo Tosatti wrote:
> > > For the per-CPU LRU page vectors, augment the local lock protected
> > > code sections with rcu_read_lock.
> > > 
> > > This makes it possible to replace the queueing of work items on all 
> > > CPUs by synchronize_rcu (which is necessary to run FIFO:1 applications
> > > uninterrupted on isolated CPUs).
> > 
> > I don't think this is needed. In RT local_locks use a spinlock. See
> > kernel/locking/spinlock_rt.c:
> > 
> > "The RT [spinlock] substitutions explicitly disable migration and take
> > rcu_read_lock() across the lock held section."
> 
> Nice! Then the migrate_disable from __local_lock and friends seems unnecessary as
> well
>
> #define __local_lock(__lock)                                    \
>         do {                                                    \
>                 migrate_disable();                              \
>                 spin_lock(this_cpu_ptr((__lock)));              \
>         } while (0)
> 

It's needed as you might migrate between:

	cpu1_lock = this_cpu_ptr(__lock);
	// migrate here to cpu2
	spin_lock(cpu1_lock);
	// unprotected write into cpu2 lists
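
With migrate_disable() taken before the per-CPU lookup, as __local_lock
does above, that window cannot open (illustrative sketch):

	migrate_disable();           /* task can no longer move to cpu2 */
	lock = this_cpu_ptr(__lock); /* lookup and ...                  */
	spin_lock(lock);             /* ... lock refer to the same CPU  */
	/* writes into this CPU's lists happen under this CPU's lock */
	spin_unlock(lock);
	migrate_enable();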

Regards,

Patch

Index: linux-rt-devel/mm/swap.c
===================================================================
--- linux-rt-devel.orig/mm/swap.c
+++ linux-rt-devel/mm/swap.c
@@ -73,6 +73,48 @@  static DEFINE_PER_CPU(struct lru_pvecs,
 	.lock = INIT_LOCAL_LOCK(lock),
 };
 
+#ifdef CONFIG_PREEMPT_RT
+
+#define lru_local_lock(lock)		\
+	do {				\
+		rcu_read_lock();	\
+		local_lock(lock);	\
+	} while (0)
+
+#define lru_local_unlock(lock)		\
+	do {				\
+		local_unlock(lock);	\
+		rcu_read_unlock();	\
+	} while (0)
+
+#define lru_local_lock_irqsave(lock, flags)		\
+	do {						\
+		rcu_read_lock();			\
+		local_lock_irqsave(lock, flags);	\
+	} while (0)
+
+#define lru_local_unlock_irqrestore(lock, flags)		\
+	do {							\
+		local_unlock_irqrestore(lock, flags);		\
+		rcu_read_unlock();				\
+	} while (0)
+
+#else
+
+#define lru_local_lock(lock)		\
+	local_lock(lock)
+
+#define lru_local_unlock(lock)		\
+	local_unlock(lock)
+
+#define lru_local_lock_irqsave(lock, flags)		\
+	local_lock_irqsave(lock, flags)
+
+#define lru_local_unlock_irqrestore(lock, flags)	\
+	local_unlock_irqrestore(lock, flags)
+
+#endif
+
 /*
  * This path almost never happens for VM activity - pages are normally
  * freed via pagevecs.  But it gets used by networking.
@@ -255,11 +297,11 @@  void folio_rotate_reclaimable(struct fol
 		unsigned long flags;
 
 		folio_get(folio);
-		local_lock_irqsave(&lru_rotate.lock, flags);
+		lru_local_lock_irqsave(&lru_rotate.lock, flags);
 		pvec = this_cpu_ptr(&lru_rotate.pvec);
 		if (pagevec_add_and_need_flush(pvec, &folio->page))
 			pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
-		local_unlock_irqrestore(&lru_rotate.lock, flags);
+		lru_local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
 }
 
@@ -351,11 +393,11 @@  static void folio_activate(struct folio
 		struct pagevec *pvec;
 
 		folio_get(folio);
-		local_lock(&lru_pvecs.lock);
+		lru_local_lock(&lru_pvecs.lock);
 		pvec = this_cpu_ptr(&lru_pvecs.activate_page);
 		if (pagevec_add_and_need_flush(pvec, &folio->page))
 			pagevec_lru_move_fn(pvec, __activate_page);
-		local_unlock(&lru_pvecs.lock);
+		lru_local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -382,7 +424,7 @@  static void __lru_cache_activate_folio(s
 	struct pagevec *pvec;
 	int i;
 
-	local_lock(&lru_pvecs.lock);
+	lru_local_lock(&lru_pvecs.lock);
 	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
 
 	/*
@@ -404,7 +446,7 @@  static void __lru_cache_activate_folio(s
 		}
 	}
 
-	local_unlock(&lru_pvecs.lock);
+	lru_local_unlock(&lru_pvecs.lock);
 }
 
 /*
@@ -463,11 +505,11 @@  void folio_add_lru(struct folio *folio)
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
 
 	folio_get(folio);
-	local_lock(&lru_pvecs.lock);
+	lru_local_lock(&lru_pvecs.lock);
 	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
 	if (pagevec_add_and_need_flush(pvec, &folio->page))
 		__pagevec_lru_add(pvec);
-	local_unlock(&lru_pvecs.lock);
+	lru_local_unlock(&lru_pvecs.lock);
 }
 EXPORT_SYMBOL(folio_add_lru);
 
@@ -618,9 +660,9 @@  void lru_add_drain_cpu(int cpu)
 		unsigned long flags;
 
 		/* No harm done if a racing interrupt already did this */
-		local_lock_irqsave(&lru_rotate.lock, flags);
+		lru_local_lock_irqsave(&lru_rotate.lock, flags);
 		pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
-		local_unlock_irqrestore(&lru_rotate.lock, flags);
+		lru_local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
 
 	pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu);
@@ -658,12 +700,12 @@  void deactivate_file_page(struct page *p
 	if (likely(get_page_unless_zero(page))) {
 		struct pagevec *pvec;
 
-		local_lock(&lru_pvecs.lock);
+		lru_local_lock(&lru_pvecs.lock);
 		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
 
 		if (pagevec_add_and_need_flush(pvec, page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
-		local_unlock(&lru_pvecs.lock);
+		lru_local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -680,12 +722,12 @@  void deactivate_page(struct page *page)
 	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
 		struct pagevec *pvec;
 
-		local_lock(&lru_pvecs.lock);
+		lru_local_lock(&lru_pvecs.lock);
 		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
 		get_page(page);
 		if (pagevec_add_and_need_flush(pvec, page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_fn);
-		local_unlock(&lru_pvecs.lock);
+		lru_local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -702,20 +744,20 @@  void mark_page_lazyfree(struct page *pag
 	    !PageSwapCache(page) && !PageUnevictable(page)) {
 		struct pagevec *pvec;
 
-		local_lock(&lru_pvecs.lock);
+		lru_local_lock(&lru_pvecs.lock);
 		pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
 		get_page(page);
 		if (pagevec_add_and_need_flush(pvec, page))
 			pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
-		local_unlock(&lru_pvecs.lock);
+		lru_local_unlock(&lru_pvecs.lock);
 	}
 }
 
 void lru_add_drain(void)
 {
-	local_lock(&lru_pvecs.lock);
+	lru_local_lock(&lru_pvecs.lock);
 	lru_add_drain_cpu(smp_processor_id());
-	local_unlock(&lru_pvecs.lock);
+	lru_local_unlock(&lru_pvecs.lock);
 }
 
 /*
@@ -726,18 +768,18 @@  void lru_add_drain(void)
  */
 static void lru_add_and_bh_lrus_drain(void)
 {
-	local_lock(&lru_pvecs.lock);
+	lru_local_lock(&lru_pvecs.lock);
 	lru_add_drain_cpu(smp_processor_id());
-	local_unlock(&lru_pvecs.lock);
+	lru_local_unlock(&lru_pvecs.lock);
 	invalidate_bh_lrus_cpu();
 }
 
 void lru_add_drain_cpu_zone(struct zone *zone)
 {
-	local_lock(&lru_pvecs.lock);
+	lru_local_lock(&lru_pvecs.lock);
 	lru_add_drain_cpu(smp_processor_id());
 	drain_local_pages(zone);
-	local_unlock(&lru_pvecs.lock);
+	lru_local_unlock(&lru_pvecs.lock);
 }
 
 #ifdef CONFIG_SMP