[2/6] mm/swap: Introduce alternative per-cpu LRU cache locking

Message ID	20210921161323.607817-3-nsaenzju@redhat.com (mailing list archive)
State	New
Headers	show Return-Path: <SRS0=tPO9=OL=kvack.org=owner-linux-mm@kernel.org> DMARC-Filter: OpenDMARC Filter v1.4.1 mail.kernel.org 3C1426126A From: Nicolas Saenz Julienne <nsaenzju@redhat.com> To: akpm@linux-foundation.org, frederic@kernel.org Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org, tglx@linutronix.de, cl@linux.com, peterz@infradead.org, juri.lelli@redhat.com, mingo@redhat.com, mtosatti@redhat.com, nilal@redhat.com, mgorman@suse.de, ppandit@redhat.com, williams@redhat.com, bigeasy@linutronix.de, anna-maria@linutronix.de, linux-rt-users@vger.kernel.org, Nicolas Saenz Julienne <nsaenzju@redhat.com> Subject: [PATCH 2/6] mm/swap: Introduce alternative per-cpu LRU cache locking Date: Tue, 21 Sep 2021 18:13:20 +0200 Message-Id: <20210921161323.607817-3-nsaenzju@redhat.com> In-Reply-To: <20210921161323.607817-1-nsaenzju@redhat.com> References: <20210921161323.607817-1-nsaenzju@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: quoted-printable Sender: owner-linux-mm@kvack.org Precedence: bulk
Series	mm: Remote LRU per-cpu pagevec cache/per-cpu page list drain support | expand [0/6] mm: Remote LRU per-cpu pagevec cache/per-cpu page list drain support [1/6] mm/swap: Introduce lru_cpu_needs_drain() [2/6] mm/swap: Introduce alternative per-cpu LRU cache locking [3/6] mm/swap: Allow remote LRU cache draining [4/6] mm/page_alloc: Introduce alternative per-cpu list locking [5/6] mm/page_alloc: Allow remote per-cpu page list draining [6/6] sched/isolation: Enable 'remote_pcpu_cache_access' on NOHZ_FULL systems

diff --git a/mm/internal.h b/mm/internal.h index 18256e32a14c..5a2cef7cd394 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -32,6 +32,8 @@ /* Do not use these with a slab allocator */ #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) +extern struct static_key_false remote_pcpu_cache_access; + void page_writeback_init(void); static inline void *folio_raw_mapping(struct folio *folio) diff --git a/mm/swap.c b/mm/swap.c index e7f9e4018ccf..bcf73bd563a6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -46,13 +46,27 @@ /* How many pages do we try to swap or page in/out together? */ int page_cluster; +/* + * On some setups, like with nohz_full, CPUs might be too busy to handle + * per-cpu drain work, leading to unwarranted interruptions and hangs. This + * key, when enabled, allows for remote draining of these per-cpu caches/page + * lists at the cost of more constraining locking. + */ +__ro_after_init DEFINE_STATIC_KEY_FALSE(remote_pcpu_cache_access); + +struct lru_cache_locks { + local_lock_t local; + spinlock_t spin; +}; + /* Protecting only lru_rotate.pvec which requires disabling interrupts */ struct lru_rotate { - local_lock_t lock; + struct lru_cache_locks locks; struct pagevec pvec; }; static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = { - .lock = INIT_LOCAL_LOCK(lock), + .locks.local = INIT_LOCAL_LOCK(lru_rotate.locks.local), + .locks.spin = __SPIN_LOCK_UNLOCKED(lru_rotate.locks.spin), }; /* @@ -60,7 +74,7 @@ static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = { * by disabling preemption (and interrupts remain enabled). 
*/ struct lru_pvecs { - local_lock_t lock; + struct lru_cache_locks locks; struct pagevec lru_add; struct pagevec lru_deactivate_file; struct pagevec lru_deactivate; @@ -70,9 +84,94 @@ struct lru_pvecs { #endif }; static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = { - .lock = INIT_LOCAL_LOCK(lock), + .locks.local = INIT_LOCAL_LOCK(lru_pvecs.locks.local), + .locks.spin = __SPIN_LOCK_UNLOCKED(lru_pvecs.locks.spin), }; +static inline void lru_cache_lock(struct lru_cache_locks *locks) +{ + if (static_branch_unlikely(&remote_pcpu_cache_access)) { + /* Avoid migration between this_cpu_ptr() and spin_lock() */ + migrate_disable(); + spin_lock(this_cpu_ptr(&locks->spin)); + } else { + local_lock(&locks->local); + } +} + +static inline void lru_cache_lock_irqsave(struct lru_cache_locks *locks, + unsigned long *flagsp) +{ + if (static_branch_unlikely(&remote_pcpu_cache_access)) { + /* Avoid migration between this_cpu_ptr() and spin_lock_irqsave() */ + migrate_disable(); + spin_lock_irqsave(this_cpu_ptr(&locks->spin), *flagsp); + } else { + local_lock_irqsave(&locks->local, *flagsp); + } +} + +/* + * The lru_cache_lock_cpu()/lru_cache_lock_irqsave_cpu() flavor of functions + * should only be used from remote CPUs when 'remote_pcpu_cache_access' is + * enabled or the target CPU is dead. Otherwise, it can still be called on the + * local CPU with migration disabled. 
+ */ +static inline void lru_cache_lock_cpu(struct lru_cache_locks *locks, int cpu) +{ + if (static_branch_unlikely(&remote_pcpu_cache_access)) + spin_lock(per_cpu_ptr(&locks->spin, cpu)); + else + local_lock(&locks->local); +} + +static inline void lru_cache_lock_irqsave_cpu(struct lru_cache_locks *locks, + unsigned long *flagsp, int cpu) +{ + if (static_branch_unlikely(&remote_pcpu_cache_access)) + spin_lock_irqsave(per_cpu_ptr(&locks->spin, cpu), *flagsp); + else + local_lock_irqsave(&locks->local, *flagsp); +} + +static inline void lru_cache_unlock(struct lru_cache_locks *locks) +{ + if (static_branch_unlikely(&remote_pcpu_cache_access)) { + spin_unlock(this_cpu_ptr(&locks->spin)); + migrate_enable(); + } else { + local_unlock(&locks->local); + } +} + +static inline void lru_cache_unlock_irqrestore(struct lru_cache_locks *locks, + unsigned long flags) +{ + if (static_branch_unlikely(&remote_pcpu_cache_access)) { + spin_unlock_irqrestore(this_cpu_ptr(&locks->spin), flags); + migrate_enable(); + } else { + local_unlock_irqrestore(&locks->local, flags); + } +} + +static inline void lru_cache_unlock_cpu(struct lru_cache_locks *locks, int cpu) +{ + if (static_branch_unlikely(&remote_pcpu_cache_access)) + spin_unlock(per_cpu_ptr(&locks->spin, cpu)); + else + local_unlock(&locks->local); +} + +static inline void lru_cache_unlock_irqrestore_cpu(struct lru_cache_locks *locks, + unsigned long flags, int cpu) +{ + if (static_branch_unlikely(&remote_pcpu_cache_access)) + spin_unlock_irqrestore(per_cpu_ptr(&locks->spin, cpu), flags); + else + local_unlock_irqrestore(&locks->local, flags); +} + /* * This path almost never happens for VM activity - pages are normally * freed via pagevecs. But it gets used by networking. 
@@ -245,11 +344,11 @@ void folio_rotate_reclaimable(struct folio *folio) unsigned long flags; folio_get(folio); - local_lock_irqsave(&lru_rotate.lock, flags); + lru_cache_lock_irqsave(&lru_rotate.locks, &flags); pvec = this_cpu_ptr(&lru_rotate.pvec); if (pagevec_add_and_need_flush(pvec, &folio->page)) pagevec_lru_move_fn(pvec, pagevec_move_tail_fn); - local_unlock_irqrestore(&lru_rotate.lock, flags); + lru_cache_unlock_irqrestore(&lru_rotate.locks, flags); } } @@ -341,11 +440,11 @@ static void folio_activate(struct folio *folio) struct pagevec *pvec; folio_get(folio); - local_lock(&lru_pvecs.lock); + lru_cache_lock(&lru_pvecs.locks); pvec = this_cpu_ptr(&lru_pvecs.activate_page); if (pagevec_add_and_need_flush(pvec, &folio->page)) pagevec_lru_move_fn(pvec, __activate_page); - local_unlock(&lru_pvecs.lock); + lru_cache_unlock(&lru_pvecs.locks); } } @@ -372,7 +471,7 @@ static void __lru_cache_activate_folio(struct folio *folio) struct pagevec *pvec; int i; - local_lock(&lru_pvecs.lock); + lru_cache_lock(&lru_pvecs.locks); pvec = this_cpu_ptr(&lru_pvecs.lru_add); /* @@ -394,7 +493,7 @@ static void __lru_cache_activate_folio(struct folio *folio) } } - local_unlock(&lru_pvecs.lock); + lru_cache_unlock(&lru_pvecs.locks); } /* @@ -453,11 +552,11 @@ void folio_add_lru(struct folio *folio) VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); folio_get(folio); - local_lock(&lru_pvecs.lock); + lru_cache_lock(&lru_pvecs.locks); pvec = this_cpu_ptr(&lru_pvecs.lru_add); if (pagevec_add_and_need_flush(pvec, &folio->page)) __pagevec_lru_add(pvec); - local_unlock(&lru_pvecs.lock); + lru_cache_unlock(&lru_pvecs.locks); } EXPORT_SYMBOL(folio_add_lru); @@ -592,8 +691,9 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec) /* * Drain pages out of the cpu's pagevecs. - * Either "cpu" is the current CPU, and preemption has already been - * disabled; or "cpu" is being hot-unplugged, and is already dead. 
+ * Either "cpu" is the current CPU, and preemption has already been disabled, + * or we're remotely flushing pvecs with the 'remote_pcpu_cache_access' key + * enabled, or "cpu" is being hot-unplugged and is already dead. */ void lru_add_drain_cpu(int cpu) { @@ -608,9 +708,9 @@ void lru_add_drain_cpu(int cpu) unsigned long flags; /* No harm done if a racing interrupt already did this */ - local_lock_irqsave(&lru_rotate.lock, flags); + lru_cache_lock_irqsave_cpu(&lru_rotate.locks, &flags, cpu); pagevec_lru_move_fn(pvec, pagevec_move_tail_fn); - local_unlock_irqrestore(&lru_rotate.lock, flags); + lru_cache_unlock_irqrestore_cpu(&lru_rotate.locks, flags, cpu); } pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu); @@ -649,12 +749,12 @@ void deactivate_file_page(struct page *page) if (likely(get_page_unless_zero(page))) { struct pagevec *pvec; - local_lock(&lru_pvecs.lock); + lru_cache_lock(&lru_pvecs.locks); pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file); if (pagevec_add_and_need_flush(pvec, page)) pagevec_lru_move_fn(pvec, lru_deactivate_file_fn); - local_unlock(&lru_pvecs.lock); + lru_cache_unlock(&lru_pvecs.locks); } } @@ -671,12 +771,12 @@ void deactivate_page(struct page *page) if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { struct pagevec *pvec; - local_lock(&lru_pvecs.lock); + lru_cache_lock(&lru_pvecs.locks); pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate); get_page(page); if (pagevec_add_and_need_flush(pvec, page)) pagevec_lru_move_fn(pvec, lru_deactivate_fn); - local_unlock(&lru_pvecs.lock); + lru_cache_unlock(&lru_pvecs.locks); } } @@ -693,28 +793,28 @@ void mark_page_lazyfree(struct page *page) !PageSwapCache(page) && !PageUnevictable(page)) { struct pagevec *pvec; - local_lock(&lru_pvecs.lock); + lru_cache_lock(&lru_pvecs.locks); pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree); get_page(page); if (pagevec_add_and_need_flush(pvec, page)) pagevec_lru_move_fn(pvec, lru_lazyfree_fn); - local_unlock(&lru_pvecs.lock); + 
lru_cache_unlock(&lru_pvecs.locks); } } void lru_add_drain(void) { - local_lock(&lru_pvecs.lock); + lru_cache_lock(&lru_pvecs.locks); lru_add_drain_cpu(smp_processor_id()); - local_unlock(&lru_pvecs.lock); + lru_cache_unlock(&lru_pvecs.locks); } void lru_add_drain_cpu_zone(struct zone *zone) { - local_lock(&lru_pvecs.lock); + lru_cache_lock(&lru_pvecs.locks); lru_add_drain_cpu(smp_processor_id()); drain_local_pages(zone); - local_unlock(&lru_pvecs.lock); + lru_cache_unlock(&lru_pvecs.locks); } #ifdef CONFIG_SMP

[2/6] mm/swap: Introduce alternative per-cpu LRU cache locking

Commit Message

Comments

Patch