
[v2,2/2] mm: fs: Invalidate BH LRU during page migration

Message ID 20210309051628.3105973-2-minchan@kernel.org (mailing list archive)
State New, archived
Series [v2,1/2] mm: disable LRU pagevec during the migration temporarily

Commit Message

Minchan Kim March 9, 2021, 5:16 a.m. UTC
Pages containing buffer_heads that sit in one of the per-CPU
buffer_head LRU caches are pinned and thus cannot be migrated. This
can prevent CMA allocations from succeeding; CMA is often used on
platforms with co-processors (such as a DSP) that can only use
physically contiguous memory. It can also prevent memory
hot-unplugging from succeeding, since that requires migrating at
least MIN_MEMORY_BLOCK_SIZE bytes of memory (8 MiB to 1 GiB,
depending on the architecture).

To address this, invalidate the BH LRU caches before a migration
starts and prevent any buffer_head from being cached in them until
the migration has finished.
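
For context, a minimal sketch (not part of this patch) of how a
migration path is expected to bracket that window, assuming the
lru_cache_disable()/lru_cache_enable() interface from patch 1/2 of
this series; the call site, variable names and error handling below
are illustrative only:

	/*
	 * Hypothetical caller, loosely modelled on alloc_contig_range():
	 * lru_cache_disable() drains the per-CPU pagevecs and, with this
	 * patch, the per-CPU bh_lrus, and blocks refills of both, so
	 * pages holding buffer_heads stay migratable for the whole window.
	 */
	lru_cache_disable();
	ret = __alloc_contig_migrate_range(&cc, start, end);
	lru_cache_enable();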

Signed-off-by: Chris Goldsworthy <cgoldswo@codeaurora.org>
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 fs/buffer.c                 | 12 ++++++++++--
 include/linux/buffer_head.h |  3 +++
 include/linux/swap.h        |  1 +
 mm/swap.c                   |  5 ++++-
 4 files changed, 18 insertions(+), 3 deletions(-)

Comments

kernel test robot March 9, 2021, 11:11 a.m. UTC | #1
Hi Minchan,

I love your patch! Yet something to improve:

[auto build test ERROR on linux/master]
[also build test ERROR on linus/master v5.12-rc2 next-20210309]
[cannot apply to hnaz-linux-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-disable-LRU-pagevec-during-the-migration-temporarily/20210309-131826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 144c79ef33536b4ecb4951e07dbc1f2b7fa99d32
config: openrisc-randconfig-r026-20210308 (attached as .config)
compiler: or1k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/dfca8699b8fb8cf3bed2297e261fca53c0fc523c
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Minchan-Kim/mm-disable-LRU-pagevec-during-the-migration-temporarily/20210309-131826
        git checkout dfca8699b8fb8cf3bed2297e261fca53c0fc523c
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=openrisc 

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/swap.c:745:6: warning: no previous prototype for '__lru_add_drain_all' [-Wmissing-prototypes]
     745 | void __lru_add_drain_all(bool force_all_cpus)
         |      ^~~~~~~~~~~~~~~~~~~
   mm/swap.c: In function '__lru_add_drain_all':
>> mm/swap.c:827:7: error: implicit declaration of function 'has_bh_in_lru' [-Werror=implicit-function-declaration]
     827 |       has_bh_in_lru(cpu, NULL)) {
         |       ^~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/has_bh_in_lru +827 mm/swap.c

   744	
   745	void __lru_add_drain_all(bool force_all_cpus)
   746	{
   747		/*
   748		 * lru_drain_gen - Global pages generation number
   749		 *
   750		 * (A) Definition: global lru_drain_gen = x implies that all generations
   751		 *     0 < n <= x are already *scheduled* for draining.
   752		 *
   753		 * This is an optimization for the highly-contended use case where a
   754		 * user space workload keeps constantly generating a flow of pages for
   755		 * each CPU.
   756		 */
   757		static unsigned int lru_drain_gen;
   758		static struct cpumask has_work;
   759		static DEFINE_MUTEX(lock);
   760		unsigned cpu, this_gen;
   761	
   762		/*
   763		 * Make sure nobody triggers this path before mm_percpu_wq is fully
   764		 * initialized.
   765		 */
   766		if (WARN_ON(!mm_percpu_wq))
   767			return;
   768	
   769		/*
   770		 * Guarantee pagevec counter stores visible by this CPU are visible to
   771		 * other CPUs before loading the current drain generation.
   772		 */
   773		smp_mb();
   774	
   775		/*
   776		 * (B) Locally cache global LRU draining generation number
   777		 *
   778		 * The read barrier ensures that the counter is loaded before the mutex
   779		 * is taken. It pairs with smp_mb() inside the mutex critical section
   780		 * at (D).
   781		 */
   782		this_gen = smp_load_acquire(&lru_drain_gen);
   783	
   784		mutex_lock(&lock);
   785	
   786		/*
   787		 * (C) Exit the draining operation if a newer generation, from another
   788		 * lru_add_drain_all(), was already scheduled for draining. Check (A).
   789		 */
   790		if (unlikely(this_gen != lru_drain_gen && !force_all_cpus))
   791			goto done;
   792	
   793		/*
   794		 * (D) Increment global generation number
   795		 *
   796		 * Pairs with smp_load_acquire() at (B), outside of the critical
   797		 * section. Use a full memory barrier to guarantee that the new global
   798		 * drain generation number is stored before loading pagevec counters.
   799		 *
   800		 * This pairing must be done here, before the for_each_online_cpu loop
   801		 * below which drains the page vectors.
   802		 *
   803		 * Let x, y, and z represent some system CPU numbers, where x < y < z.
   804		 * Assume CPU #z is in the middle of the for_each_online_cpu loop
   805		 * below and has already reached CPU #y's per-cpu data. CPU #x comes
   806		 * along, adds some pages to its per-cpu vectors, then calls
   807		 * lru_add_drain_all().
   808		 *
   809		 * If the paired barrier is done at any later step, e.g. after the
   810		 * loop, CPU #x will just exit at (C) and miss flushing out all of its
   811		 * added pages.
   812		 */
   813		WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
   814		smp_mb();
   815	
   816		cpumask_clear(&has_work);
   817		for_each_online_cpu(cpu) {
   818			struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
   819	
   820			if (force_all_cpus ||
   821			    pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
   822			    data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) ||
   823			    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
   824			    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
   825			    pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
   826			    need_activate_page_drain(cpu) ||
 > 827			    has_bh_in_lru(cpu, NULL)) {
   828				INIT_WORK(work, lru_add_drain_per_cpu);
   829				queue_work_on(cpu, mm_percpu_wq, work);
   830				__cpumask_set_cpu(cpu, &has_work);
   831			}
   832		}
   833	
   834		for_each_cpu(cpu, &has_work)
   835			flush_work(&per_cpu(lru_add_drain_work, cpu));
   836	
   837	done:
   838		mutex_unlock(&lock);
   839	}
   840	
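
The failing openrisc randconfig most likely has CONFIG_BLOCK=n: the new
has_bh_in_lru() declaration is only visible in the CONFIG_BLOCK branch
of <linux/buffer_head.h>, while mm/swap.c now calls it unconditionally.
Assuming that diagnosis, a minimal sketch of a fix is a no-op stub next
to the existing !CONFIG_BLOCK invalidate_bh_lru() stub:

	/* include/linux/buffer_head.h, !CONFIG_BLOCK section (sketch) */
	static inline void invalidate_bh_lru(void *arg) {}
	static inline bool has_bh_in_lru(int cpu, void *dummy)
	{
		/* no block layer, so there are never buffer_heads to drain */
		return false;
	}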

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index 96c7604f69b3..4492e9d4c9d3 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1301,6 +1301,14 @@  static void bh_lru_install(struct buffer_head *bh)
 	int i;
 
 	check_irqs_on();
+	/*
+	 * A buffer_head in the bh_lru holds a reference on its page until
+	 * the LRU is invalidated, which causes page migration to fail.
+	 * Skip putting upcoming bh into bh_lru until migration is done.
+	 */
+	if (lru_cache_disabled())
+		return;
+
 	bh_lru_lock();
 
 	b = this_cpu_ptr(&bh_lrus);
@@ -1446,7 +1454,7 @@  EXPORT_SYMBOL(__bread_gfp);
  * This doesn't race because it runs in each cpu either in irq
  * or with preempt disabled.
  */
-static void invalidate_bh_lru(void *arg)
+void invalidate_bh_lru(void *arg)
 {
 	struct bh_lru *b = &get_cpu_var(bh_lrus);
 	int i;
@@ -1458,7 +1466,7 @@  static void invalidate_bh_lru(void *arg)
 	put_cpu_var(bh_lrus);
 }
 
-static bool has_bh_in_lru(int cpu, void *dummy)
+bool has_bh_in_lru(int cpu, void *dummy)
 {
 	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
 	int i;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6b47f94378c5..3ae62f3f788e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -194,6 +194,8 @@  void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size,
 struct buffer_head *__bread_gfp(struct block_device *,
 				sector_t block, unsigned size, gfp_t gfp);
 void invalidate_bh_lrus(void);
+void invalidate_bh_lru(void *arg);
+bool has_bh_in_lru(int cpu, void *dummy);
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
 void unlock_buffer(struct buffer_head *bh);
@@ -406,6 +408,7 @@  static inline int inode_has_buffers(struct inode *inode) { return 0; }
 static inline void invalidate_inode_buffers(struct inode *inode) {}
 static inline int remove_inode_buffers(struct inode *inode) { return 1; }
 static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+static inline void invalidate_bh_lru(void *arg) {}
 #define buffer_heads_over_limit 0
 
 #endif /* CONFIG_BLOCK */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index aaa6b9cc3f8a..5386cce1a26d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -341,6 +341,7 @@  extern void lru_cache_add(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_cache_disable(void);
 extern void lru_cache_enable(void);
+extern bool lru_cache_disabled(void);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_cpu_zone(struct zone *zone);
diff --git a/mm/swap.c b/mm/swap.c
index fc8acccb882b..d599d6449154 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -36,6 +36,7 @@ 
 #include <linux/hugetlb.h>
 #include <linux/page_idle.h>
 #include <linux/local_lock.h>
+#include <linux/buffer_head.h>
 
 #include "internal.h"
 
@@ -641,6 +642,7 @@  void lru_add_drain_cpu(int cpu)
 		pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
 
 	activate_page_drain(cpu);
+	invalidate_bh_lru(NULL);
 }
 
 /**
@@ -821,7 +823,8 @@  void __lru_add_drain_all(bool force_all_cpus)
 		    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
 		    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
 		    pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
-		    need_activate_page_drain(cpu)) {
+		    need_activate_page_drain(cpu) ||
+		    has_bh_in_lru(cpu, NULL)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
 			queue_work_on(cpu, mm_percpu_wq, work);
 			__cpumask_set_cpu(cpu, &has_work);