diff mbox

[v2,5/7] mm:swap: use on-stack-bio for BDI_CAP_SYNCHRONOUS device

Message ID 1502428647-28928-6-git-send-email-minchan@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Minchan Kim Aug. 11, 2017, 5:17 a.m. UTC
There is no need to use dynamic bio allocation for BDI_CAP_SYNCHRONOUS
devices. They can use an on-stack bio instead, so they never have to
wait for a bio allocation from the mempool under heavy memory pressure.

This is especially beneficial for swap devices, because the bio mempool
used for swap IO is shared with the filesystem. With an on-stack bio,
super-fast swap IO such as zram no longer needs to depend on the
completion of slow eMMC reads/writes.

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 include/linux/swap.h |  3 ++-
 mm/page_io.c         | 70 +++++++++++++++++++++++++++++++++++++---------------
 mm/swapfile.c        |  3 +++
 3 files changed, 55 insertions(+), 21 deletions(-)

Comments

kernel test robot Aug. 12, 2017, 8:21 a.m. UTC | #1
Hi Minchan,

[auto build test WARNING on mmotm/master]
[also build test WARNING on next-20170811]
[cannot apply to linus/master v4.13-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/Replace-rw_page-with-on-stack-bio/20170812-152541
base:   git://git.cmpxchg.org/linux-mmotm.git master
config: xtensa-allmodconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 4.9.0
reproduce:
        wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=xtensa 

All warnings (new ones prefixed by >>):

   mm/page_io.c: In function '__swap_writepage':
>> mm/page_io.c:345:3: warning: passing argument 1 of 'bio_get' from incompatible pointer type
      bio_get(&bio);
      ^
   In file included from include/linux/writeback.h:205:0,
                    from include/linux/memcontrol.h:31,
                    from include/linux/swap.h:8,
                    from mm/page_io.c:17:
   include/linux/bio.h:252:20: note: expected 'struct bio *' but argument is of type 'struct bio **'
    static inline void bio_get(struct bio *bio)
                       ^

vim +/bio_get +345 mm/page_io.c

   275	
   276	int __swap_writepage(struct page *page, struct writeback_control *wbc)
   277	{
   278		int ret;
   279		struct swap_info_struct *sis = page_swap_info(page);
   280		struct bio *bio;
   281		/* on-stack-bio */
   282		struct bio sbio;
   283		struct bio_vec sbvec;
   284	
   285		VM_BUG_ON_PAGE(!PageSwapCache(page), page);
   286		if (sis->flags & SWP_FILE) {
   287			struct kiocb kiocb;
   288			struct file *swap_file = sis->swap_file;
   289			struct address_space *mapping = swap_file->f_mapping;
   290			struct bio_vec bv = {
   291				.bv_page = page,
   292				.bv_len  = PAGE_SIZE,
   293				.bv_offset = 0
   294			};
   295			struct iov_iter from;
   296	
   297			iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
   298			init_sync_kiocb(&kiocb, swap_file);
   299			kiocb.ki_pos = page_file_offset(page);
   300	
   301			set_page_writeback(page);
   302			unlock_page(page);
   303			ret = mapping->a_ops->direct_IO(&kiocb, &from);
   304			if (ret == PAGE_SIZE) {
   305				count_vm_event(PSWPOUT);
   306				ret = 0;
   307			} else {
   308				/*
   309				 * In the case of swap-over-nfs, this can be a
   310				 * temporary failure if the system has limited
   311				 * memory for allocating transmit buffers.
   312				 * Mark the page dirty and avoid
   313				 * rotate_reclaimable_page but rate-limit the
   314				 * messages but do not flag PageError like
   315				 * the normal direct-to-bio case as it could
   316				 * be temporary.
   317				 */
   318				set_page_dirty(page);
   319				ClearPageReclaim(page);
   320				pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
   321						   page_file_offset(page));
   322			}
   323			end_page_writeback(page);
   324			return ret;
   325		}
   326	
   327		ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
   328		if (!ret) {
   329			count_swpout_vm_event(page);
   330			return 0;
   331		}
   332	
   333		ret = 0;
   334		if (!(sis->flags & SWP_SYNC_IO)) {
   335	
   336			bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
   337			if (bio == NULL) {
   338				set_page_dirty(page);
   339				unlock_page(page);
   340				ret = -ENOMEM;
   341				goto out;
   342			}
   343		} else {
   344			bio = &sbio;
 > 345			bio_get(&bio);
   346	
   347			bio_init(&sbio, &sbvec, 1);
   348			sbio.bi_bdev = sis->bdev;
   349			sbio.bi_iter.bi_sector = swap_page_sector(page);
   350			sbio.bi_end_io = end_swap_bio_write;
   351			bio_add_page(&sbio, page, PAGE_SIZE, 0);
   352		}
   353	
   354		bio_set_op_attrs(bio, REQ_OP_WRITE, wbc_to_write_flags(wbc));
   355		set_page_writeback(page);
   356		unlock_page(page);
   357		submit_bio(bio);
   358		count_swpout_vm_event(page);
   359	out:
   360		return ret;
   361	}
   362	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Aug. 12, 2017, 8:46 a.m. UTC | #2
Hi Minchan,

[auto build test ERROR on mmotm/master]
[also build test ERROR on next-20170811]
[cannot apply to linus/master v4.13-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/Replace-rw_page-with-on-stack-bio/20170812-152541
base:   git://git.cmpxchg.org/linux-mmotm.git master
config: sparc64-allmodconfig (attached as .config)
compiler: sparc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=sparc64 

All errors (new ones prefixed by >>):

   mm/page_io.c: In function '__swap_writepage':
>> mm/page_io.c:345:11: error: passing argument 1 of 'bio_get' from incompatible pointer type [-Werror=incompatible-pointer-types]
      bio_get(&bio);
              ^
   In file included from include/linux/writeback.h:205:0,
                    from include/linux/memcontrol.h:31,
                    from include/linux/swap.h:8,
                    from mm/page_io.c:17:
   include/linux/bio.h:252:20: note: expected 'struct bio *' but argument is of type 'struct bio **'
    static inline void bio_get(struct bio *bio)
                       ^~~~~~~
   cc1: some warnings being treated as errors

vim +/bio_get +345 mm/page_io.c

   275	
   276	int __swap_writepage(struct page *page, struct writeback_control *wbc)
   277	{
   278		int ret;
   279		struct swap_info_struct *sis = page_swap_info(page);
   280		struct bio *bio;
   281		/* on-stack-bio */
   282		struct bio sbio;
   283		struct bio_vec sbvec;
   284	
   285		VM_BUG_ON_PAGE(!PageSwapCache(page), page);
   286		if (sis->flags & SWP_FILE) {
   287			struct kiocb kiocb;
   288			struct file *swap_file = sis->swap_file;
   289			struct address_space *mapping = swap_file->f_mapping;
   290			struct bio_vec bv = {
   291				.bv_page = page,
   292				.bv_len  = PAGE_SIZE,
   293				.bv_offset = 0
   294			};
   295			struct iov_iter from;
   296	
   297			iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
   298			init_sync_kiocb(&kiocb, swap_file);
   299			kiocb.ki_pos = page_file_offset(page);
   300	
   301			set_page_writeback(page);
   302			unlock_page(page);
   303			ret = mapping->a_ops->direct_IO(&kiocb, &from);
   304			if (ret == PAGE_SIZE) {
   305				count_vm_event(PSWPOUT);
   306				ret = 0;
   307			} else {
   308				/*
   309				 * In the case of swap-over-nfs, this can be a
   310				 * temporary failure if the system has limited
   311				 * memory for allocating transmit buffers.
   312				 * Mark the page dirty and avoid
   313				 * rotate_reclaimable_page but rate-limit the
   314				 * messages but do not flag PageError like
   315				 * the normal direct-to-bio case as it could
   316				 * be temporary.
   317				 */
   318				set_page_dirty(page);
   319				ClearPageReclaim(page);
   320				pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
   321						   page_file_offset(page));
   322			}
   323			end_page_writeback(page);
   324			return ret;
   325		}
   326	
   327		ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
   328		if (!ret) {
   329			count_swpout_vm_event(page);
   330			return 0;
   331		}
   332	
   333		ret = 0;
   334		if (!(sis->flags & SWP_SYNC_IO)) {
   335	
   336			bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
   337			if (bio == NULL) {
   338				set_page_dirty(page);
   339				unlock_page(page);
   340				ret = -ENOMEM;
   341				goto out;
   342			}
   343		} else {
   344			bio = &sbio;
 > 345			bio_get(&bio);
   346	
   347			bio_init(&sbio, &sbvec, 1);
   348			sbio.bi_bdev = sis->bdev;
   349			sbio.bi_iter.bi_sector = swap_page_sector(page);
   350			sbio.bi_end_io = end_swap_bio_write;
   351			bio_add_page(&sbio, page, PAGE_SIZE, 0);
   352		}
   353	
   354		bio_set_op_attrs(bio, REQ_OP_WRITE, wbc_to_write_flags(wbc));
   355		set_page_writeback(page);
   356		unlock_page(page);
   357		submit_bio(bio);
   358		count_swpout_vm_event(page);
   359	out:
   360		return ret;
   361	}
   362	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Minchan Kim Aug. 14, 2017, 8:41 a.m. UTC | #3
On Sat, Aug 12, 2017 at 04:46:33PM +0800, kbuild test robot wrote:
> Hi Minchan,
> 
> [auto build test ERROR on mmotm/master]
> [also build test ERROR on next-20170811]
> [cannot apply to linus/master v4.13-rc4]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
> 
> url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/Replace-rw_page-with-on-stack-bio/20170812-152541
> base:   git://git.cmpxchg.org/linux-mmotm.git master
> config: sparc64-allmodconfig (attached as .config)
> compiler: sparc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
> reproduce:
>         wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         make.cross ARCH=sparc64 
> 
> All errors (new ones prefixed by >>):
> 
>    mm/page_io.c: In function '__swap_writepage':
> >> mm/page_io.c:345:11: error: passing argument 1 of 'bio_get' from incompatible pointer type [-Werror=incompatible-pointer-types]
>       bio_get(&bio);
>               ^
>    In file included from include/linux/writeback.h:205:0,
>                     from include/linux/memcontrol.h:31,
>                     from include/linux/swap.h:8,
>                     from mm/page_io.c:17:
>    include/linux/bio.h:252:20: note: expected 'struct bio *' but argument is of type 'struct bio **'
>     static inline void bio_get(struct bio *bio)
>                        ^~~~~~~
>    cc1: some warnings being treated as errors
> 
> vim +/bio_get +345 mm/page_io.c
> 
>    275	
>    276	int __swap_writepage(struct page *page, struct writeback_control *wbc)
>    277	{
>    278		int ret;
>    279		struct swap_info_struct *sis = page_swap_info(page);
>    280		struct bio *bio;
>    281		/* on-stack-bio */
>    282		struct bio sbio;
>    283		struct bio_vec sbvec;
>    284	
>    285		VM_BUG_ON_PAGE(!PageSwapCache(page), page);
>    286		if (sis->flags & SWP_FILE) {
>    287			struct kiocb kiocb;
>    288			struct file *swap_file = sis->swap_file;
>    289			struct address_space *mapping = swap_file->f_mapping;
>    290			struct bio_vec bv = {
>    291				.bv_page = page,
>    292				.bv_len  = PAGE_SIZE,
>    293				.bv_offset = 0
>    294			};
>    295			struct iov_iter from;
>    296	
>    297			iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
>    298			init_sync_kiocb(&kiocb, swap_file);
>    299			kiocb.ki_pos = page_file_offset(page);
>    300	
>    301			set_page_writeback(page);
>    302			unlock_page(page);
>    303			ret = mapping->a_ops->direct_IO(&kiocb, &from);
>    304			if (ret == PAGE_SIZE) {
>    305				count_vm_event(PSWPOUT);
>    306				ret = 0;
>    307			} else {
>    308				/*
>    309				 * In the case of swap-over-nfs, this can be a
>    310				 * temporary failure if the system has limited
>    311				 * memory for allocating transmit buffers.
>    312				 * Mark the page dirty and avoid
>    313				 * rotate_reclaimable_page but rate-limit the
>    314				 * messages but do not flag PageError like
>    315				 * the normal direct-to-bio case as it could
>    316				 * be temporary.
>    317				 */
>    318				set_page_dirty(page);
>    319				ClearPageReclaim(page);
>    320				pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
>    321						   page_file_offset(page));
>    322			}
>    323			end_page_writeback(page);
>    324			return ret;
>    325		}
>    326	
>    327		ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
>    328		if (!ret) {
>    329			count_swpout_vm_event(page);
>    330			return 0;
>    331		}
>    332	
>    333		ret = 0;
>    334		if (!(sis->flags & SWP_SYNC_IO)) {
>    335	
>    336			bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
>    337			if (bio == NULL) {
>    338				set_page_dirty(page);
>    339				unlock_page(page);
>    340				ret = -ENOMEM;
>    341				goto out;
>    342			}
>    343		} else {
>    344			bio = &sbio;
>  > 345			bio_get(&bio);

Hi kbuild,

I will fix it and respin.
Thanks for catching this!
diff mbox

Patch

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ae3da979a7b7..6ed9b6423f7d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -152,8 +152,9 @@  enum {
 	SWP_AREA_DISCARD = (1 << 8),	/* single-time swap area discards */
 	SWP_PAGE_DISCARD = (1 << 9),	/* freed swap page-cluster discards */
 	SWP_STABLE_WRITES = (1 << 10),	/* no overwrite PG_writeback pages */
+	SWP_SYNC_IO	= (1<<11),	/* synchronous IO is efficient */
 					/* add others here before... */
-	SWP_SCANNING	= (1 << 11),	/* refcount in scan_swap_map */
+	SWP_SCANNING	= (1 << 12),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
diff --git a/mm/page_io.c b/mm/page_io.c
index 3502a97f7c48..64330c751548 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -119,8 +119,8 @@  static void swap_slot_free_notify(struct page *page)
 
 static void end_swap_bio_read(struct bio *bio)
 {
-	struct page *page = bio->bi_io_vec[0].bv_page;
 	struct task_struct *waiter = bio->bi_private;
+	struct page *page = bio->bi_io_vec[0].bv_page;
 
 	if (bio->bi_status) {
 		SetPageError(page);
@@ -275,9 +275,12 @@  static inline void count_swpout_vm_event(struct page *page)
 
 int __swap_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct bio *bio;
 	int ret;
 	struct swap_info_struct *sis = page_swap_info(page);
+	struct bio *bio;
+	/* on-stack-bio */
+	struct bio sbio;
+	struct bio_vec sbvec;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	if (sis->flags & SWP_FILE) {
@@ -328,29 +331,45 @@  int __swap_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	ret = 0;
-	bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
-	if (bio == NULL) {
-		set_page_dirty(page);
-		unlock_page(page);
-		ret = -ENOMEM;
-		goto out;
+	if (!(sis->flags & SWP_SYNC_IO)) {
+
+		bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
+		if (bio == NULL) {
+			set_page_dirty(page);
+			unlock_page(page);
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else {
+		bio = &sbio;
+		bio_get(&bio);
+
+		bio_init(&sbio, &sbvec, 1);
+		sbio.bi_bdev = sis->bdev;
+		sbio.bi_iter.bi_sector = swap_page_sector(page);
+		sbio.bi_end_io = end_swap_bio_write;
+		bio_add_page(&sbio, page, PAGE_SIZE, 0);
 	}
-	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
-	count_swpout_vm_event(page);
+
+	bio_set_op_attrs(bio, REQ_OP_WRITE, wbc_to_write_flags(wbc));
 	set_page_writeback(page);
 	unlock_page(page);
 	submit_bio(bio);
+	count_swpout_vm_event(page);
 out:
 	return ret;
 }
 
 int swap_readpage(struct page *page, bool do_poll)
 {
-	struct bio *bio;
 	int ret = 0;
 	struct swap_info_struct *sis = page_swap_info(page);
 	blk_qc_t qc;
 	struct block_device *bdev;
+	struct bio *bio;
+	/* on-stack-bio */
+	struct bio sbio;
+	struct bio_vec sbvec;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -383,21 +402,33 @@  int swap_readpage(struct page *page, bool do_poll)
 	}
 
 	ret = 0;
-	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
-	if (bio == NULL) {
-		unlock_page(page);
-		ret = -ENOMEM;
-		goto out;
+	count_vm_event(PSWPIN);
+	if (!(sis->flags & SWP_SYNC_IO)) {
+		bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
+		if (bio == NULL) {
+			unlock_page(page);
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else {
+		bio = &sbio;
+		bio_get(bio);
+
+		bio_init(&sbio, &sbvec, 1);
+		sbio.bi_bdev = sis->bdev;
+		sbio.bi_iter.bi_sector = swap_page_sector(page);
+		bio->bi_end_io = end_swap_bio_read;
+		bio_add_page(&sbio, page, PAGE_SIZE, 0);
 	}
 	bdev = bio->bi_bdev;
 	/*
-	 * Keep this task valid during swap readpage because the oom killer may
-	 * attempt to access it in the page fault retry time check.
+	 * Keep this task valid during swap readpage because
+	 * the oom killer may attempt to access it
+	 * in the page fault retry time check.
 	 */
 	get_task_struct(current);
 	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-	count_vm_event(PSWPIN);
 	bio_get(bio);
 	qc = submit_bio(bio);
 	while (do_poll) {
@@ -410,7 +441,6 @@  int swap_readpage(struct page *page, bool do_poll)
 	}
 	__set_current_state(TASK_RUNNING);
 	bio_put(bio);
-
 out:
 	return ret;
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 42eff9e4e972..e916b325b0b7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3113,6 +3113,9 @@  SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
 		p->flags |= SWP_STABLE_WRITES;
 
+	if (bdi_cap_synchronous_io(inode_to_bdi(inode)))
+		p->flags |= SWP_SYNC_IO;
+
 	if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
 		int cpu;
 		unsigned long ci, nr_cluster;