diff mbox series

[v1,12/15] fs/direct-io: keep track of whether a page is coming from GUP or not

Message ID 20190411210834.4105-13-jglisse@redhat.com (mailing list archive)
State New, archived
Headers show
Series Keep track of GUPed pages in fs and block | expand

Commit Message

Jerome Glisse April 11, 2019, 9:08 p.m. UTC
From: Jérôme Glisse <jglisse@redhat.com>

We want to keep track of how we got a reference on a page when doing DIO,
i.e. whether the page was referenced through GUP (get_user_page*) or not.
For that, this patch reworks the way the page reference is taken and handed
over between the DIO code and the BIO. Instead of taking a reference for pages
that have been successfully added to a BIO, we just steal the reference
we have when we look up the page (either through GUP or for ZERO_PAGE).

So this patch keeps track of whether the reference has been stolen by the
BIO or not. This avoids a bunch of get_page()/put_page() calls and thus
limits the number of atomic operations.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
---
 fs/direct-io.c | 82 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 60 insertions(+), 22 deletions(-)

Comments

Dave Chinner April 11, 2019, 11:14 p.m. UTC | #1
On Thu, Apr 11, 2019 at 05:08:31PM -0400, jglisse@redhat.com wrote:
> From: Jérôme Glisse <jglisse@redhat.com>
> 
> We want to keep track of how we got a reference on page when doing DIO,
> ie wether the page was reference through GUP (get_user_page*) or not.
> For that this patch rework the way page reference is taken and handed
> over between DIO code and BIO. Instead of taking a reference for page
> that have been successfuly added to a BIO we just steal the reference
> we have when we lookup the page (either through GUP or for ZERO_PAGE).
> 
> So this patch keep track of wether the reference has been stolen by the
> BIO or not. This avoids a bunch of get_page()/put_page() so this limit
> the number of atomic operations.

Is the same set of changes appropriate for the fs/iomap.c direct IO
path (i.e. XFS)?

-Dave.
Jerome Glisse April 12, 2019, 12:08 a.m. UTC | #2
On Fri, Apr 12, 2019 at 09:14:43AM +1000, Dave Chinner wrote:
> On Thu, Apr 11, 2019 at 05:08:31PM -0400, jglisse@redhat.com wrote:
> > From: Jérôme Glisse <jglisse@redhat.com>
> > 
> > We want to keep track of how we got a reference on page when doing DIO,
> > ie wether the page was reference through GUP (get_user_page*) or not.
> > For that this patch rework the way page reference is taken and handed
> > over between DIO code and BIO. Instead of taking a reference for page
> > that have been successfuly added to a BIO we just steal the reference
> > we have when we lookup the page (either through GUP or for ZERO_PAGE).
> > 
> > So this patch keep track of wether the reference has been stolen by the
> > BIO or not. This avoids a bunch of get_page()/put_page() so this limit
> > the number of atomic operations.
> 
> Is the asme set of changes appropriate for the fs/iomap.c direct IO
> path (i.e. XFS)?

Yes, and it is part of this patchset. AFAICT iomap uses bio_iov_iter_get_pages(),
which is updated to pass down whether pages are coming from GUP or not. The
bio you get out of that is then released through iomap_dio_bio_end_io(), which
calls bvec_put_page(), which will use put_user_page() for GUPed pages.

I may have missed a case and reviews are welcome.

Note that while the conversion is happening, put_user_page() is exactly the
same as put_page(); in fact the implementation just calls put_page() with
nothing else.

The tricky part is that before we diverge with a put_user_page() that does
something else than put_page(), we will need to be sure that we did not
leave a path that does GUP but calls put_page() and not put_user_page().
We have some plan to catch that in debug builds.

In any case I believe we will be very careful when the time comes to change
put_user_page() to something different.

Cheers,
Jérôme
diff mbox series

Patch

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b8b5d8e31aeb..ef9fc7703a78 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -100,6 +100,7 @@  struct dio_submit {
 	unsigned cur_page_len;		/* Nr of bytes at cur_page_offset */
 	sector_t cur_page_block;	/* Where it starts */
 	loff_t cur_page_fs_offset;	/* Offset in file */
+	bool cur_page_from_gup;		/* Current page is coming from GUP */
 
 	struct iov_iter *iter;
 	/*
@@ -148,6 +149,8 @@  struct dio {
 		struct page *pages[DIO_PAGES];	/* page buffer */
 		struct work_struct complete_work;/* deferred AIO completion */
 	};
+
+	bool gup;			/* pages are coming from GUP */
 } ____cacheline_aligned_in_smp;
 
 static struct kmem_cache *dio_cache __read_mostly;
@@ -167,6 +170,7 @@  static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
 	ssize_t ret;
 
+	dio->gup = iov_iter_get_pages_use_gup(sdio->iter);
 	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
 				&sdio->from);
 
@@ -181,6 +185,7 @@  static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 			dio->page_errors = ret;
 		get_page(page);
 		dio->pages[0] = page;
+		dio->gup = false;
 		sdio->head = 0;
 		sdio->tail = 1;
 		sdio->from = 0;
@@ -490,8 +495,12 @@  static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-	while (sdio->head < sdio->tail)
-		put_page(dio->pages[sdio->head++]);
+	while (sdio->head < sdio->tail) {
+		if (dio->gup)
+			put_user_page(dio->pages[sdio->head++]);
+		else
+			put_page(dio->pages[sdio->head++]);
+	}
 }
 
 /*
@@ -760,15 +769,19 @@  static inline int dio_bio_add_page(struct dio_submit *sdio)
 {
 	int ret;
 
-	ret = bio_add_page(sdio->bio, sdio->cur_page,
-			sdio->cur_page_len, sdio->cur_page_offset, false);
 +	/*
 +	 * The bio is stealing the page reference and that is fine: we can add
 +	 * a page only once, i.e. when dio_send_cur_page() is called, and each
 +	 * call to dio_send_cur_page() clears cur_page (on success).
 +	 */
+	ret = bio_add_page(sdio->bio, sdio->cur_page, sdio->cur_page_len,
+			 sdio->cur_page_offset, sdio->cur_page_from_gup);
 	if (ret == sdio->cur_page_len) {
 		/*
 		 * Decrement count only, if we are done with this page
 		 */
 		if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
 			sdio->pages_in_io--;
-		get_page(sdio->cur_page);
 		sdio->final_block_in_bio = sdio->cur_page_block +
 			(sdio->cur_page_len >> sdio->blkbits);
 		ret = 0;
@@ -828,9 +841,14 @@  static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
 		if (ret == 0) {
 			ret = dio_bio_add_page(sdio);
+			if (!ret)
+				/* Clear the current page. */
+				sdio->cur_page = NULL;
 			BUG_ON(ret != 0);
 		}
-	}
+	} else
+		/* Clear the current page. */
+		sdio->cur_page = NULL;
 out:
 	return ret;
 }
@@ -855,7 +873,7 @@  static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 static inline int
 submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		    unsigned offset, unsigned len, sector_t blocknr,
-		    struct buffer_head *map_bh)
+		    struct buffer_head *map_bh, bool gup)
 {
 	int ret = 0;
 
@@ -882,14 +900,13 @@  submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 	 */
 	if (sdio->cur_page) {
 		ret = dio_send_cur_page(dio, sdio, map_bh);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 		if (ret)
 			return ret;
 	}
 
-	get_page(page);		/* It is in dio */
+	/* Steal page reference and GUP flag */
 	sdio->cur_page = page;
+	sdio->cur_page_from_gup = gup;
 	sdio->cur_page_offset = offset;
 	sdio->cur_page_len = len;
 	sdio->cur_page_block = blocknr;
@@ -903,8 +920,6 @@  submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		ret = dio_send_cur_page(dio, sdio, map_bh);
 		if (sdio->bio)
 			dio_bio_submit(dio, sdio);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 	}
 	return ret;
 }
@@ -946,13 +961,29 @@  static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
 	this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
 
 	page = ZERO_PAGE(0);
+	get_page(page);
 	if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
-				sdio->next_block_for_io, map_bh))
+				sdio->next_block_for_io, map_bh, false)) {
+		put_page(page);
 		return;
+	}
 
 	sdio->next_block_for_io += this_chunk_blocks;
 }
 
+static inline void dio_put_page(const struct dio *dio, bool stolen,
+				struct page *page)
+{
+	/* If page reference was stolen then nothing to do. */
+	if (stolen)
+		return;
+
+	if (dio->gup)
+		put_user_page(page);
+	else
+		put_page(page);
+}
+
 /*
  * Walk the user pages, and the file, mapping blocks to disk and generating
  * a sequence of (page,offset,len,block) mappings.  These mappings are injected
@@ -977,6 +1008,7 @@  static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 	int ret = 0;
 
 	while (sdio->block_in_file < sdio->final_block_in_request) {
+		bool stolen = false;
 		struct page *page;
 		size_t from, to;
 
@@ -1003,7 +1035,7 @@  static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				ret = get_more_blocks(dio, sdio, map_bh);
 				if (ret) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				if (!buffer_mapped(map_bh))
@@ -1048,7 +1080,7 @@  static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				/* AKPM: eargh, -ENOTBLK is a hack */
 				if (dio->op == REQ_OP_WRITE) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					return -ENOTBLK;
 				}
 
@@ -1061,7 +1093,7 @@  static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 				if (sdio->block_in_file >=
 						i_size_aligned >> blkbits) {
 					/* We hit eof */
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				zero_user(page, from, 1 << blkbits);
@@ -1099,11 +1131,13 @@  static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 						  from,
 						  this_chunk_bytes,
 						  sdio->next_block_for_io,
-						  map_bh);
+						  map_bh, dio->gup);
 			if (ret) {
-				put_page(page);
+				dio_put_page(dio, stolen, page);
 				goto out;
-			}
+			} else
 +				/* The page reference has been stolen ... */
+				stolen = true;
 			sdio->next_block_for_io += this_chunk_blocks;
 
 			sdio->block_in_file += this_chunk_blocks;
@@ -1117,7 +1151,7 @@  static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 		}
 
 		/* Drop the ref which was taken in get_user_pages() */
-		put_page(page);
+		dio_put_page(dio, stolen, page);
 	}
 out:
 	return ret;
@@ -1356,8 +1390,12 @@  do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
 		if (retval == 0)
 			retval = ret2;
-		put_page(sdio.cur_page);
-		sdio.cur_page = NULL;
+		else {
+			if (sdio.cur_page_from_gup)
+				put_user_page(sdio.cur_page);
+			else
+				put_page(sdio.cur_page);
+		}
 	}
 	if (sdio.bio)
 		dio_bio_submit(dio, &sdio);