diff mbox series

[v6,19/34] fuse: Pin pages rather than ref'ing if appropriate

Message ID 167391061826.2311931.4301280201217181104.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Headers show
Series iov_iter: Improve page extraction (ref, pin or just list) | expand

Commit Message

David Howells Jan. 16, 2023, 11:10 p.m. UTC
Convert the fuse code to use iov_iter_extract_pages() instead of
iov_iter_get_pages().  Depending on the type of the iterator, this will
pin the pages or leave them unaltered rather than getting a ref on them.

For DIO-read, the pages need to be pinned rather than having refs taken
on them; otherwise VM copy-on-write can malfunction during a concurrent
fork() (the result of the I/O would end up visible only to the child
process and not to the parent).

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Miklos Szeredi <miklos@szeredi.hu>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: Christoph Hellwig <hch@lst.de>
cc: linux-fsdevel@vger.kernel.org
---

 fs/fuse/dev.c    |   25 +++++++++++++++++++------
 fs/fuse/file.c   |   26 ++++++++++++++++++--------
 fs/fuse/fuse_i.h |    1 +
 3 files changed, 38 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e3d8443e24a6..107497e68726 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -641,6 +641,7 @@  static int unlock_request(struct fuse_req *req)
 
 struct fuse_copy_state {
 	int write;
+	unsigned int cleanup_mode;	/* Page cleanup mode (0/FOLL_GET/PIN) */
 	struct fuse_req *req;
 	struct iov_iter *iter;
 	struct pipe_buffer *pipebufs;
@@ -661,6 +662,11 @@  static void fuse_copy_init(struct fuse_copy_state *cs, int write,
 	cs->iter = iter;
 }
 
+static void fuse_release_copy_page(struct fuse_copy_state *cs, struct page *page)
+{
+	page_put_unpin(page, cs->cleanup_mode);
+}
+
 /* Unmap and put previous page of userspace buffer */
 static void fuse_copy_finish(struct fuse_copy_state *cs)
 {
@@ -675,7 +681,7 @@  static void fuse_copy_finish(struct fuse_copy_state *cs)
 			flush_dcache_page(cs->pg);
 			set_page_dirty_lock(cs->pg);
 		}
-		put_page(cs->pg);
+		fuse_release_copy_page(cs, cs->pg);
 	}
 	cs->pg = NULL;
 }
@@ -704,6 +710,7 @@  static int fuse_copy_fill(struct fuse_copy_state *cs)
 
 			BUG_ON(!cs->nr_segs);
 			cs->currbuf = buf;
+			cs->cleanup_mode = FOLL_GET;
 			cs->pg = buf->page;
 			cs->offset = buf->offset;
 			cs->len = buf->len;
@@ -722,6 +729,7 @@  static int fuse_copy_fill(struct fuse_copy_state *cs)
 			buf->len = 0;
 
 			cs->currbuf = buf;
+			cs->cleanup_mode = FOLL_GET;
 			cs->pg = page;
 			cs->offset = 0;
 			cs->len = PAGE_SIZE;
@@ -729,15 +737,18 @@  static int fuse_copy_fill(struct fuse_copy_state *cs)
 			cs->nr_segs++;
 		}
 	} else {
+		unsigned int gup_flags = cs->write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
+		struct page **pages = &cs->pg;
 		size_t off;
-		err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off,
-					 cs->write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
+
+		err = iov_iter_extract_pages(cs->iter, &pages, PAGE_SIZE, 1,
+					     gup_flags, &off);
 		if (err < 0)
 			return err;
 		BUG_ON(!err);
 		cs->len = err;
 		cs->offset = off;
-		cs->pg = page;
+		cs->cleanup_mode = iov_iter_extract_mode(cs->iter, gup_flags);
 	}
 
 	return lock_request(cs->req);
@@ -899,10 +910,12 @@  static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
 	if (cs->nr_segs >= cs->pipe->max_usage)
 		return -EIO;
 
-	get_page(page);
+	err = try_grab_page(page, cs->cleanup_mode);
+	if (err < 0)
+		return err;
 	err = unlock_request(cs->req);
 	if (err) {
-		put_page(page);
+		fuse_release_copy_page(cs, page);
 		return err;
 	}
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 68c196437306..c317300e757a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -624,6 +624,11 @@  void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
 	args->out_args[0].size = count;
 }
 
+static void fuse_release_page(struct fuse_args_pages *ap, struct page *page)
+{
+	page_put_unpin(page, ap->cleanup_mode);
+}
+
 static void fuse_release_user_pages(struct fuse_args_pages *ap,
 				    bool should_dirty)
 {
@@ -632,7 +637,7 @@  static void fuse_release_user_pages(struct fuse_args_pages *ap,
 	for (i = 0; i < ap->num_pages; i++) {
 		if (should_dirty)
 			set_page_dirty_lock(ap->pages[i]);
-		put_page(ap->pages[i]);
+		fuse_release_page(ap, ap->pages[i]);
 	}
 }
 
@@ -920,7 +925,7 @@  static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
 		else
 			SetPageError(page);
 		unlock_page(page);
-		put_page(page);
+		fuse_release_page(ap, page);
 	}
 	if (ia->ff)
 		fuse_file_put(ia->ff, false, false);
@@ -1153,7 +1158,7 @@  static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
 		}
 		if (ia->write.page_locked && (i == ap->num_pages - 1))
 			unlock_page(page);
-		put_page(page);
+		fuse_release_page(ap, page);
 	}
 
 	return err;
@@ -1172,6 +1177,7 @@  static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
 
 	ap->args.in_pages = true;
 	ap->descs[0].offset = offset;
+	ap->cleanup_mode = FOLL_GET;
 
 	do {
 		size_t tmp;
@@ -1200,7 +1206,7 @@  static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
 
 		if (!tmp) {
 			unlock_page(page);
-			put_page(page);
+			fuse_release_page(ap, page);
 			goto again;
 		}
 
@@ -1393,9 +1399,12 @@  static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 			       size_t *nbytesp, int write,
 			       unsigned int max_pages)
 {
+	unsigned int gup_flags = write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
 	size_t nbytes = 0;  /* # bytes already packed in req */
 	ssize_t ret = 0;
 
+	ap->cleanup_mode = iov_iter_extract_mode(ii, gup_flags);
+
 	/* Special case for kernel I/O: can copy directly into the buffer */
 	if (iov_iter_is_kvec(ii)) {
 		unsigned long user_addr = fuse_get_user_addr(ii);
@@ -1412,12 +1421,13 @@  static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 	}
 
 	while (nbytes < *nbytesp && ap->num_pages < max_pages) {
+		struct page **pages = &ap->pages[ap->num_pages];
 		unsigned npages;
 		size_t start;
-		ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
-					 *nbytesp - nbytes,
-					 max_pages - ap->num_pages,
-					 &start, write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
+		ret = iov_iter_extract_pages(ii, &pages,
+					     *nbytesp - nbytes,
+					     max_pages - ap->num_pages,
+					     gup_flags, &start);
 		if (ret < 0)
 			break;
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c673faefdcb9..7b6be1dd7593 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -271,6 +271,7 @@  struct fuse_args_pages {
 	struct page **pages;
 	struct fuse_page_desc *descs;
 	unsigned int num_pages;
+	unsigned int cleanup_mode;
 };
 
 #define FUSE_ARGS(args) struct fuse_args args = {}