[v6,22/34] nfs: Pin pages rather than ref'ing if appropriate

Message ID: 167391063989.2311931.13252453380684759087.stgit@warthog.procyon.org.uk
State: New, archived
Series: iov_iter: Improve page extraction (ref, pin or just list)

Commit Message

David Howells Jan. 16, 2023, 11:10 p.m. UTC
Convert the NFS direct I/O code to use iov_iter_extract_pages() instead of
iov_iter_get_pages().  Depending on the type of iterator, this will pin the
pages or leave them unaltered, rather than taking a ref on them.

For a DIO read, the pages need to be pinned rather than having refs taken on
them so that VM copy-on-write does not malfunction across a concurrent
fork(); otherwise the result of the I/O could end up visible only to the
child process and not the parent.
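
As a rough sketch of the extract/release pairing this gives (illustrative
only, not part of the patch; example_extract() is a made-up name, while
iov_iter_extract_pages(), iov_iter_extract_mode(), FOLL_DEST_BUF and the
reworked nfs_direct_release_pages() are the helpers used in the diff below):

/*
 * Illustrative sketch only, not NFS code.  example_extract() is a made-up
 * name; the helpers are the ones used in the diff below.
 */
static ssize_t example_extract(struct iov_iter *iter, size_t rsize)
{
	struct page **pagevec = NULL;
	size_t pgbase;
	unsigned int npages;
	ssize_t result;

	/* Pin the pages or just list them, as the iterator type requires. */
	result = iov_iter_extract_pages(iter, &pagevec, rsize, INT_MAX,
					FOLL_DEST_BUF, &pgbase);
	if (result <= 0)
		return result;
	npages = DIV_ROUND_UP(result + pgbase, PAGE_SIZE);

	/* ... set up and submit the I/O on pagevec[0..npages) ... */

	/* Undo whatever extraction did; a no-op if nothing was taken. */
	nfs_direct_release_pages(pagevec, npages,
				 iov_iter_extract_mode(iter, FOLL_DEST_BUF));
	kvfree(pagevec);
	return result;
}

The key point is that the release step mirrors how the pages were obtained:
pinned pages are unpinned via the cleanup mode, and pages the iterator merely
listed (a cleanup mode of 0) are left untouched.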

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna@kernel.org>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-nfs@vger.kernel.org
---

 fs/nfs/direct.c |   32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)
Patch

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 42af84685f20..4a3108db2cb6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -142,11 +142,15 @@  int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
 	return 0;
 }
 
-static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
+static void nfs_direct_release_pages(struct page **pages, unsigned int npages,
+				     unsigned int cleanup_mode)
 {
 	unsigned int i;
-	for (i = 0; i < npages; i++)
-		put_page(pages[i]);
+
+	if (cleanup_mode) {
+		for (i = 0; i < npages; i++)
+			page_put_unpin(pages[i], cleanup_mode);
+	}
 }
 
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
@@ -327,17 +331,16 @@  static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	inode_dio_begin(inode);
 
 	while (iov_iter_count(iter)) {
-		struct page **pagevec;
+		struct page **pagevec = NULL;
 		size_t bytes;
 		size_t pgbase;
 		unsigned npages, i;
 
-		result = iov_iter_get_pages_alloc(iter, &pagevec,
-						  rsize, &pgbase,
-						  FOLL_DEST_BUF);
+		result = iov_iter_extract_pages(iter, &pagevec, rsize, INT_MAX,
+						FOLL_DEST_BUF, &pgbase);
 		if (result < 0)
 			break;
-	
+
 		bytes = result;
 		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
@@ -363,7 +366,8 @@  static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			pos += req_len;
 			dreq->bytes_left -= req_len;
 		}
-		nfs_direct_release_pages(pagevec, npages);
+		nfs_direct_release_pages(pagevec, npages,
+					 iov_iter_extract_mode(iter, FOLL_DEST_BUF));
 		kvfree(pagevec);
 		if (result < 0)
 			break;
@@ -787,14 +791,13 @@  static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 
 	NFS_I(inode)->write_io += iov_iter_count(iter);
 	while (iov_iter_count(iter)) {
-		struct page **pagevec;
+		struct page **pagevec = NULL;
 		size_t bytes;
 		size_t pgbase;
 		unsigned npages, i;
 
-		result = iov_iter_get_pages_alloc(iter, &pagevec,
-						  wsize, &pgbase,
-						  FOLL_SOURCE_BUF);
+		result = iov_iter_extract_pages(iter, &pagevec, wsize, INT_MAX,
+						FOLL_SOURCE_BUF, &pgbase);
 		if (result < 0)
 			break;
 
@@ -831,7 +834,8 @@  static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			pos += req_len;
 			dreq->bytes_left -= req_len;
 		}
-		nfs_direct_release_pages(pagevec, npages);
+		nfs_direct_release_pages(pagevec, npages,
+					 iov_iter_extract_mode(iter, FOLL_SOURCE_BUF));
 		kvfree(pagevec);
 		if (result < 0)
 			break;