diff mbox series

[04/18] MM: perform async writes to SWP_FS_OPS swap-space

Message ID 163969850292.20885.16191050558510542930.stgit@noble.brown (mailing list archive)
State New, archived
Headers show
Series Repair SWAP-over-NFS | expand

Commit Message

NeilBrown Dec. 16, 2021, 11:48 p.m. UTC
Writes to SWP_FS_OPS swapspace is currently synchronous.  To make it
async we need to allocate the kiocb struct which may block, but won't
block as long as waiting for the write to complete would block.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 mm/page_io.c |   69 +++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 39 insertions(+), 30 deletions(-)

Comments

Christoph Hellwig Dec. 21, 2021, 8:41 a.m. UTC | #1
On Fri, Dec 17, 2021 at 10:48:22AM +1100, NeilBrown wrote:
> Writes to SWP_FS_OPS swapspace is currently synchronous.  To make it
> async we need to allocate the kiocb struct which may block, but won't
> block as long as waiting for the write to complete would block.

Against a little helper for the SWP_FS_OPS case of __swap_writepage
would be nice.  But otherwise this looks good to me.
diff mbox series

Patch

diff --git a/mm/page_io.c b/mm/page_io.c
index 47d7e7866e33..84859132c9c6 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -300,6 +300,32 @@  int sio_pool_init(void)
 		return -ENOMEM;
 }
 
+static void sio_write_complete(struct kiocb *iocb, long ret)
+{
+	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
+	struct page *page = sio->bvec.bv_page;
+
+	if (ret != 0 && ret != PAGE_SIZE) {
+		/*
+		 * In the case of swap-over-nfs, this can be a
+		 * temporary failure if the system has limited
+		 * memory for allocating transmit buffers.
+		 * Mark the page dirty and avoid
+		 * folio_rotate_reclaimable but rate-limit the
+		 * messages but do not flag PageError like
+		 * the normal direct-to-bio case as it could
+		 * be temporary.
+		 */
+		set_page_dirty(page);
+		ClearPageReclaim(page);
+		pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
+				   ret, page_file_offset(page));
+	} else
+		count_vm_event(PSWPOUT);
+	end_page_writeback(page);
+	mempool_free(sio, sio_pool);
+}
+
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		bio_end_io_t end_write_func)
 {
@@ -309,42 +335,25 @@  int __swap_writepage(struct page *page, struct writeback_control *wbc,
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	if (data_race(sis->flags & SWP_FS_OPS)) {
-		struct kiocb kiocb;
+		struct swap_iocb *sio;
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
-		struct bio_vec bv = {
-			.bv_page = page,
-			.bv_len  = PAGE_SIZE,
-			.bv_offset = 0
-		};
 		struct iov_iter from;
 
-		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
-		init_sync_kiocb(&kiocb, swap_file);
-		kiocb.ki_pos = page_file_offset(page);
-
 		set_page_writeback(page);
 		unlock_page(page);
-		ret = mapping->a_ops->swap_rw(&kiocb, &from);
-		if (ret == 0) {
-			count_vm_event(PSWPOUT);
-		} else {
-			/*
-			 * In the case of swap-over-nfs, this can be a
-			 * temporary failure if the system has limited
-			 * memory for allocating transmit buffers.
-			 * Mark the page dirty and avoid
-			 * folio_rotate_reclaimable but rate-limit the
-			 * messages but do not flag PageError like
-			 * the normal direct-to-bio case as it could
-			 * be temporary.
-			 */
-			set_page_dirty(page);
-			ClearPageReclaim(page);
-			pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
-					   page_file_offset(page));
-		}
-		end_page_writeback(page);
+		sio = mempool_alloc(sio_pool, GFP_NOIO);
+		init_sync_kiocb(&sio->iocb, swap_file);
+		sio->iocb.ki_complete = sio_write_complete;
+		sio->iocb.ki_pos = page_file_offset(page);
+		sio->bvec.bv_page = page;
+		sio->bvec.bv_len = PAGE_SIZE;
+		sio->bvec.bv_offset = 0;
+		iov_iter_bvec(&from, WRITE, &sio->bvec, 1, PAGE_SIZE);
+		ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+		if (ret != -EIOCBQUEUED)
+			sio_write_complete(&sio->iocb, ret);
+
 		return ret;
 	}