diff mbox series

[03/18] MM: use ->swap_rw for reads from SWP_FS_OPS swap-space

Message ID 163969850289.20885.1044395970457169316.stgit@noble.brown (mailing list archive)
State New, archived
Headers show
Series Repair SWAP-over-NFS | expand

Commit Message

NeilBrown Dec. 16, 2021, 11:48 p.m. UTC
To submit an async read with ->swap_rw() we need to allocate
a structure to hold the kiocb and other details.  swap_readpage() cannot
handle transient failure, so create a mempool to provide the structures.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 mm/page_io.c  |   58 +++++++++++++++++++++++++++++++++++++++++++++++++++------
 mm/swap.h     |    1 +
 mm/swapfile.c |    5 +++++
 3 files changed, 58 insertions(+), 6 deletions(-)

Comments

Mark Hemment Dec. 20, 2021, 12:16 p.m. UTC | #1
On Thu, 16 Dec 2021 at 23:54, NeilBrown <neilb@suse.de> wrote:
>
> To submit an async read with ->swap_rw() we need to allocate
> a structure to hold the kiocb and other details.  swap_readpage() cannot
> handle transient failure, so create a mempool to provide the structures.
>
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  mm/page_io.c  |   58 +++++++++++++++++++++++++++++++++++++++++++++++++++------
>  mm/swap.h     |    1 +
>  mm/swapfile.c |    5 +++++
>  3 files changed, 58 insertions(+), 6 deletions(-)
...
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index f23d9ff21cf8..43539be38e68 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -2401,6 +2401,11 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
>                 if (ret < 0)
>                         return ret;
>                 sis->flags |= SWP_ACTIVATED;
> +               if ((sis->flags & SWP_FS_OPS) &&
> +                   sio_pool_init() != 0) {
> +                       destroy_swap_extents(sis);
> +                       return -ENOMEM;
> +               }
>                 return ret;
>         }

This code is called before 'swapon_mutex' is taken in the swapon code
path, so it is possible for multiple swapons to race here, creating two
(or more) memory pools.

Mark
Christoph Hellwig Dec. 21, 2021, 8:40 a.m. UTC | #2
> +int sio_pool_init(void)
> +{
> +	if (!sio_pool)
> +		sio_pool = mempool_create_kmalloc_pool(
> +			SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));

I can't see anything serializing access here, so we'll need a lock or
cmpxchg dance.

> +	if (sio_pool)
> +		return 0;
> +	else
> +		return -ENOMEM;

Nit: This would flow much nicer as:

	if (!sio_pool)
		return -ENOMEM;
	return 0;

>  int swap_readpage(struct page *page, bool synchronous)
>  {
>  	struct bio *bio;
> @@ -378,13 +412,25 @@ int swap_readpage(struct page *page, bool synchronous)
>  	}
>  
>  	if (data_race(sis->flags & SWP_FS_OPS)) {
> -		//struct file *swap_file = sis->swap_file;
> -		//struct address_space *mapping = swap_file->f_mapping;

This should not have been left behind by the previous patch.  In fact, I
suspect the part of the previous patch that adds ->swap_rw should probably
be folded into this patch.

> +		struct file *swap_file = sis->swap_file;
> +		struct address_space *mapping = swap_file->f_mapping;
> +		struct iov_iter from;
> +		struct swap_iocb *sio;
> +		loff_t pos = page_file_offset(page);
> +
> +		sio = mempool_alloc(sio_pool, GFP_KERNEL);
> +		init_sync_kiocb(&sio->iocb, swap_file);
> +		sio->iocb.ki_pos = pos;
> +		sio->iocb.ki_complete = sio_read_complete;
> +		sio->bvec.bv_page = page;
> +		sio->bvec.bv_len = PAGE_SIZE;
> +		sio->bvec.bv_offset = 0;
> +
> +		iov_iter_bvec(&from, READ, &sio->bvec, 1, PAGE_SIZE);
> +		ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
> +		if (ret != -EIOCBQUEUED)
> +			sio_read_complete(&sio->iocb, ret);
>  
>  		goto out;

I'd be tempted to split the SWP_FS_OPS case into a helper to keep the
code tidy.
diff mbox series

Patch

diff --git a/mm/page_io.c b/mm/page_io.c
index a9fe5de5dc32..47d7e7866e33 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -283,6 +283,23 @@  static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
 #define bio_associate_blkg_from_page(bio, page)		do { } while (0)
 #endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
 
+struct swap_iocb {
+	struct kiocb		iocb;
+	struct bio_vec		bvec;
+};
+static mempool_t *sio_pool;
+
+int sio_pool_init(void)
+{
+	if (!sio_pool)
+		sio_pool = mempool_create_kmalloc_pool(
+			SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));
+	if (sio_pool)
+		return 0;
+	else
+		return -ENOMEM;
+}
+
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		bio_end_io_t end_write_func)
 {
@@ -353,6 +370,23 @@  int __swap_writepage(struct page *page, struct writeback_control *wbc,
 	return 0;
 }
 
+static void sio_read_complete(struct kiocb *iocb, long ret)
+{
+	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
+	struct page *page = sio->bvec.bv_page;
+
+	if (ret != 0 && ret != PAGE_SIZE) {
+		SetPageError(page);
+		ClearPageUptodate(page);
+		pr_alert_ratelimited("Read-error on swap-device\n");
+	} else {
+		SetPageUptodate(page);
+		count_vm_event(PSWPIN);
+	}
+	unlock_page(page);
+	mempool_free(sio, sio_pool);
+}
+
 int swap_readpage(struct page *page, bool synchronous)
 {
 	struct bio *bio;
@@ -378,13 +412,25 @@  int swap_readpage(struct page *page, bool synchronous)
 	}
 
 	if (data_race(sis->flags & SWP_FS_OPS)) {
-		//struct file *swap_file = sis->swap_file;
-		//struct address_space *mapping = swap_file->f_mapping;
+		struct file *swap_file = sis->swap_file;
+		struct address_space *mapping = swap_file->f_mapping;
+		struct iov_iter from;
+		struct swap_iocb *sio;
+		loff_t pos = page_file_offset(page);
+
+		sio = mempool_alloc(sio_pool, GFP_KERNEL);
+		init_sync_kiocb(&sio->iocb, swap_file);
+		sio->iocb.ki_pos = pos;
+		sio->iocb.ki_complete = sio_read_complete;
+		sio->bvec.bv_page = page;
+		sio->bvec.bv_len = PAGE_SIZE;
+		sio->bvec.bv_offset = 0;
+
+		iov_iter_bvec(&from, READ, &sio->bvec, 1, PAGE_SIZE);
+		ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+		if (ret != -EIOCBQUEUED)
+			sio_read_complete(&sio->iocb, ret);
 
-		/* This needs to use ->swap_rw() */
-		ret = -EINVAL;
-		if (!ret)
-			count_vm_event(PSWPIN);
 		goto out;
 	}
 
diff --git a/mm/swap.h b/mm/swap.h
index 13e72a5023aa..128a1d3e5558 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -3,6 +3,7 @@ 
 #include <linux/blk_types.h> /* for bio_end_io_t */
 
 /* linux/mm/page_io.c */
+int sio_pool_init(void);
 int swap_readpage(struct page *page, bool do_poll);
 int swap_writepage(struct page *page, struct writeback_control *wbc);
 void end_swap_bio_write(struct bio *bio);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f23d9ff21cf8..43539be38e68 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2401,6 +2401,11 @@  static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
 		if (ret < 0)
 			return ret;
 		sis->flags |= SWP_ACTIVATED;
+		if ((sis->flags & SWP_FS_OPS) &&
+		    sio_pool_init() != 0) {
+			destroy_swap_extents(sis);
+			return -ENOMEM;
+		}
 		return ret;
 	}