diff mbox series

[V2,2/2] fs: Move swap_[de]activate to file_operations

Message ID 20191113004244.9981-3-ira.weiny@intel.com (mailing list archive)
State New, archived
Headers show
Series Move swap functions out of address space operations | expand

Commit Message

Ira Weiny Nov. 13, 2019, 12:42 a.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

swap_activate() and swap_deactivate() have nothing to do with address
spaces.  We want to be able to change the address space operations on
the fly to allow changing inode flags dynamically.

Switching address space operations can be difficult to do reliably.[1]
Therefore, to simplify switching address space operations we reduce the
number of functions in those operations by moving swap_activate() and
swap_deactivate() out of the address space operations.

No functionality is changed with this patch.

This has been tested with XFS but not NFS, f2fs, or btrfs.

Also note we move some functions to facilitate compilation.  But no
functional changes are contained within those diffs.

[1] https://lkml.org/lkml/2019/11/11/572

Cc: Dave Chinner <david@fromorbit.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from V0:
	Update cover letter.
	fix btrfs as per Andrew's comments
	change xfs_iomap_swapfile_activate() to xfs_file_swap_activate()

Changes from V1:
	Update recipients list


 fs/btrfs/file.c    | 341 +++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/inode.c   | 340 --------------------------------------------
 fs/f2fs/data.c     | 122 ----------------
 fs/f2fs/file.c     | 122 ++++++++++++++++
 fs/nfs/file.c      |   4 +-
 fs/xfs/xfs_aops.c  |  13 --
 fs/xfs/xfs_file.c  |  12 ++
 include/linux/fs.h |  10 +-
 mm/swapfile.c      |  12 +-
 9 files changed, 487 insertions(+), 489 deletions(-)

Comments

Darrick J. Wong Nov. 13, 2019, 12:57 a.m. UTC | #1
On Tue, Nov 12, 2019 at 04:42:44PM -0800, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> swap_activate() and swap_deactivate() have nothing to do with address
> spaces.  We want to be able to change the address space operations on
> the fly to allow changing inode flags dynamically.
> 
> Switching address space operations can be difficult to do reliably.[1]
> Therefore, to simplify switching address space operations we reduce the
> number of functions in those operations by moving swap_activate() and
> swap_deactivate() out of the address space operations.
> 
> No functionality is changed with this patch.
> 
> This has been tested with XFS but not NFS, f2fs, or btrfs.
> 
> Also note we move some functions to facilitate compilation.  But there
> are no functional changes are contained within those diffs.
> 
> [1] https://lkml.org/lkml/2019/11/11/572
> 
> Cc: Dave Chinner <david@fromorbit.com>
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes from V0:
> 	Update cover letter.
> 	fix btrfs as per Andrew's comments
> 	change xfs_iomap_swapfile_activate() to xfs_file_swap_activate()
> 
> Changes from V1:
> 	Update recipients list
> 
> 
>  fs/btrfs/file.c    | 341 +++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/inode.c   | 340 --------------------------------------------
>  fs/f2fs/data.c     | 122 ----------------
>  fs/f2fs/file.c     | 122 ++++++++++++++++
>  fs/nfs/file.c      |   4 +-
>  fs/xfs/xfs_aops.c  |  13 --
>  fs/xfs/xfs_file.c  |  12 ++
>  include/linux/fs.h |  10 +-
>  mm/swapfile.c      |  12 +-

For /only/ the xfs and mm parts,
Acked-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

>  9 files changed, 487 insertions(+), 489 deletions(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 0cb43b682789..117502311fe0 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -16,6 +16,7 @@
>  #include <linux/btrfs.h>
>  #include <linux/uio.h>
>  #include <linux/iversion.h>
> +#include <linux/swap.h>
>  #include "ctree.h"
>  #include "disk-io.h"
>  #include "transaction.h"
> @@ -27,6 +28,7 @@
>  #include "qgroup.h"
>  #include "compression.h"
>  #include "delalloc-space.h"
> +#include "block-group.h"
>  
>  static struct kmem_cache *btrfs_inode_defrag_cachep;
>  /*
> @@ -3444,6 +3446,343 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
>  	return generic_file_open(inode, filp);
>  }
>  
> +#ifdef CONFIG_SWAP
> +/*
> + * Add an entry indicating a block group or device which is pinned by a
> + * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
> + * negative errno on failure.
> + */
> +static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
> +				  bool is_block_group)
> +{
> +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> +	struct btrfs_swapfile_pin *sp, *entry;
> +	struct rb_node **p;
> +	struct rb_node *parent = NULL;
> +
> +	sp = kmalloc(sizeof(*sp), GFP_NOFS);
> +	if (!sp)
> +		return -ENOMEM;
> +	sp->ptr = ptr;
> +	sp->inode = inode;
> +	sp->is_block_group = is_block_group;
> +
> +	spin_lock(&fs_info->swapfile_pins_lock);
> +	p = &fs_info->swapfile_pins.rb_node;
> +	while (*p) {
> +		parent = *p;
> +		entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
> +		if (sp->ptr < entry->ptr ||
> +		    (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
> +			p = &(*p)->rb_left;
> +		} else if (sp->ptr > entry->ptr ||
> +			   (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
> +			p = &(*p)->rb_right;
> +		} else {
> +			spin_unlock(&fs_info->swapfile_pins_lock);
> +			kfree(sp);
> +			return 1;
> +		}
> +	}
> +	rb_link_node(&sp->node, parent, p);
> +	rb_insert_color(&sp->node, &fs_info->swapfile_pins);
> +	spin_unlock(&fs_info->swapfile_pins_lock);
> +	return 0;
> +}
> +
> +/* Free all of the entries pinned by this swapfile. */
> +static void btrfs_free_swapfile_pins(struct inode *inode)
> +{
> +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> +	struct btrfs_swapfile_pin *sp;
> +	struct rb_node *node, *next;
> +
> +	spin_lock(&fs_info->swapfile_pins_lock);
> +	node = rb_first(&fs_info->swapfile_pins);
> +	while (node) {
> +		next = rb_next(node);
> +		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
> +		if (sp->inode == inode) {
> +			rb_erase(&sp->node, &fs_info->swapfile_pins);
> +			if (sp->is_block_group)
> +				btrfs_put_block_group(sp->ptr);
> +			kfree(sp);
> +		}
> +		node = next;
> +	}
> +	spin_unlock(&fs_info->swapfile_pins_lock);
> +}
> +
> +struct btrfs_swap_info {
> +	u64 start;
> +	u64 block_start;
> +	u64 block_len;
> +	u64 lowest_ppage;
> +	u64 highest_ppage;
> +	unsigned long nr_pages;
> +	int nr_extents;
> +};
> +
> +static int btrfs_add_swap_extent(struct swap_info_struct *sis,
> +				 struct btrfs_swap_info *bsi)
> +{
> +	unsigned long nr_pages;
> +	u64 first_ppage, first_ppage_reported, next_ppage;
> +	int ret;
> +
> +	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
> +	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
> +				PAGE_SIZE) >> PAGE_SHIFT;
> +
> +	if (first_ppage >= next_ppage)
> +		return 0;
> +	nr_pages = next_ppage - first_ppage;
> +
> +	first_ppage_reported = first_ppage;
> +	if (bsi->start == 0)
> +		first_ppage_reported++;
> +	if (bsi->lowest_ppage > first_ppage_reported)
> +		bsi->lowest_ppage = first_ppage_reported;
> +	if (bsi->highest_ppage < (next_ppage - 1))
> +		bsi->highest_ppage = next_ppage - 1;
> +
> +	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
> +	if (ret < 0)
> +		return ret;
> +	bsi->nr_extents += ret;
> +	bsi->nr_pages += nr_pages;
> +	return 0;
> +}
> +
> +static void btrfs_swap_deactivate(struct file *file)
> +{
> +	struct inode *inode = file_inode(file);
> +
> +	btrfs_free_swapfile_pins(inode);
> +	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
> +}
> +
> +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> +			       sector_t *span)
> +{
> +	struct inode *inode = file_inode(file);
> +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> +	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> +	struct extent_state *cached_state = NULL;
> +	struct extent_map *em = NULL;
> +	struct btrfs_device *device = NULL;
> +	struct btrfs_swap_info bsi = {
> +		.lowest_ppage = (sector_t)-1ULL,
> +	};
> +	int ret = 0;
> +	u64 isize;
> +	u64 start;
> +
> +	/*
> +	 * If the swap file was just created, make sure delalloc is done. If the
> +	 * file changes again after this, the user is doing something stupid and
> +	 * we don't really care.
> +	 */
> +	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * The inode is locked, so these flags won't change after we check them.
> +	 */
> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
> +		btrfs_warn(fs_info, "swapfile must not be compressed");
> +		return -EINVAL;
> +	}
> +	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
> +		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
> +		return -EINVAL;
> +	}
> +	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
> +		btrfs_warn(fs_info, "swapfile must not be checksummed");
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * Balance or device remove/replace/resize can move stuff around from
> +	 * under us. The EXCL_OP flag makes sure they aren't running/won't run
> +	 * concurrently while we are mapping the swap extents, and
> +	 * fs_info->swapfile_pins prevents them from running while the swap file
> +	 * is active and moving the extents. Note that this also prevents a
> +	 * concurrent device add which isn't actually necessary, but it's not
> +	 * really worth the trouble to allow it.
> +	 */
> +	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
> +		btrfs_warn(fs_info,
> +	   "cannot activate swapfile while exclusive operation is running");
> +		return -EBUSY;
> +	}
> +	/*
> +	 * Snapshots can create extents which require COW even if NODATACOW is
> +	 * set. We use this counter to prevent snapshots. We must increment it
> +	 * before walking the extents because we don't want a concurrent
> +	 * snapshot to run after we've already checked the extents.
> +	 */
> +	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
> +
> +	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
> +
> +	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
> +	start = 0;
> +	while (start < isize) {
> +		u64 logical_block_start, physical_block_start;
> +		struct btrfs_block_group_cache *bg;
> +		u64 len = isize - start;
> +
> +		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
> +		if (IS_ERR(em)) {
> +			ret = PTR_ERR(em);
> +			goto out;
> +		}
> +
> +		if (em->block_start == EXTENT_MAP_HOLE) {
> +			btrfs_warn(fs_info, "swapfile must not have holes");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		if (em->block_start == EXTENT_MAP_INLINE) {
> +			/*
> +			 * It's unlikely we'll ever actually find ourselves
> +			 * here, as a file small enough to fit inline won't be
> +			 * big enough to store more than the swap header, but in
> +			 * case something changes in the future, let's catch it
> +			 * here rather than later.
> +			 */
> +			btrfs_warn(fs_info, "swapfile must not be inline");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> +			btrfs_warn(fs_info, "swapfile must not be compressed");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		logical_block_start = em->block_start + (start - em->start);
> +		len = min(len, em->len - (start - em->start));
> +		free_extent_map(em);
> +		em = NULL;
> +
> +		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
> +		if (ret < 0) {
> +			goto out;
> +		} else if (ret) {
> +			ret = 0;
> +		} else {
> +			btrfs_warn(fs_info,
> +				   "swapfile must not be copy-on-write");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
> +		if (IS_ERR(em)) {
> +			ret = PTR_ERR(em);
> +			goto out;
> +		}
> +
> +		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> +			btrfs_warn(fs_info,
> +				   "swapfile must have single data profile");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		if (device == NULL) {
> +			device = em->map_lookup->stripes[0].dev;
> +			ret = btrfs_add_swapfile_pin(inode, device, false);
> +			if (ret == 1)
> +				ret = 0;
> +			else if (ret)
> +				goto out;
> +		} else if (device != em->map_lookup->stripes[0].dev) {
> +			btrfs_warn(fs_info, "swapfile must be on one device");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		physical_block_start = (em->map_lookup->stripes[0].physical +
> +					(logical_block_start - em->start));
> +		len = min(len, em->len - (logical_block_start - em->start));
> +		free_extent_map(em);
> +		em = NULL;
> +
> +		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
> +		if (!bg) {
> +			btrfs_warn(fs_info,
> +			   "could not find block group containing swapfile");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		ret = btrfs_add_swapfile_pin(inode, bg, true);
> +		if (ret) {
> +			btrfs_put_block_group(bg);
> +			if (ret == 1)
> +				ret = 0;
> +			else
> +				goto out;
> +		}
> +
> +		if (bsi.block_len &&
> +		    bsi.block_start + bsi.block_len == physical_block_start) {
> +			bsi.block_len += len;
> +		} else {
> +			if (bsi.block_len) {
> +				ret = btrfs_add_swap_extent(sis, &bsi);
> +				if (ret)
> +					goto out;
> +			}
> +			bsi.start = start;
> +			bsi.block_start = physical_block_start;
> +			bsi.block_len = len;
> +		}
> +
> +		start += len;
> +	}
> +
> +	if (bsi.block_len)
> +		ret = btrfs_add_swap_extent(sis, &bsi);
> +
> +out:
> +	if (!IS_ERR_OR_NULL(em))
> +		free_extent_map(em);
> +
> +	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
> +
> +	if (ret)
> +		btrfs_swap_deactivate(file);
> +
> +	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (device)
> +		sis->bdev = device->bdev;
> +	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
> +	sis->max = bsi.nr_pages;
> +	sis->pages = bsi.nr_pages - 1;
> +	sis->highest_bit = bsi.nr_pages - 1;
> +	return bsi.nr_extents;
> +}
> +#else
> +static void btrfs_swap_deactivate(struct file *file)
> +{
> +}
> +
> +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> +			       sector_t *span)
> +{
> +	return -EOPNOTSUPP;
> +}
> +#endif
> +
>  const struct file_operations btrfs_file_operations = {
>  	.llseek		= btrfs_file_llseek,
>  	.read_iter      = generic_file_read_iter,
> @@ -3459,6 +3798,8 @@ const struct file_operations btrfs_file_operations = {
>  	.compat_ioctl	= btrfs_compat_ioctl,
>  #endif
>  	.remap_file_range = btrfs_remap_file_range,
> +	.swap_activate	= btrfs_swap_activate,
> +	.swap_deactivate = btrfs_swap_deactivate,
>  };
>  
>  void __cold btrfs_auto_defrag_exit(void)
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 6d159df7b536..c11b86f2bf24 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -27,7 +27,6 @@
>  #include <linux/uio.h>
>  #include <linux/magic.h>
>  #include <linux/iversion.h>
> -#include <linux/swap.h>
>  #include <linux/sched/mm.h>
>  #include <asm/unaligned.h>
>  #include "misc.h"
> @@ -10629,343 +10628,6 @@ void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
>  	}
>  }
>  
> -#ifdef CONFIG_SWAP
> -/*
> - * Add an entry indicating a block group or device which is pinned by a
> - * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
> - * negative errno on failure.
> - */
> -static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
> -				  bool is_block_group)
> -{
> -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> -	struct btrfs_swapfile_pin *sp, *entry;
> -	struct rb_node **p;
> -	struct rb_node *parent = NULL;
> -
> -	sp = kmalloc(sizeof(*sp), GFP_NOFS);
> -	if (!sp)
> -		return -ENOMEM;
> -	sp->ptr = ptr;
> -	sp->inode = inode;
> -	sp->is_block_group = is_block_group;
> -
> -	spin_lock(&fs_info->swapfile_pins_lock);
> -	p = &fs_info->swapfile_pins.rb_node;
> -	while (*p) {
> -		parent = *p;
> -		entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
> -		if (sp->ptr < entry->ptr ||
> -		    (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
> -			p = &(*p)->rb_left;
> -		} else if (sp->ptr > entry->ptr ||
> -			   (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
> -			p = &(*p)->rb_right;
> -		} else {
> -			spin_unlock(&fs_info->swapfile_pins_lock);
> -			kfree(sp);
> -			return 1;
> -		}
> -	}
> -	rb_link_node(&sp->node, parent, p);
> -	rb_insert_color(&sp->node, &fs_info->swapfile_pins);
> -	spin_unlock(&fs_info->swapfile_pins_lock);
> -	return 0;
> -}
> -
> -/* Free all of the entries pinned by this swapfile. */
> -static void btrfs_free_swapfile_pins(struct inode *inode)
> -{
> -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> -	struct btrfs_swapfile_pin *sp;
> -	struct rb_node *node, *next;
> -
> -	spin_lock(&fs_info->swapfile_pins_lock);
> -	node = rb_first(&fs_info->swapfile_pins);
> -	while (node) {
> -		next = rb_next(node);
> -		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
> -		if (sp->inode == inode) {
> -			rb_erase(&sp->node, &fs_info->swapfile_pins);
> -			if (sp->is_block_group)
> -				btrfs_put_block_group(sp->ptr);
> -			kfree(sp);
> -		}
> -		node = next;
> -	}
> -	spin_unlock(&fs_info->swapfile_pins_lock);
> -}
> -
> -struct btrfs_swap_info {
> -	u64 start;
> -	u64 block_start;
> -	u64 block_len;
> -	u64 lowest_ppage;
> -	u64 highest_ppage;
> -	unsigned long nr_pages;
> -	int nr_extents;
> -};
> -
> -static int btrfs_add_swap_extent(struct swap_info_struct *sis,
> -				 struct btrfs_swap_info *bsi)
> -{
> -	unsigned long nr_pages;
> -	u64 first_ppage, first_ppage_reported, next_ppage;
> -	int ret;
> -
> -	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
> -	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
> -				PAGE_SIZE) >> PAGE_SHIFT;
> -
> -	if (first_ppage >= next_ppage)
> -		return 0;
> -	nr_pages = next_ppage - first_ppage;
> -
> -	first_ppage_reported = first_ppage;
> -	if (bsi->start == 0)
> -		first_ppage_reported++;
> -	if (bsi->lowest_ppage > first_ppage_reported)
> -		bsi->lowest_ppage = first_ppage_reported;
> -	if (bsi->highest_ppage < (next_ppage - 1))
> -		bsi->highest_ppage = next_ppage - 1;
> -
> -	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
> -	if (ret < 0)
> -		return ret;
> -	bsi->nr_extents += ret;
> -	bsi->nr_pages += nr_pages;
> -	return 0;
> -}
> -
> -static void btrfs_swap_deactivate(struct file *file)
> -{
> -	struct inode *inode = file_inode(file);
> -
> -	btrfs_free_swapfile_pins(inode);
> -	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
> -}
> -
> -static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> -			       sector_t *span)
> -{
> -	struct inode *inode = file_inode(file);
> -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> -	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> -	struct extent_state *cached_state = NULL;
> -	struct extent_map *em = NULL;
> -	struct btrfs_device *device = NULL;
> -	struct btrfs_swap_info bsi = {
> -		.lowest_ppage = (sector_t)-1ULL,
> -	};
> -	int ret = 0;
> -	u64 isize;
> -	u64 start;
> -
> -	/*
> -	 * If the swap file was just created, make sure delalloc is done. If the
> -	 * file changes again after this, the user is doing something stupid and
> -	 * we don't really care.
> -	 */
> -	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
> -	if (ret)
> -		return ret;
> -
> -	/*
> -	 * The inode is locked, so these flags won't change after we check them.
> -	 */
> -	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
> -		btrfs_warn(fs_info, "swapfile must not be compressed");
> -		return -EINVAL;
> -	}
> -	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
> -		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
> -		return -EINVAL;
> -	}
> -	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
> -		btrfs_warn(fs_info, "swapfile must not be checksummed");
> -		return -EINVAL;
> -	}
> -
> -	/*
> -	 * Balance or device remove/replace/resize can move stuff around from
> -	 * under us. The EXCL_OP flag makes sure they aren't running/won't run
> -	 * concurrently while we are mapping the swap extents, and
> -	 * fs_info->swapfile_pins prevents them from running while the swap file
> -	 * is active and moving the extents. Note that this also prevents a
> -	 * concurrent device add which isn't actually necessary, but it's not
> -	 * really worth the trouble to allow it.
> -	 */
> -	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
> -		btrfs_warn(fs_info,
> -	   "cannot activate swapfile while exclusive operation is running");
> -		return -EBUSY;
> -	}
> -	/*
> -	 * Snapshots can create extents which require COW even if NODATACOW is
> -	 * set. We use this counter to prevent snapshots. We must increment it
> -	 * before walking the extents because we don't want a concurrent
> -	 * snapshot to run after we've already checked the extents.
> -	 */
> -	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
> -
> -	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
> -
> -	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
> -	start = 0;
> -	while (start < isize) {
> -		u64 logical_block_start, physical_block_start;
> -		struct btrfs_block_group_cache *bg;
> -		u64 len = isize - start;
> -
> -		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
> -		if (IS_ERR(em)) {
> -			ret = PTR_ERR(em);
> -			goto out;
> -		}
> -
> -		if (em->block_start == EXTENT_MAP_HOLE) {
> -			btrfs_warn(fs_info, "swapfile must not have holes");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -		if (em->block_start == EXTENT_MAP_INLINE) {
> -			/*
> -			 * It's unlikely we'll ever actually find ourselves
> -			 * here, as a file small enough to fit inline won't be
> -			 * big enough to store more than the swap header, but in
> -			 * case something changes in the future, let's catch it
> -			 * here rather than later.
> -			 */
> -			btrfs_warn(fs_info, "swapfile must not be inline");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> -			btrfs_warn(fs_info, "swapfile must not be compressed");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		logical_block_start = em->block_start + (start - em->start);
> -		len = min(len, em->len - (start - em->start));
> -		free_extent_map(em);
> -		em = NULL;
> -
> -		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
> -		if (ret < 0) {
> -			goto out;
> -		} else if (ret) {
> -			ret = 0;
> -		} else {
> -			btrfs_warn(fs_info,
> -				   "swapfile must not be copy-on-write");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
> -		if (IS_ERR(em)) {
> -			ret = PTR_ERR(em);
> -			goto out;
> -		}
> -
> -		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> -			btrfs_warn(fs_info,
> -				   "swapfile must have single data profile");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		if (device == NULL) {
> -			device = em->map_lookup->stripes[0].dev;
> -			ret = btrfs_add_swapfile_pin(inode, device, false);
> -			if (ret == 1)
> -				ret = 0;
> -			else if (ret)
> -				goto out;
> -		} else if (device != em->map_lookup->stripes[0].dev) {
> -			btrfs_warn(fs_info, "swapfile must be on one device");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		physical_block_start = (em->map_lookup->stripes[0].physical +
> -					(logical_block_start - em->start));
> -		len = min(len, em->len - (logical_block_start - em->start));
> -		free_extent_map(em);
> -		em = NULL;
> -
> -		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
> -		if (!bg) {
> -			btrfs_warn(fs_info,
> -			   "could not find block group containing swapfile");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		ret = btrfs_add_swapfile_pin(inode, bg, true);
> -		if (ret) {
> -			btrfs_put_block_group(bg);
> -			if (ret == 1)
> -				ret = 0;
> -			else
> -				goto out;
> -		}
> -
> -		if (bsi.block_len &&
> -		    bsi.block_start + bsi.block_len == physical_block_start) {
> -			bsi.block_len += len;
> -		} else {
> -			if (bsi.block_len) {
> -				ret = btrfs_add_swap_extent(sis, &bsi);
> -				if (ret)
> -					goto out;
> -			}
> -			bsi.start = start;
> -			bsi.block_start = physical_block_start;
> -			bsi.block_len = len;
> -		}
> -
> -		start += len;
> -	}
> -
> -	if (bsi.block_len)
> -		ret = btrfs_add_swap_extent(sis, &bsi);
> -
> -out:
> -	if (!IS_ERR_OR_NULL(em))
> -		free_extent_map(em);
> -
> -	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
> -
> -	if (ret)
> -		btrfs_swap_deactivate(file);
> -
> -	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> -
> -	if (ret)
> -		return ret;
> -
> -	if (device)
> -		sis->bdev = device->bdev;
> -	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
> -	sis->max = bsi.nr_pages;
> -	sis->pages = bsi.nr_pages - 1;
> -	sis->highest_bit = bsi.nr_pages - 1;
> -	return bsi.nr_extents;
> -}
> -#else
> -static void btrfs_swap_deactivate(struct file *file)
> -{
> -}
> -
> -static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> -			       sector_t *span)
> -{
> -	return -EOPNOTSUPP;
> -}
> -#endif
> -
>  static const struct inode_operations btrfs_dir_inode_operations = {
>  	.getattr	= btrfs_getattr,
>  	.lookup		= btrfs_lookup,
> @@ -11032,8 +10694,6 @@ static const struct address_space_operations btrfs_aops = {
>  	.releasepage	= btrfs_releasepage,
>  	.set_page_dirty	= btrfs_set_page_dirty,
>  	.error_remove_page = generic_error_remove_page,
> -	.swap_activate	= btrfs_swap_activate,
> -	.swap_deactivate = btrfs_swap_deactivate,
>  };
>  
>  static const struct inode_operations btrfs_file_inode_operations = {
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 3c7777bfae17..04b2a8f44fa9 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -14,7 +14,6 @@
>  #include <linux/pagevec.h>
>  #include <linux/blkdev.h>
>  #include <linux/bio.h>
> -#include <linux/swap.h>
>  #include <linux/prefetch.h>
>  #include <linux/uio.h>
>  #include <linux/cleancache.h>
> @@ -3142,125 +3141,6 @@ int f2fs_migrate_page(struct address_space *mapping,
>  }
>  #endif
>  
> -#ifdef CONFIG_SWAP
> -/* Copied from generic_swapfile_activate() to check any holes */
> -static int check_swap_activate(struct file *swap_file, unsigned int max)
> -{
> -	struct inode *inode = swap_file->f_mapping->host;
> -	unsigned blocks_per_page;
> -	unsigned long page_no;
> -	unsigned blkbits;
> -	sector_t probe_block;
> -	sector_t last_block;
> -	sector_t lowest_block = -1;
> -	sector_t highest_block = 0;
> -
> -	blkbits = inode->i_blkbits;
> -	blocks_per_page = PAGE_SIZE >> blkbits;
> -
> -	/*
> -	 * Map all the blocks into the extent list.  This code doesn't try
> -	 * to be very smart.
> -	 */
> -	probe_block = 0;
> -	page_no = 0;
> -	last_block = i_size_read(inode) >> blkbits;
> -	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
> -		unsigned block_in_page;
> -		sector_t first_block;
> -
> -		cond_resched();
> -
> -		first_block = bmap(inode, probe_block);
> -		if (first_block == 0)
> -			goto bad_bmap;
> -
> -		/*
> -		 * It must be PAGE_SIZE aligned on-disk
> -		 */
> -		if (first_block & (blocks_per_page - 1)) {
> -			probe_block++;
> -			goto reprobe;
> -		}
> -
> -		for (block_in_page = 1; block_in_page < blocks_per_page;
> -					block_in_page++) {
> -			sector_t block;
> -
> -			block = bmap(inode, probe_block + block_in_page);
> -			if (block == 0)
> -				goto bad_bmap;
> -			if (block != first_block + block_in_page) {
> -				/* Discontiguity */
> -				probe_block++;
> -				goto reprobe;
> -			}
> -		}
> -
> -		first_block >>= (PAGE_SHIFT - blkbits);
> -		if (page_no) {	/* exclude the header page */
> -			if (first_block < lowest_block)
> -				lowest_block = first_block;
> -			if (first_block > highest_block)
> -				highest_block = first_block;
> -		}
> -
> -		page_no++;
> -		probe_block += blocks_per_page;
> -reprobe:
> -		continue;
> -	}
> -	return 0;
> -
> -bad_bmap:
> -	pr_err("swapon: swapfile has holes\n");
> -	return -EINVAL;
> -}
> -
> -static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> -				sector_t *span)
> -{
> -	struct inode *inode = file_inode(file);
> -	int ret;
> -
> -	if (!S_ISREG(inode->i_mode))
> -		return -EINVAL;
> -
> -	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> -		return -EROFS;
> -
> -	ret = f2fs_convert_inline_inode(inode);
> -	if (ret)
> -		return ret;
> -
> -	ret = check_swap_activate(file, sis->max);
> -	if (ret)
> -		return ret;
> -
> -	set_inode_flag(inode, FI_PIN_FILE);
> -	f2fs_precache_extents(inode);
> -	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> -	return 0;
> -}
> -
> -static void f2fs_swap_deactivate(struct file *file)
> -{
> -	struct inode *inode = file_inode(file);
> -
> -	clear_inode_flag(inode, FI_PIN_FILE);
> -}
> -#else
> -static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> -				sector_t *span)
> -{
> -	return -EOPNOTSUPP;
> -}
> -
> -static void f2fs_swap_deactivate(struct file *file)
> -{
> -}
> -#endif
> -
>  const struct address_space_operations f2fs_dblock_aops = {
>  	.readpage	= f2fs_read_data_page,
>  	.readpages	= f2fs_read_data_pages,
> @@ -3273,8 +3153,6 @@ const struct address_space_operations f2fs_dblock_aops = {
>  	.releasepage	= f2fs_release_page,
>  	.direct_IO	= f2fs_direct_IO,
>  	.bmap		= f2fs_bmap,
> -	.swap_activate  = f2fs_swap_activate,
> -	.swap_deactivate = f2fs_swap_deactivate,
>  #ifdef CONFIG_MIGRATION
>  	.migratepage    = f2fs_migrate_page,
>  #endif
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 483ad22a0946..de7f9cf36689 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -21,6 +21,7 @@
>  #include <linux/uuid.h>
>  #include <linux/file.h>
>  #include <linux/nls.h>
> +#include <linux/swap.h>
>  
>  #include "f2fs.h"
>  #include "node.h"
> @@ -3466,6 +3467,125 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  }
>  #endif
>  
> +#ifdef CONFIG_SWAP
> +/* Copied from generic_swapfile_activate() to check any holes */
> +static int check_swap_activate(struct file *swap_file, unsigned int max)
> +{
> +	struct inode *inode = swap_file->f_mapping->host;
> +	unsigned blocks_per_page;
> +	unsigned long page_no;
> +	unsigned blkbits;
> +	sector_t probe_block;
> +	sector_t last_block;
> +	sector_t lowest_block = -1;
> +	sector_t highest_block = 0;
> +
> +	blkbits = inode->i_blkbits;
> +	blocks_per_page = PAGE_SIZE >> blkbits;
> +
> +	/*
> +	 * Map all the blocks into the extent list.  This code doesn't try
> +	 * to be very smart.
> +	 */
> +	probe_block = 0;
> +	page_no = 0;
> +	last_block = i_size_read(inode) >> blkbits;
> +	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
> +		unsigned block_in_page;
> +		sector_t first_block;
> +
> +		cond_resched();
> +
> +		first_block = bmap(inode, probe_block);
> +		if (first_block == 0)
> +			goto bad_bmap;
> +
> +		/*
> +		 * It must be PAGE_SIZE aligned on-disk
> +		 */
> +		if (first_block & (blocks_per_page - 1)) {
> +			probe_block++;
> +			goto reprobe;
> +		}
> +
> +		for (block_in_page = 1; block_in_page < blocks_per_page;
> +					block_in_page++) {
> +			sector_t block;
> +
> +			block = bmap(inode, probe_block + block_in_page);
> +			if (block == 0)
> +				goto bad_bmap;
> +			if (block != first_block + block_in_page) {
> +				/* Discontiguity */
> +				probe_block++;
> +				goto reprobe;
> +			}
> +		}
> +
> +		first_block >>= (PAGE_SHIFT - blkbits);
> +		if (page_no) {	/* exclude the header page */
> +			if (first_block < lowest_block)
> +				lowest_block = first_block;
> +			if (first_block > highest_block)
> +				highest_block = first_block;
> +		}
> +
> +		page_no++;
> +		probe_block += blocks_per_page;
> +reprobe:
> +		continue;
> +	}
> +	return 0;
> +
> +bad_bmap:
> +	pr_err("swapon: swapfile has holes\n");
> +	return -EINVAL;
> +}
> +
> +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> +				sector_t *span)
> +{
> +	struct inode *inode = file_inode(file);
> +	int ret;
> +
> +	if (!S_ISREG(inode->i_mode))
> +		return -EINVAL;
> +
> +	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> +		return -EROFS;
> +
> +	ret = f2fs_convert_inline_inode(inode);
> +	if (ret)
> +		return ret;
> +
> +	ret = check_swap_activate(file, sis->max);
> +	if (ret)
> +		return ret;
> +
> +	set_inode_flag(inode, FI_PIN_FILE);
> +	f2fs_precache_extents(inode);
> +	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> +	return 0;
> +}
> +
> +static void f2fs_swap_deactivate(struct file *file)
> +{
> +	struct inode *inode = file_inode(file);
> +
> +	clear_inode_flag(inode, FI_PIN_FILE);
> +}
> +#else
> +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> +				sector_t *span)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static void f2fs_swap_deactivate(struct file *file)
> +{
> +}
> +#endif
> +
>  const struct file_operations f2fs_file_operations = {
>  	.llseek		= f2fs_llseek,
>  	.read_iter	= generic_file_read_iter,
> @@ -3482,4 +3602,6 @@ const struct file_operations f2fs_file_operations = {
>  #endif
>  	.splice_read	= generic_file_splice_read,
>  	.splice_write	= iter_file_splice_write,
> +	.swap_activate  = f2fs_swap_activate,
> +	.swap_deactivate = f2fs_swap_deactivate,
>  };
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 95dc90570786..1f82f92185d6 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -520,8 +520,6 @@ const struct address_space_operations nfs_file_aops = {
>  	.launder_page = nfs_launder_page,
>  	.is_dirty_writeback = nfs_check_dirty_writeback,
>  	.error_remove_page = generic_error_remove_page,
> -	.swap_activate = nfs_swap_activate,
> -	.swap_deactivate = nfs_swap_deactivate,
>  };
>  
>  /*
> @@ -847,5 +845,7 @@ const struct file_operations nfs_file_operations = {
>  	.splice_write	= iter_file_splice_write,
>  	.check_flags	= nfs_check_flags,
>  	.setlease	= simple_nosetlease,
> +	.swap_activate = nfs_swap_activate,
> +	.swap_deactivate = nfs_swap_deactivate,
>  };
>  EXPORT_SYMBOL_GPL(nfs_file_operations);
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 3a688eb5c5ae..99f578a9ed90 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -631,17 +631,6 @@ xfs_vm_readpages(
>  	return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops);
>  }
>  
> -static int
> -xfs_iomap_swapfile_activate(
> -	struct swap_info_struct		*sis,
> -	struct file			*swap_file,
> -	sector_t			*span)
> -{
> -	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
> -	return iomap_swapfile_activate(sis, swap_file, span,
> -			&xfs_read_iomap_ops);
> -}
> -
>  const struct address_space_operations xfs_address_space_operations = {
>  	.readpage		= xfs_vm_readpage,
>  	.readpages		= xfs_vm_readpages,
> @@ -655,7 +644,6 @@ const struct address_space_operations xfs_address_space_operations = {
>  	.migratepage		= iomap_migrate_page,
>  	.is_partially_uptodate  = iomap_is_partially_uptodate,
>  	.error_remove_page	= generic_error_remove_page,
> -	.swap_activate		= xfs_iomap_swapfile_activate,
>  };
>  
>  const struct address_space_operations xfs_dax_aops = {
> @@ -663,5 +651,4 @@ const struct address_space_operations xfs_dax_aops = {
>  	.direct_IO		= noop_direct_IO,
>  	.set_page_dirty		= noop_set_page_dirty,
>  	.invalidatepage		= noop_invalidatepage,
> -	.swap_activate		= xfs_iomap_swapfile_activate,
>  };
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 865543e41fb4..225f58561f06 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1294,6 +1294,17 @@ xfs_file_mmap(
>  	return 0;
>  }
>  
> +static int
> +xfs_file_swap_activate(
> +	struct swap_info_struct		*sis,
> +	struct file			*swap_file,
> +	sector_t			*span)
> +{
> +	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
> +	return iomap_swapfile_activate(sis, swap_file, span,
> +			&xfs_read_iomap_ops);
> +}
> +
>  const struct file_operations xfs_file_operations = {
>  	.llseek		= xfs_file_llseek,
>  	.read_iter	= xfs_file_read_iter,
> @@ -1314,6 +1325,7 @@ const struct file_operations xfs_file_operations = {
>  	.fallocate	= xfs_file_fallocate,
>  	.fadvise	= xfs_file_fadvise,
>  	.remap_file_range = xfs_file_remap_range,
> +	.swap_activate	= xfs_file_swap_activate,
>  };
>  
>  const struct file_operations xfs_dir_file_operations = {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 83e011e0df7f..1175815da3df 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -402,11 +402,6 @@ struct address_space_operations {
>  					unsigned long);
>  	void (*is_dirty_writeback) (struct page *, bool *, bool *);
>  	int (*error_remove_page)(struct address_space *, struct page *);
> -
> -	/* swapfile support */
> -	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
> -				sector_t *span);
> -	void (*swap_deactivate)(struct file *file);
>  };
>  
>  extern const struct address_space_operations empty_aops;
> @@ -1858,6 +1853,11 @@ struct file_operations {
>  				   struct file *file_out, loff_t pos_out,
>  				   loff_t len, unsigned int remap_flags);
>  	int (*fadvise)(struct file *, loff_t, loff_t, int);
> +
> +	/* swapfile support */
> +	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
> +				sector_t *span);
> +	void (*swap_deactivate)(struct file *file);
>  } __randomize_layout;
>  
>  struct inode_operations {
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index bb3261d45b6a..d2de8d668708 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -2293,11 +2293,10 @@ static void destroy_swap_extents(struct swap_info_struct *sis)
>  
>  	if (sis->flags & SWP_ACTIVATED) {
>  		struct file *swap_file = sis->swap_file;
> -		struct address_space *mapping = swap_file->f_mapping;
>  
>  		sis->flags &= ~SWP_ACTIVATED;
> -		if (mapping->a_ops->swap_deactivate)
> -			mapping->a_ops->swap_deactivate(swap_file);
> +		if (swap_file->f_op->swap_deactivate)
> +			swap_file->f_op->swap_deactivate(swap_file);
>  	}
>  }
>  
> @@ -2381,8 +2380,7 @@ EXPORT_SYMBOL_GPL(add_swap_extent);
>  static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
>  {
>  	struct file *swap_file = sis->swap_file;
> -	struct address_space *mapping = swap_file->f_mapping;
> -	struct inode *inode = mapping->host;
> +	struct inode *inode = swap_file->f_mapping->host;
>  	int ret;
>  
>  	if (S_ISBLK(inode->i_mode)) {
> @@ -2391,8 +2389,8 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
>  		return ret;
>  	}
>  
> -	if (mapping->a_ops->swap_activate) {
> -		ret = mapping->a_ops->swap_activate(sis, swap_file, span);
> +	if (swap_file->f_op->swap_activate) {
> +		ret = swap_file->f_op->swap_activate(sis, swap_file, span);
>  		if (ret >= 0)
>  			sis->flags |= SWP_ACTIVATED;
>  		if (!ret) {
> -- 
> 2.21.0
>
David Sterba Nov. 13, 2019, 3:25 p.m. UTC | #2
On Tue, Nov 12, 2019 at 04:42:44PM -0800, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> swap_activate() and swap_deactivate() have nothing to do with address
> spaces.  We want to be able to change the address space operations on
> the fly to allow changing inode flags dynamically.
> 
> Switching address space operations can be difficult to do reliably.[1]
> Therefore, to simplify switching address space operations we reduce the
> number of functions in those operations by moving swap_activate() and
> swap_deactivate() out of the address space operations.
> 
> No functionality is changed with this patch.
> 
> This has been tested with XFS but not NFS, f2fs, or btrfs.
> 
> Also note we move some functions to facilitate compilation.  But no
> functional changes are contained within those diffs.
> 
> [1] https://lkml.org/lkml/2019/11/11/572
> 
> Cc: Dave Chinner <david@fromorbit.com>
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes from V0:
> 	Update cover letter.
> 	fix btrfs as per Andrew's comments
> 	change xfs_iomap_swapfile_activate() to xfs_file_swap_activate()
> 
> Changes from V1:
> 	Update recipients list
> 
> 
>  fs/btrfs/file.c    | 341 +++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/inode.c   | 340 --------------------------------------------

For the btrfs part

Acked-by: David Sterba <dsterba@suse.com>

There's going to be a minor conflict with the current 5.5 queue; the
resolution is a simple rename of btrfs_block_group_cache to btrfs_block_group.
Darrick J. Wong Nov. 14, 2019, 9:03 p.m. UTC | #3
On Tue, Nov 12, 2019 at 04:42:44PM -0800, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> swap_activate() and swap_deactivate() have nothing to do with address
> spaces.  We want to be able to change the address space operations on
> the fly to allow changing inode flags dynamically.
> 
> Switching address space operations can be difficult to do reliably.[1]
> Therefore, to simplify switching address space operations we reduce the
> number of functions in those operations by moving swap_activate() and
> swap_deactivate() out of the address space operations.
> 
> No functionality is changed with this patch.
> 
> This has been tested with XFS but not NFS, f2fs, or btrfs.
> 
> Also note we move some functions to facilitate compilation.  But no
> functional changes are contained within those diffs.
> 
> [1] https://lkml.org/lkml/2019/11/11/572
> 
> Cc: Dave Chinner <david@fromorbit.com>
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

Replace previous ack with:

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> 
> ---
> Changes from V0:
> 	Update cover letter.
> 	fix btrfs as per Andrew's comments
> 	change xfs_iomap_swapfile_activate() to xfs_file_swap_activate()
> 
> Changes from V1:
> 	Update recipients list
> 
> 
>  fs/btrfs/file.c    | 341 +++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/inode.c   | 340 --------------------------------------------
>  fs/f2fs/data.c     | 122 ----------------
>  fs/f2fs/file.c     | 122 ++++++++++++++++
>  fs/nfs/file.c      |   4 +-
>  fs/xfs/xfs_aops.c  |  13 --
>  fs/xfs/xfs_file.c  |  12 ++
>  include/linux/fs.h |  10 +-
>  mm/swapfile.c      |  12 +-
>  9 files changed, 487 insertions(+), 489 deletions(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 0cb43b682789..117502311fe0 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -16,6 +16,7 @@
>  #include <linux/btrfs.h>
>  #include <linux/uio.h>
>  #include <linux/iversion.h>
> +#include <linux/swap.h>
>  #include "ctree.h"
>  #include "disk-io.h"
>  #include "transaction.h"
> @@ -27,6 +28,7 @@
>  #include "qgroup.h"
>  #include "compression.h"
>  #include "delalloc-space.h"
> +#include "block-group.h"
>  
>  static struct kmem_cache *btrfs_inode_defrag_cachep;
>  /*
> @@ -3444,6 +3446,343 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
>  	return generic_file_open(inode, filp);
>  }
>  
> +#ifdef CONFIG_SWAP
> +/*
> + * Add an entry indicating a block group or device which is pinned by a
> + * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
> + * negative errno on failure.
> + */
> +static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
> +				  bool is_block_group)
> +{
> +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> +	struct btrfs_swapfile_pin *sp, *entry;
> +	struct rb_node **p;
> +	struct rb_node *parent = NULL;
> +
> +	sp = kmalloc(sizeof(*sp), GFP_NOFS);
> +	if (!sp)
> +		return -ENOMEM;
> +	sp->ptr = ptr;
> +	sp->inode = inode;
> +	sp->is_block_group = is_block_group;
> +
> +	spin_lock(&fs_info->swapfile_pins_lock);
> +	p = &fs_info->swapfile_pins.rb_node;
> +	while (*p) {
> +		parent = *p;
> +		entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
> +		if (sp->ptr < entry->ptr ||
> +		    (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
> +			p = &(*p)->rb_left;
> +		} else if (sp->ptr > entry->ptr ||
> +			   (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
> +			p = &(*p)->rb_right;
> +		} else {
> +			spin_unlock(&fs_info->swapfile_pins_lock);
> +			kfree(sp);
> +			return 1;
> +		}
> +	}
> +	rb_link_node(&sp->node, parent, p);
> +	rb_insert_color(&sp->node, &fs_info->swapfile_pins);
> +	spin_unlock(&fs_info->swapfile_pins_lock);
> +	return 0;
> +}
> +
> +/* Free all of the entries pinned by this swapfile. */
> +static void btrfs_free_swapfile_pins(struct inode *inode)
> +{
> +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> +	struct btrfs_swapfile_pin *sp;
> +	struct rb_node *node, *next;
> +
> +	spin_lock(&fs_info->swapfile_pins_lock);
> +	node = rb_first(&fs_info->swapfile_pins);
> +	while (node) {
> +		next = rb_next(node);
> +		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
> +		if (sp->inode == inode) {
> +			rb_erase(&sp->node, &fs_info->swapfile_pins);
> +			if (sp->is_block_group)
> +				btrfs_put_block_group(sp->ptr);
> +			kfree(sp);
> +		}
> +		node = next;
> +	}
> +	spin_unlock(&fs_info->swapfile_pins_lock);
> +}
> +
> +struct btrfs_swap_info {
> +	u64 start;
> +	u64 block_start;
> +	u64 block_len;
> +	u64 lowest_ppage;
> +	u64 highest_ppage;
> +	unsigned long nr_pages;
> +	int nr_extents;
> +};
> +
> +static int btrfs_add_swap_extent(struct swap_info_struct *sis,
> +				 struct btrfs_swap_info *bsi)
> +{
> +	unsigned long nr_pages;
> +	u64 first_ppage, first_ppage_reported, next_ppage;
> +	int ret;
> +
> +	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
> +	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
> +				PAGE_SIZE) >> PAGE_SHIFT;
> +
> +	if (first_ppage >= next_ppage)
> +		return 0;
> +	nr_pages = next_ppage - first_ppage;
> +
> +	first_ppage_reported = first_ppage;
> +	if (bsi->start == 0)
> +		first_ppage_reported++;
> +	if (bsi->lowest_ppage > first_ppage_reported)
> +		bsi->lowest_ppage = first_ppage_reported;
> +	if (bsi->highest_ppage < (next_ppage - 1))
> +		bsi->highest_ppage = next_ppage - 1;
> +
> +	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
> +	if (ret < 0)
> +		return ret;
> +	bsi->nr_extents += ret;
> +	bsi->nr_pages += nr_pages;
> +	return 0;
> +}
> +
> +static void btrfs_swap_deactivate(struct file *file)
> +{
> +	struct inode *inode = file_inode(file);
> +
> +	btrfs_free_swapfile_pins(inode);
> +	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
> +}
> +
> +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> +			       sector_t *span)
> +{
> +	struct inode *inode = file_inode(file);
> +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> +	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> +	struct extent_state *cached_state = NULL;
> +	struct extent_map *em = NULL;
> +	struct btrfs_device *device = NULL;
> +	struct btrfs_swap_info bsi = {
> +		.lowest_ppage = (sector_t)-1ULL,
> +	};
> +	int ret = 0;
> +	u64 isize;
> +	u64 start;
> +
> +	/*
> +	 * If the swap file was just created, make sure delalloc is done. If the
> +	 * file changes again after this, the user is doing something stupid and
> +	 * we don't really care.
> +	 */
> +	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * The inode is locked, so these flags won't change after we check them.
> +	 */
> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
> +		btrfs_warn(fs_info, "swapfile must not be compressed");
> +		return -EINVAL;
> +	}
> +	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
> +		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
> +		return -EINVAL;
> +	}
> +	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
> +		btrfs_warn(fs_info, "swapfile must not be checksummed");
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * Balance or device remove/replace/resize can move stuff around from
> +	 * under us. The EXCL_OP flag makes sure they aren't running/won't run
> +	 * concurrently while we are mapping the swap extents, and
> +	 * fs_info->swapfile_pins prevents them from running while the swap file
> +	 * is active and moving the extents. Note that this also prevents a
> +	 * concurrent device add which isn't actually necessary, but it's not
> +	 * really worth the trouble to allow it.
> +	 */
> +	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
> +		btrfs_warn(fs_info,
> +	   "cannot activate swapfile while exclusive operation is running");
> +		return -EBUSY;
> +	}
> +	/*
> +	 * Snapshots can create extents which require COW even if NODATACOW is
> +	 * set. We use this counter to prevent snapshots. We must increment it
> +	 * before walking the extents because we don't want a concurrent
> +	 * snapshot to run after we've already checked the extents.
> +	 */
> +	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
> +
> +	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
> +
> +	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
> +	start = 0;
> +	while (start < isize) {
> +		u64 logical_block_start, physical_block_start;
> +		struct btrfs_block_group_cache *bg;
> +		u64 len = isize - start;
> +
> +		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
> +		if (IS_ERR(em)) {
> +			ret = PTR_ERR(em);
> +			goto out;
> +		}
> +
> +		if (em->block_start == EXTENT_MAP_HOLE) {
> +			btrfs_warn(fs_info, "swapfile must not have holes");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		if (em->block_start == EXTENT_MAP_INLINE) {
> +			/*
> +			 * It's unlikely we'll ever actually find ourselves
> +			 * here, as a file small enough to fit inline won't be
> +			 * big enough to store more than the swap header, but in
> +			 * case something changes in the future, let's catch it
> +			 * here rather than later.
> +			 */
> +			btrfs_warn(fs_info, "swapfile must not be inline");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> +			btrfs_warn(fs_info, "swapfile must not be compressed");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		logical_block_start = em->block_start + (start - em->start);
> +		len = min(len, em->len - (start - em->start));
> +		free_extent_map(em);
> +		em = NULL;
> +
> +		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
> +		if (ret < 0) {
> +			goto out;
> +		} else if (ret) {
> +			ret = 0;
> +		} else {
> +			btrfs_warn(fs_info,
> +				   "swapfile must not be copy-on-write");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
> +		if (IS_ERR(em)) {
> +			ret = PTR_ERR(em);
> +			goto out;
> +		}
> +
> +		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> +			btrfs_warn(fs_info,
> +				   "swapfile must have single data profile");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		if (device == NULL) {
> +			device = em->map_lookup->stripes[0].dev;
> +			ret = btrfs_add_swapfile_pin(inode, device, false);
> +			if (ret == 1)
> +				ret = 0;
> +			else if (ret)
> +				goto out;
> +		} else if (device != em->map_lookup->stripes[0].dev) {
> +			btrfs_warn(fs_info, "swapfile must be on one device");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		physical_block_start = (em->map_lookup->stripes[0].physical +
> +					(logical_block_start - em->start));
> +		len = min(len, em->len - (logical_block_start - em->start));
> +		free_extent_map(em);
> +		em = NULL;
> +
> +		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
> +		if (!bg) {
> +			btrfs_warn(fs_info,
> +			   "could not find block group containing swapfile");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		ret = btrfs_add_swapfile_pin(inode, bg, true);
> +		if (ret) {
> +			btrfs_put_block_group(bg);
> +			if (ret == 1)
> +				ret = 0;
> +			else
> +				goto out;
> +		}
> +
> +		if (bsi.block_len &&
> +		    bsi.block_start + bsi.block_len == physical_block_start) {
> +			bsi.block_len += len;
> +		} else {
> +			if (bsi.block_len) {
> +				ret = btrfs_add_swap_extent(sis, &bsi);
> +				if (ret)
> +					goto out;
> +			}
> +			bsi.start = start;
> +			bsi.block_start = physical_block_start;
> +			bsi.block_len = len;
> +		}
> +
> +		start += len;
> +	}
> +
> +	if (bsi.block_len)
> +		ret = btrfs_add_swap_extent(sis, &bsi);
> +
> +out:
> +	if (!IS_ERR_OR_NULL(em))
> +		free_extent_map(em);
> +
> +	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
> +
> +	if (ret)
> +		btrfs_swap_deactivate(file);
> +
> +	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (device)
> +		sis->bdev = device->bdev;
> +	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
> +	sis->max = bsi.nr_pages;
> +	sis->pages = bsi.nr_pages - 1;
> +	sis->highest_bit = bsi.nr_pages - 1;
> +	return bsi.nr_extents;
> +}
> +#else
> +static void btrfs_swap_deactivate(struct file *file)
> +{
> +}
> +
> +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> +			       sector_t *span)
> +{
> +	return -EOPNOTSUPP;
> +}
> +#endif
> +
>  const struct file_operations btrfs_file_operations = {
>  	.llseek		= btrfs_file_llseek,
>  	.read_iter      = generic_file_read_iter,
> @@ -3459,6 +3798,8 @@ const struct file_operations btrfs_file_operations = {
>  	.compat_ioctl	= btrfs_compat_ioctl,
>  #endif
>  	.remap_file_range = btrfs_remap_file_range,
> +	.swap_activate	= btrfs_swap_activate,
> +	.swap_deactivate = btrfs_swap_deactivate,
>  };
>  
>  void __cold btrfs_auto_defrag_exit(void)
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 6d159df7b536..c11b86f2bf24 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -27,7 +27,6 @@
>  #include <linux/uio.h>
>  #include <linux/magic.h>
>  #include <linux/iversion.h>
> -#include <linux/swap.h>
>  #include <linux/sched/mm.h>
>  #include <asm/unaligned.h>
>  #include "misc.h"
> @@ -10629,343 +10628,6 @@ void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
>  	}
>  }
>  
> -#ifdef CONFIG_SWAP
> -/*
> - * Add an entry indicating a block group or device which is pinned by a
> - * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
> - * negative errno on failure.
> - */
> -static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
> -				  bool is_block_group)
> -{
> -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> -	struct btrfs_swapfile_pin *sp, *entry;
> -	struct rb_node **p;
> -	struct rb_node *parent = NULL;
> -
> -	sp = kmalloc(sizeof(*sp), GFP_NOFS);
> -	if (!sp)
> -		return -ENOMEM;
> -	sp->ptr = ptr;
> -	sp->inode = inode;
> -	sp->is_block_group = is_block_group;
> -
> -	spin_lock(&fs_info->swapfile_pins_lock);
> -	p = &fs_info->swapfile_pins.rb_node;
> -	while (*p) {
> -		parent = *p;
> -		entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
> -		if (sp->ptr < entry->ptr ||
> -		    (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
> -			p = &(*p)->rb_left;
> -		} else if (sp->ptr > entry->ptr ||
> -			   (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
> -			p = &(*p)->rb_right;
> -		} else {
> -			spin_unlock(&fs_info->swapfile_pins_lock);
> -			kfree(sp);
> -			return 1;
> -		}
> -	}
> -	rb_link_node(&sp->node, parent, p);
> -	rb_insert_color(&sp->node, &fs_info->swapfile_pins);
> -	spin_unlock(&fs_info->swapfile_pins_lock);
> -	return 0;
> -}
> -
> -/* Free all of the entries pinned by this swapfile. */
> -static void btrfs_free_swapfile_pins(struct inode *inode)
> -{
> -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> -	struct btrfs_swapfile_pin *sp;
> -	struct rb_node *node, *next;
> -
> -	spin_lock(&fs_info->swapfile_pins_lock);
> -	node = rb_first(&fs_info->swapfile_pins);
> -	while (node) {
> -		next = rb_next(node);
> -		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
> -		if (sp->inode == inode) {
> -			rb_erase(&sp->node, &fs_info->swapfile_pins);
> -			if (sp->is_block_group)
> -				btrfs_put_block_group(sp->ptr);
> -			kfree(sp);
> -		}
> -		node = next;
> -	}
> -	spin_unlock(&fs_info->swapfile_pins_lock);
> -}
> -
> -struct btrfs_swap_info {
> -	u64 start;
> -	u64 block_start;
> -	u64 block_len;
> -	u64 lowest_ppage;
> -	u64 highest_ppage;
> -	unsigned long nr_pages;
> -	int nr_extents;
> -};
> -
> -static int btrfs_add_swap_extent(struct swap_info_struct *sis,
> -				 struct btrfs_swap_info *bsi)
> -{
> -	unsigned long nr_pages;
> -	u64 first_ppage, first_ppage_reported, next_ppage;
> -	int ret;
> -
> -	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
> -	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
> -				PAGE_SIZE) >> PAGE_SHIFT;
> -
> -	if (first_ppage >= next_ppage)
> -		return 0;
> -	nr_pages = next_ppage - first_ppage;
> -
> -	first_ppage_reported = first_ppage;
> -	if (bsi->start == 0)
> -		first_ppage_reported++;
> -	if (bsi->lowest_ppage > first_ppage_reported)
> -		bsi->lowest_ppage = first_ppage_reported;
> -	if (bsi->highest_ppage < (next_ppage - 1))
> -		bsi->highest_ppage = next_ppage - 1;
> -
> -	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
> -	if (ret < 0)
> -		return ret;
> -	bsi->nr_extents += ret;
> -	bsi->nr_pages += nr_pages;
> -	return 0;
> -}
> -
> -static void btrfs_swap_deactivate(struct file *file)
> -{
> -	struct inode *inode = file_inode(file);
> -
> -	btrfs_free_swapfile_pins(inode);
> -	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
> -}
> -
> -static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> -			       sector_t *span)
> -{
> -	struct inode *inode = file_inode(file);
> -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> -	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> -	struct extent_state *cached_state = NULL;
> -	struct extent_map *em = NULL;
> -	struct btrfs_device *device = NULL;
> -	struct btrfs_swap_info bsi = {
> -		.lowest_ppage = (sector_t)-1ULL,
> -	};
> -	int ret = 0;
> -	u64 isize;
> -	u64 start;
> -
> -	/*
> -	 * If the swap file was just created, make sure delalloc is done. If the
> -	 * file changes again after this, the user is doing something stupid and
> -	 * we don't really care.
> -	 */
> -	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
> -	if (ret)
> -		return ret;
> -
> -	/*
> -	 * The inode is locked, so these flags won't change after we check them.
> -	 */
> -	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
> -		btrfs_warn(fs_info, "swapfile must not be compressed");
> -		return -EINVAL;
> -	}
> -	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
> -		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
> -		return -EINVAL;
> -	}
> -	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
> -		btrfs_warn(fs_info, "swapfile must not be checksummed");
> -		return -EINVAL;
> -	}
> -
> -	/*
> -	 * Balance or device remove/replace/resize can move stuff around from
> -	 * under us. The EXCL_OP flag makes sure they aren't running/won't run
> -	 * concurrently while we are mapping the swap extents, and
> -	 * fs_info->swapfile_pins prevents them from running while the swap file
> -	 * is active and moving the extents. Note that this also prevents a
> -	 * concurrent device add which isn't actually necessary, but it's not
> -	 * really worth the trouble to allow it.
> -	 */
> -	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
> -		btrfs_warn(fs_info,
> -	   "cannot activate swapfile while exclusive operation is running");
> -		return -EBUSY;
> -	}
> -	/*
> -	 * Snapshots can create extents which require COW even if NODATACOW is
> -	 * set. We use this counter to prevent snapshots. We must increment it
> -	 * before walking the extents because we don't want a concurrent
> -	 * snapshot to run after we've already checked the extents.
> -	 */
> -	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
> -
> -	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
> -
> -	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
> -	start = 0;
> -	while (start < isize) {
> -		u64 logical_block_start, physical_block_start;
> -		struct btrfs_block_group_cache *bg;
> -		u64 len = isize - start;
> -
> -		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
> -		if (IS_ERR(em)) {
> -			ret = PTR_ERR(em);
> -			goto out;
> -		}
> -
> -		if (em->block_start == EXTENT_MAP_HOLE) {
> -			btrfs_warn(fs_info, "swapfile must not have holes");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -		if (em->block_start == EXTENT_MAP_INLINE) {
> -			/*
> -			 * It's unlikely we'll ever actually find ourselves
> -			 * here, as a file small enough to fit inline won't be
> -			 * big enough to store more than the swap header, but in
> -			 * case something changes in the future, let's catch it
> -			 * here rather than later.
> -			 */
> -			btrfs_warn(fs_info, "swapfile must not be inline");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> -			btrfs_warn(fs_info, "swapfile must not be compressed");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		logical_block_start = em->block_start + (start - em->start);
> -		len = min(len, em->len - (start - em->start));
> -		free_extent_map(em);
> -		em = NULL;
> -
> -		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
> -		if (ret < 0) {
> -			goto out;
> -		} else if (ret) {
> -			ret = 0;
> -		} else {
> -			btrfs_warn(fs_info,
> -				   "swapfile must not be copy-on-write");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
> -		if (IS_ERR(em)) {
> -			ret = PTR_ERR(em);
> -			goto out;
> -		}
> -
> -		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> -			btrfs_warn(fs_info,
> -				   "swapfile must have single data profile");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		if (device == NULL) {
> -			device = em->map_lookup->stripes[0].dev;
> -			ret = btrfs_add_swapfile_pin(inode, device, false);
> -			if (ret == 1)
> -				ret = 0;
> -			else if (ret)
> -				goto out;
> -		} else if (device != em->map_lookup->stripes[0].dev) {
> -			btrfs_warn(fs_info, "swapfile must be on one device");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		physical_block_start = (em->map_lookup->stripes[0].physical +
> -					(logical_block_start - em->start));
> -		len = min(len, em->len - (logical_block_start - em->start));
> -		free_extent_map(em);
> -		em = NULL;
> -
> -		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
> -		if (!bg) {
> -			btrfs_warn(fs_info,
> -			   "could not find block group containing swapfile");
> -			ret = -EINVAL;
> -			goto out;
> -		}
> -
> -		ret = btrfs_add_swapfile_pin(inode, bg, true);
> -		if (ret) {
> -			btrfs_put_block_group(bg);
> -			if (ret == 1)
> -				ret = 0;
> -			else
> -				goto out;
> -		}
> -
> -		if (bsi.block_len &&
> -		    bsi.block_start + bsi.block_len == physical_block_start) {
> -			bsi.block_len += len;
> -		} else {
> -			if (bsi.block_len) {
> -				ret = btrfs_add_swap_extent(sis, &bsi);
> -				if (ret)
> -					goto out;
> -			}
> -			bsi.start = start;
> -			bsi.block_start = physical_block_start;
> -			bsi.block_len = len;
> -		}
> -
> -		start += len;
> -	}
> -
> -	if (bsi.block_len)
> -		ret = btrfs_add_swap_extent(sis, &bsi);
> -
> -out:
> -	if (!IS_ERR_OR_NULL(em))
> -		free_extent_map(em);
> -
> -	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
> -
> -	if (ret)
> -		btrfs_swap_deactivate(file);
> -
> -	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> -
> -	if (ret)
> -		return ret;
> -
> -	if (device)
> -		sis->bdev = device->bdev;
> -	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
> -	sis->max = bsi.nr_pages;
> -	sis->pages = bsi.nr_pages - 1;
> -	sis->highest_bit = bsi.nr_pages - 1;
> -	return bsi.nr_extents;
> -}
> -#else
> -static void btrfs_swap_deactivate(struct file *file)
> -{
> -}
> -
> -static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> -			       sector_t *span)
> -{
> -	return -EOPNOTSUPP;
> -}
> -#endif
> -
>  static const struct inode_operations btrfs_dir_inode_operations = {
>  	.getattr	= btrfs_getattr,
>  	.lookup		= btrfs_lookup,
> @@ -11032,8 +10694,6 @@ static const struct address_space_operations btrfs_aops = {
>  	.releasepage	= btrfs_releasepage,
>  	.set_page_dirty	= btrfs_set_page_dirty,
>  	.error_remove_page = generic_error_remove_page,
> -	.swap_activate	= btrfs_swap_activate,
> -	.swap_deactivate = btrfs_swap_deactivate,
>  };
>  
>  static const struct inode_operations btrfs_file_inode_operations = {
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 3c7777bfae17..04b2a8f44fa9 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -14,7 +14,6 @@
>  #include <linux/pagevec.h>
>  #include <linux/blkdev.h>
>  #include <linux/bio.h>
> -#include <linux/swap.h>
>  #include <linux/prefetch.h>
>  #include <linux/uio.h>
>  #include <linux/cleancache.h>
> @@ -3142,125 +3141,6 @@ int f2fs_migrate_page(struct address_space *mapping,
>  }
>  #endif
>  
> -#ifdef CONFIG_SWAP
> -/* Copied from generic_swapfile_activate() to check any holes */
> -static int check_swap_activate(struct file *swap_file, unsigned int max)
> -{
> -	struct inode *inode = swap_file->f_mapping->host;
> -	unsigned blocks_per_page;
> -	unsigned long page_no;
> -	unsigned blkbits;
> -	sector_t probe_block;
> -	sector_t last_block;
> -	sector_t lowest_block = -1;
> -	sector_t highest_block = 0;
> -
> -	blkbits = inode->i_blkbits;
> -	blocks_per_page = PAGE_SIZE >> blkbits;
> -
> -	/*
> -	 * Map all the blocks into the extent list.  This code doesn't try
> -	 * to be very smart.
> -	 */
> -	probe_block = 0;
> -	page_no = 0;
> -	last_block = i_size_read(inode) >> blkbits;
> -	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
> -		unsigned block_in_page;
> -		sector_t first_block;
> -
> -		cond_resched();
> -
> -		first_block = bmap(inode, probe_block);
> -		if (first_block == 0)
> -			goto bad_bmap;
> -
> -		/*
> -		 * It must be PAGE_SIZE aligned on-disk
> -		 */
> -		if (first_block & (blocks_per_page - 1)) {
> -			probe_block++;
> -			goto reprobe;
> -		}
> -
> -		for (block_in_page = 1; block_in_page < blocks_per_page;
> -					block_in_page++) {
> -			sector_t block;
> -
> -			block = bmap(inode, probe_block + block_in_page);
> -			if (block == 0)
> -				goto bad_bmap;
> -			if (block != first_block + block_in_page) {
> -				/* Discontiguity */
> -				probe_block++;
> -				goto reprobe;
> -			}
> -		}
> -
> -		first_block >>= (PAGE_SHIFT - blkbits);
> -		if (page_no) {	/* exclude the header page */
> -			if (first_block < lowest_block)
> -				lowest_block = first_block;
> -			if (first_block > highest_block)
> -				highest_block = first_block;
> -		}
> -
> -		page_no++;
> -		probe_block += blocks_per_page;
> -reprobe:
> -		continue;
> -	}
> -	return 0;
> -
> -bad_bmap:
> -	pr_err("swapon: swapfile has holes\n");
> -	return -EINVAL;
> -}
> -
> -static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> -				sector_t *span)
> -{
> -	struct inode *inode = file_inode(file);
> -	int ret;
> -
> -	if (!S_ISREG(inode->i_mode))
> -		return -EINVAL;
> -
> -	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> -		return -EROFS;
> -
> -	ret = f2fs_convert_inline_inode(inode);
> -	if (ret)
> -		return ret;
> -
> -	ret = check_swap_activate(file, sis->max);
> -	if (ret)
> -		return ret;
> -
> -	set_inode_flag(inode, FI_PIN_FILE);
> -	f2fs_precache_extents(inode);
> -	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> -	return 0;
> -}
> -
> -static void f2fs_swap_deactivate(struct file *file)
> -{
> -	struct inode *inode = file_inode(file);
> -
> -	clear_inode_flag(inode, FI_PIN_FILE);
> -}
> -#else
> -static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> -				sector_t *span)
> -{
> -	return -EOPNOTSUPP;
> -}
> -
> -static void f2fs_swap_deactivate(struct file *file)
> -{
> -}
> -#endif
> -
>  const struct address_space_operations f2fs_dblock_aops = {
>  	.readpage	= f2fs_read_data_page,
>  	.readpages	= f2fs_read_data_pages,
> @@ -3273,8 +3153,6 @@ const struct address_space_operations f2fs_dblock_aops = {
>  	.releasepage	= f2fs_release_page,
>  	.direct_IO	= f2fs_direct_IO,
>  	.bmap		= f2fs_bmap,
> -	.swap_activate  = f2fs_swap_activate,
> -	.swap_deactivate = f2fs_swap_deactivate,
>  #ifdef CONFIG_MIGRATION
>  	.migratepage    = f2fs_migrate_page,
>  #endif
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 483ad22a0946..de7f9cf36689 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -21,6 +21,7 @@
>  #include <linux/uuid.h>
>  #include <linux/file.h>
>  #include <linux/nls.h>
> +#include <linux/swap.h>
>  
>  #include "f2fs.h"
>  #include "node.h"
> @@ -3466,6 +3467,125 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  }
>  #endif
>  
> +#ifdef CONFIG_SWAP
> +/* Copied from generic_swapfile_activate() to check any holes */
> +static int check_swap_activate(struct file *swap_file, unsigned int max)
> +{
> +	struct inode *inode = swap_file->f_mapping->host;
> +	unsigned blocks_per_page;
> +	unsigned long page_no;
> +	unsigned blkbits;
> +	sector_t probe_block;
> +	sector_t last_block;
> +	sector_t lowest_block = -1;
> +	sector_t highest_block = 0;
> +
> +	blkbits = inode->i_blkbits;
> +	blocks_per_page = PAGE_SIZE >> blkbits;
> +
> +	/*
> +	 * Map all the blocks into the extent list.  This code doesn't try
> +	 * to be very smart.
> +	 */
> +	probe_block = 0;
> +	page_no = 0;
> +	last_block = i_size_read(inode) >> blkbits;
> +	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
> +		unsigned block_in_page;
> +		sector_t first_block;
> +
> +		cond_resched();
> +
> +		first_block = bmap(inode, probe_block);
> +		if (first_block == 0)
> +			goto bad_bmap;
> +
> +		/*
> +		 * It must be PAGE_SIZE aligned on-disk
> +		 */
> +		if (first_block & (blocks_per_page - 1)) {
> +			probe_block++;
> +			goto reprobe;
> +		}
> +
> +		for (block_in_page = 1; block_in_page < blocks_per_page;
> +					block_in_page++) {
> +			sector_t block;
> +
> +			block = bmap(inode, probe_block + block_in_page);
> +			if (block == 0)
> +				goto bad_bmap;
> +			if (block != first_block + block_in_page) {
> +				/* Discontiguity */
> +				probe_block++;
> +				goto reprobe;
> +			}
> +		}
> +
> +		first_block >>= (PAGE_SHIFT - blkbits);
> +		if (page_no) {	/* exclude the header page */
> +			if (first_block < lowest_block)
> +				lowest_block = first_block;
> +			if (first_block > highest_block)
> +				highest_block = first_block;
> +		}
> +
> +		page_no++;
> +		probe_block += blocks_per_page;
> +reprobe:
> +		continue;
> +	}
> +	return 0;
> +
> +bad_bmap:
> +	pr_err("swapon: swapfile has holes\n");
> +	return -EINVAL;
> +}
> +
> +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> +				sector_t *span)
> +{
> +	struct inode *inode = file_inode(file);
> +	int ret;
> +
> +	if (!S_ISREG(inode->i_mode))
> +		return -EINVAL;
> +
> +	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> +		return -EROFS;
> +
> +	ret = f2fs_convert_inline_inode(inode);
> +	if (ret)
> +		return ret;
> +
> +	ret = check_swap_activate(file, sis->max);
> +	if (ret)
> +		return ret;
> +
> +	set_inode_flag(inode, FI_PIN_FILE);
> +	f2fs_precache_extents(inode);
> +	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> +	return 0;
> +}
> +
> +static void f2fs_swap_deactivate(struct file *file)
> +{
> +	struct inode *inode = file_inode(file);
> +
> +	clear_inode_flag(inode, FI_PIN_FILE);
> +}
> +#else
> +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> +				sector_t *span)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static void f2fs_swap_deactivate(struct file *file)
> +{
> +}
> +#endif
> +
>  const struct file_operations f2fs_file_operations = {
>  	.llseek		= f2fs_llseek,
>  	.read_iter	= generic_file_read_iter,
> @@ -3482,4 +3602,6 @@ const struct file_operations f2fs_file_operations = {
>  #endif
>  	.splice_read	= generic_file_splice_read,
>  	.splice_write	= iter_file_splice_write,
> +	.swap_activate  = f2fs_swap_activate,
> +	.swap_deactivate = f2fs_swap_deactivate,
>  };
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 95dc90570786..1f82f92185d6 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -520,8 +520,6 @@ const struct address_space_operations nfs_file_aops = {
>  	.launder_page = nfs_launder_page,
>  	.is_dirty_writeback = nfs_check_dirty_writeback,
>  	.error_remove_page = generic_error_remove_page,
> -	.swap_activate = nfs_swap_activate,
> -	.swap_deactivate = nfs_swap_deactivate,
>  };
>  
>  /*
> @@ -847,5 +845,7 @@ const struct file_operations nfs_file_operations = {
>  	.splice_write	= iter_file_splice_write,
>  	.check_flags	= nfs_check_flags,
>  	.setlease	= simple_nosetlease,
> +	.swap_activate = nfs_swap_activate,
> +	.swap_deactivate = nfs_swap_deactivate,
>  };
>  EXPORT_SYMBOL_GPL(nfs_file_operations);
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 3a688eb5c5ae..99f578a9ed90 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -631,17 +631,6 @@ xfs_vm_readpages(
>  	return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops);
>  }
>  
> -static int
> -xfs_iomap_swapfile_activate(
> -	struct swap_info_struct		*sis,
> -	struct file			*swap_file,
> -	sector_t			*span)
> -{
> -	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
> -	return iomap_swapfile_activate(sis, swap_file, span,
> -			&xfs_read_iomap_ops);
> -}
> -
>  const struct address_space_operations xfs_address_space_operations = {
>  	.readpage		= xfs_vm_readpage,
>  	.readpages		= xfs_vm_readpages,
> @@ -655,7 +644,6 @@ const struct address_space_operations xfs_address_space_operations = {
>  	.migratepage		= iomap_migrate_page,
>  	.is_partially_uptodate  = iomap_is_partially_uptodate,
>  	.error_remove_page	= generic_error_remove_page,
> -	.swap_activate		= xfs_iomap_swapfile_activate,
>  };
>  
>  const struct address_space_operations xfs_dax_aops = {
> @@ -663,5 +651,4 @@ const struct address_space_operations xfs_dax_aops = {
>  	.direct_IO		= noop_direct_IO,
>  	.set_page_dirty		= noop_set_page_dirty,
>  	.invalidatepage		= noop_invalidatepage,
> -	.swap_activate		= xfs_iomap_swapfile_activate,
>  };
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 865543e41fb4..225f58561f06 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1294,6 +1294,17 @@ xfs_file_mmap(
>  	return 0;
>  }
>  
> +static int
> +xfs_file_swap_activate(
> +	struct swap_info_struct		*sis,
> +	struct file			*swap_file,
> +	sector_t			*span)
> +{
> +	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
> +	return iomap_swapfile_activate(sis, swap_file, span,
> +			&xfs_read_iomap_ops);
> +}
> +
>  const struct file_operations xfs_file_operations = {
>  	.llseek		= xfs_file_llseek,
>  	.read_iter	= xfs_file_read_iter,
> @@ -1314,6 +1325,7 @@ const struct file_operations xfs_file_operations = {
>  	.fallocate	= xfs_file_fallocate,
>  	.fadvise	= xfs_file_fadvise,
>  	.remap_file_range = xfs_file_remap_range,
> +	.swap_activate	= xfs_file_swap_activate,
>  };
>  
>  const struct file_operations xfs_dir_file_operations = {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 83e011e0df7f..1175815da3df 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -402,11 +402,6 @@ struct address_space_operations {
>  					unsigned long);
>  	void (*is_dirty_writeback) (struct page *, bool *, bool *);
>  	int (*error_remove_page)(struct address_space *, struct page *);
> -
> -	/* swapfile support */
> -	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
> -				sector_t *span);
> -	void (*swap_deactivate)(struct file *file);
>  };
>  
>  extern const struct address_space_operations empty_aops;
> @@ -1858,6 +1853,11 @@ struct file_operations {
>  				   struct file *file_out, loff_t pos_out,
>  				   loff_t len, unsigned int remap_flags);
>  	int (*fadvise)(struct file *, loff_t, loff_t, int);
> +
> +	/* swapfile support */
> +	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
> +				sector_t *span);
> +	void (*swap_deactivate)(struct file *file);
>  } __randomize_layout;
>  
>  struct inode_operations {
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index bb3261d45b6a..d2de8d668708 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -2293,11 +2293,10 @@ static void destroy_swap_extents(struct swap_info_struct *sis)
>  
>  	if (sis->flags & SWP_ACTIVATED) {
>  		struct file *swap_file = sis->swap_file;
> -		struct address_space *mapping = swap_file->f_mapping;
>  
>  		sis->flags &= ~SWP_ACTIVATED;
> -		if (mapping->a_ops->swap_deactivate)
> -			mapping->a_ops->swap_deactivate(swap_file);
> +		if (swap_file->f_op->swap_deactivate)
> +			swap_file->f_op->swap_deactivate(swap_file);
>  	}
>  }
>  
> @@ -2381,8 +2380,7 @@ EXPORT_SYMBOL_GPL(add_swap_extent);
>  static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
>  {
>  	struct file *swap_file = sis->swap_file;
> -	struct address_space *mapping = swap_file->f_mapping;
> -	struct inode *inode = mapping->host;
> +	struct inode *inode = swap_file->f_mapping->host;
>  	int ret;
>  
>  	if (S_ISBLK(inode->i_mode)) {
> @@ -2391,8 +2389,8 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
>  		return ret;
>  	}
>  
> -	if (mapping->a_ops->swap_activate) {
> -		ret = mapping->a_ops->swap_activate(sis, swap_file, span);
> +	if (swap_file->f_op->swap_activate) {
> +		ret = swap_file->f_op->swap_activate(sis, swap_file, span);
>  		if (ret >= 0)
>  			sis->flags |= SWP_ACTIVATED;
>  		if (!ret) {
> -- 
> 2.21.0
>
Ira Weiny Nov. 19, 2019, 6:57 p.m. UTC | #4
On Thu, Nov 14, 2019 at 01:03:47PM -0800, Darrick J. Wong wrote:
> On Tue, Nov 12, 2019 at 04:42:44PM -0800, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > swap_activate() and swap_deactivate() have nothing to do with address
> > spaces.  We want to be able to change the address space operations on
> > the fly to allow changing inode flags dynamically.
> > 
> > Switching address space operations can be difficult to do reliably.[1]
> > Therefore, to simplify switching address space operations we reduce the
> > number of functions in those operations by moving swap_activate() and
> > swap_deactivate() out of the address space operations.
> > 
> > No functionality is changed with this patch.
> > 
> > This has been tested with XFS but not NFS, f2fs, or btrfs.
> > 
> > Also note we move some functions to facilitate compilation.  But no
> > functional changes are contained within those diffs.
> > 
> > [1] https://lkml.org/lkml/2019/11/11/572
> > 
> > Cc: Dave Chinner <david@fromorbit.com>
> > Cc: linux-fsdevel@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > Suggested-by: Jan Kara <jack@suse.cz>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> Replace previous ack with:
> 
> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

Thanks Darrick and David for the reviews.

Are there any objections from NFS or F2FS?

NFS is pretty straightforward.  For F2FS the large changes are a simple
copy/paste of the functions which, to me at least, make code maintenance easier
in the long run.

Thanks,
Ira

> 
> --D
> 
> > 
> > ---
> > Changes from V0:
> > 	Update cover letter.
> > 	fix btrfs as per Andrew's comments
> > 	change xfs_iomap_swapfile_activate() to xfs_file_swap_activate()
> > 
> > Changes from V1:
> > 	Update recipients list
> > 
> > 
> >  fs/btrfs/file.c    | 341 +++++++++++++++++++++++++++++++++++++++++++++
> >  fs/btrfs/inode.c   | 340 --------------------------------------------
> >  fs/f2fs/data.c     | 122 ----------------
> >  fs/f2fs/file.c     | 122 ++++++++++++++++
> >  fs/nfs/file.c      |   4 +-
> >  fs/xfs/xfs_aops.c  |  13 --
> >  fs/xfs/xfs_file.c  |  12 ++
> >  include/linux/fs.h |  10 +-
> >  mm/swapfile.c      |  12 +-
> >  9 files changed, 487 insertions(+), 489 deletions(-)
> > 
> > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> > index 0cb43b682789..117502311fe0 100644
> > --- a/fs/btrfs/file.c
> > +++ b/fs/btrfs/file.c
> > @@ -16,6 +16,7 @@
> >  #include <linux/btrfs.h>
> >  #include <linux/uio.h>
> >  #include <linux/iversion.h>
> > +#include <linux/swap.h>
> >  #include "ctree.h"
> >  #include "disk-io.h"
> >  #include "transaction.h"
> > @@ -27,6 +28,7 @@
> >  #include "qgroup.h"
> >  #include "compression.h"
> >  #include "delalloc-space.h"
> > +#include "block-group.h"
> >  
> >  static struct kmem_cache *btrfs_inode_defrag_cachep;
> >  /*
> > @@ -3444,6 +3446,343 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
> >  	return generic_file_open(inode, filp);
> >  }
> >  
> > +#ifdef CONFIG_SWAP
> > +/*
> > + * Add an entry indicating a block group or device which is pinned by a
> > + * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
> > + * negative errno on failure.
> > + */
> > +static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
> > +				  bool is_block_group)
> > +{
> > +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> > +	struct btrfs_swapfile_pin *sp, *entry;
> > +	struct rb_node **p;
> > +	struct rb_node *parent = NULL;
> > +
> > +	sp = kmalloc(sizeof(*sp), GFP_NOFS);
> > +	if (!sp)
> > +		return -ENOMEM;
> > +	sp->ptr = ptr;
> > +	sp->inode = inode;
> > +	sp->is_block_group = is_block_group;
> > +
> > +	spin_lock(&fs_info->swapfile_pins_lock);
> > +	p = &fs_info->swapfile_pins.rb_node;
> > +	while (*p) {
> > +		parent = *p;
> > +		entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
> > +		if (sp->ptr < entry->ptr ||
> > +		    (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
> > +			p = &(*p)->rb_left;
> > +		} else if (sp->ptr > entry->ptr ||
> > +			   (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
> > +			p = &(*p)->rb_right;
> > +		} else {
> > +			spin_unlock(&fs_info->swapfile_pins_lock);
> > +			kfree(sp);
> > +			return 1;
> > +		}
> > +	}
> > +	rb_link_node(&sp->node, parent, p);
> > +	rb_insert_color(&sp->node, &fs_info->swapfile_pins);
> > +	spin_unlock(&fs_info->swapfile_pins_lock);
> > +	return 0;
> > +}
> > +
> > +/* Free all of the entries pinned by this swapfile. */
> > +static void btrfs_free_swapfile_pins(struct inode *inode)
> > +{
> > +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> > +	struct btrfs_swapfile_pin *sp;
> > +	struct rb_node *node, *next;
> > +
> > +	spin_lock(&fs_info->swapfile_pins_lock);
> > +	node = rb_first(&fs_info->swapfile_pins);
> > +	while (node) {
> > +		next = rb_next(node);
> > +		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
> > +		if (sp->inode == inode) {
> > +			rb_erase(&sp->node, &fs_info->swapfile_pins);
> > +			if (sp->is_block_group)
> > +				btrfs_put_block_group(sp->ptr);
> > +			kfree(sp);
> > +		}
> > +		node = next;
> > +	}
> > +	spin_unlock(&fs_info->swapfile_pins_lock);
> > +}
> > +
> > +struct btrfs_swap_info {
> > +	u64 start;
> > +	u64 block_start;
> > +	u64 block_len;
> > +	u64 lowest_ppage;
> > +	u64 highest_ppage;
> > +	unsigned long nr_pages;
> > +	int nr_extents;
> > +};
> > +
> > +static int btrfs_add_swap_extent(struct swap_info_struct *sis,
> > +				 struct btrfs_swap_info *bsi)
> > +{
> > +	unsigned long nr_pages;
> > +	u64 first_ppage, first_ppage_reported, next_ppage;
> > +	int ret;
> > +
> > +	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
> > +	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
> > +				PAGE_SIZE) >> PAGE_SHIFT;
> > +
> > +	if (first_ppage >= next_ppage)
> > +		return 0;
> > +	nr_pages = next_ppage - first_ppage;
> > +
> > +	first_ppage_reported = first_ppage;
> > +	if (bsi->start == 0)
> > +		first_ppage_reported++;
> > +	if (bsi->lowest_ppage > first_ppage_reported)
> > +		bsi->lowest_ppage = first_ppage_reported;
> > +	if (bsi->highest_ppage < (next_ppage - 1))
> > +		bsi->highest_ppage = next_ppage - 1;
> > +
> > +	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
> > +	if (ret < 0)
> > +		return ret;
> > +	bsi->nr_extents += ret;
> > +	bsi->nr_pages += nr_pages;
> > +	return 0;
> > +}
> > +
> > +static void btrfs_swap_deactivate(struct file *file)
> > +{
> > +	struct inode *inode = file_inode(file);
> > +
> > +	btrfs_free_swapfile_pins(inode);
> > +	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
> > +}
> > +
> > +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> > +			       sector_t *span)
> > +{
> > +	struct inode *inode = file_inode(file);
> > +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> > +	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> > +	struct extent_state *cached_state = NULL;
> > +	struct extent_map *em = NULL;
> > +	struct btrfs_device *device = NULL;
> > +	struct btrfs_swap_info bsi = {
> > +		.lowest_ppage = (sector_t)-1ULL,
> > +	};
> > +	int ret = 0;
> > +	u64 isize;
> > +	u64 start;
> > +
> > +	/*
> > +	 * If the swap file was just created, make sure delalloc is done. If the
> > +	 * file changes again after this, the user is doing something stupid and
> > +	 * we don't really care.
> > +	 */
> > +	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/*
> > +	 * The inode is locked, so these flags won't change after we check them.
> > +	 */
> > +	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
> > +		btrfs_warn(fs_info, "swapfile must not be compressed");
> > +		return -EINVAL;
> > +	}
> > +	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
> > +		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
> > +		return -EINVAL;
> > +	}
> > +	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
> > +		btrfs_warn(fs_info, "swapfile must not be checksummed");
> > +		return -EINVAL;
> > +	}
> > +
> > +	/*
> > +	 * Balance or device remove/replace/resize can move stuff around from
> > +	 * under us. The EXCL_OP flag makes sure they aren't running/won't run
> > +	 * concurrently while we are mapping the swap extents, and
> > +	 * fs_info->swapfile_pins prevents them from running while the swap file
> > +	 * is active and moving the extents. Note that this also prevents a
> > +	 * concurrent device add which isn't actually necessary, but it's not
> > +	 * really worth the trouble to allow it.
> > +	 */
> > +	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
> > +		btrfs_warn(fs_info,
> > +	   "cannot activate swapfile while exclusive operation is running");
> > +		return -EBUSY;
> > +	}
> > +	/*
> > +	 * Snapshots can create extents which require COW even if NODATACOW is
> > +	 * set. We use this counter to prevent snapshots. We must increment it
> > +	 * before walking the extents because we don't want a concurrent
> > +	 * snapshot to run after we've already checked the extents.
> > +	 */
> > +	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
> > +
> > +	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
> > +
> > +	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
> > +	start = 0;
> > +	while (start < isize) {
> > +		u64 logical_block_start, physical_block_start;
> > +		struct btrfs_block_group_cache *bg;
> > +		u64 len = isize - start;
> > +
> > +		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
> > +		if (IS_ERR(em)) {
> > +			ret = PTR_ERR(em);
> > +			goto out;
> > +		}
> > +
> > +		if (em->block_start == EXTENT_MAP_HOLE) {
> > +			btrfs_warn(fs_info, "swapfile must not have holes");
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +		if (em->block_start == EXTENT_MAP_INLINE) {
> > +			/*
> > +			 * It's unlikely we'll ever actually find ourselves
> > +			 * here, as a file small enough to fit inline won't be
> > +			 * big enough to store more than the swap header, but in
> > +			 * case something changes in the future, let's catch it
> > +			 * here rather than later.
> > +			 */
> > +			btrfs_warn(fs_info, "swapfile must not be inline");
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> > +			btrfs_warn(fs_info, "swapfile must not be compressed");
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +
> > +		logical_block_start = em->block_start + (start - em->start);
> > +		len = min(len, em->len - (start - em->start));
> > +		free_extent_map(em);
> > +		em = NULL;
> > +
> > +		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
> > +		if (ret < 0) {
> > +			goto out;
> > +		} else if (ret) {
> > +			ret = 0;
> > +		} else {
> > +			btrfs_warn(fs_info,
> > +				   "swapfile must not be copy-on-write");
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +
> > +		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
> > +		if (IS_ERR(em)) {
> > +			ret = PTR_ERR(em);
> > +			goto out;
> > +		}
> > +
> > +		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> > +			btrfs_warn(fs_info,
> > +				   "swapfile must have single data profile");
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +
> > +		if (device == NULL) {
> > +			device = em->map_lookup->stripes[0].dev;
> > +			ret = btrfs_add_swapfile_pin(inode, device, false);
> > +			if (ret == 1)
> > +				ret = 0;
> > +			else if (ret)
> > +				goto out;
> > +		} else if (device != em->map_lookup->stripes[0].dev) {
> > +			btrfs_warn(fs_info, "swapfile must be on one device");
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +
> > +		physical_block_start = (em->map_lookup->stripes[0].physical +
> > +					(logical_block_start - em->start));
> > +		len = min(len, em->len - (logical_block_start - em->start));
> > +		free_extent_map(em);
> > +		em = NULL;
> > +
> > +		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
> > +		if (!bg) {
> > +			btrfs_warn(fs_info,
> > +			   "could not find block group containing swapfile");
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +
> > +		ret = btrfs_add_swapfile_pin(inode, bg, true);
> > +		if (ret) {
> > +			btrfs_put_block_group(bg);
> > +			if (ret == 1)
> > +				ret = 0;
> > +			else
> > +				goto out;
> > +		}
> > +
> > +		if (bsi.block_len &&
> > +		    bsi.block_start + bsi.block_len == physical_block_start) {
> > +			bsi.block_len += len;
> > +		} else {
> > +			if (bsi.block_len) {
> > +				ret = btrfs_add_swap_extent(sis, &bsi);
> > +				if (ret)
> > +					goto out;
> > +			}
> > +			bsi.start = start;
> > +			bsi.block_start = physical_block_start;
> > +			bsi.block_len = len;
> > +		}
> > +
> > +		start += len;
> > +	}
> > +
> > +	if (bsi.block_len)
> > +		ret = btrfs_add_swap_extent(sis, &bsi);
> > +
> > +out:
> > +	if (!IS_ERR_OR_NULL(em))
> > +		free_extent_map(em);
> > +
> > +	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
> > +
> > +	if (ret)
> > +		btrfs_swap_deactivate(file);
> > +
> > +	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> > +
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (device)
> > +		sis->bdev = device->bdev;
> > +	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
> > +	sis->max = bsi.nr_pages;
> > +	sis->pages = bsi.nr_pages - 1;
> > +	sis->highest_bit = bsi.nr_pages - 1;
> > +	return bsi.nr_extents;
> > +}
> > +#else
> > +static void btrfs_swap_deactivate(struct file *file)
> > +{
> > +}
> > +
> > +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> > +			       sector_t *span)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +#endif
> > +
> >  const struct file_operations btrfs_file_operations = {
> >  	.llseek		= btrfs_file_llseek,
> >  	.read_iter      = generic_file_read_iter,
> > @@ -3459,6 +3798,8 @@ const struct file_operations btrfs_file_operations = {
> >  	.compat_ioctl	= btrfs_compat_ioctl,
> >  #endif
> >  	.remap_file_range = btrfs_remap_file_range,
> > +	.swap_activate	= btrfs_swap_activate,
> > +	.swap_deactivate = btrfs_swap_deactivate,
> >  };
> >  
> >  void __cold btrfs_auto_defrag_exit(void)
> > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> > index 6d159df7b536..c11b86f2bf24 100644
> > --- a/fs/btrfs/inode.c
> > +++ b/fs/btrfs/inode.c
> > @@ -27,7 +27,6 @@
> >  #include <linux/uio.h>
> >  #include <linux/magic.h>
> >  #include <linux/iversion.h>
> > -#include <linux/swap.h>
> >  #include <linux/sched/mm.h>
> >  #include <asm/unaligned.h>
> >  #include "misc.h"
> > @@ -10629,343 +10628,6 @@ void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
> >  	}
> >  }
> >  
> > -#ifdef CONFIG_SWAP
> > -/*
> > - * Add an entry indicating a block group or device which is pinned by a
> > - * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
> > - * negative errno on failure.
> > - */
> > -static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
> > -				  bool is_block_group)
> > -{
> > -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> > -	struct btrfs_swapfile_pin *sp, *entry;
> > -	struct rb_node **p;
> > -	struct rb_node *parent = NULL;
> > -
> > -	sp = kmalloc(sizeof(*sp), GFP_NOFS);
> > -	if (!sp)
> > -		return -ENOMEM;
> > -	sp->ptr = ptr;
> > -	sp->inode = inode;
> > -	sp->is_block_group = is_block_group;
> > -
> > -	spin_lock(&fs_info->swapfile_pins_lock);
> > -	p = &fs_info->swapfile_pins.rb_node;
> > -	while (*p) {
> > -		parent = *p;
> > -		entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
> > -		if (sp->ptr < entry->ptr ||
> > -		    (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
> > -			p = &(*p)->rb_left;
> > -		} else if (sp->ptr > entry->ptr ||
> > -			   (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
> > -			p = &(*p)->rb_right;
> > -		} else {
> > -			spin_unlock(&fs_info->swapfile_pins_lock);
> > -			kfree(sp);
> > -			return 1;
> > -		}
> > -	}
> > -	rb_link_node(&sp->node, parent, p);
> > -	rb_insert_color(&sp->node, &fs_info->swapfile_pins);
> > -	spin_unlock(&fs_info->swapfile_pins_lock);
> > -	return 0;
> > -}
> > -
> > -/* Free all of the entries pinned by this swapfile. */
> > -static void btrfs_free_swapfile_pins(struct inode *inode)
> > -{
> > -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> > -	struct btrfs_swapfile_pin *sp;
> > -	struct rb_node *node, *next;
> > -
> > -	spin_lock(&fs_info->swapfile_pins_lock);
> > -	node = rb_first(&fs_info->swapfile_pins);
> > -	while (node) {
> > -		next = rb_next(node);
> > -		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
> > -		if (sp->inode == inode) {
> > -			rb_erase(&sp->node, &fs_info->swapfile_pins);
> > -			if (sp->is_block_group)
> > -				btrfs_put_block_group(sp->ptr);
> > -			kfree(sp);
> > -		}
> > -		node = next;
> > -	}
> > -	spin_unlock(&fs_info->swapfile_pins_lock);
> > -}
> > -
> > -struct btrfs_swap_info {
> > -	u64 start;
> > -	u64 block_start;
> > -	u64 block_len;
> > -	u64 lowest_ppage;
> > -	u64 highest_ppage;
> > -	unsigned long nr_pages;
> > -	int nr_extents;
> > -};
> > -
> > -static int btrfs_add_swap_extent(struct swap_info_struct *sis,
> > -				 struct btrfs_swap_info *bsi)
> > -{
> > -	unsigned long nr_pages;
> > -	u64 first_ppage, first_ppage_reported, next_ppage;
> > -	int ret;
> > -
> > -	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
> > -	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
> > -				PAGE_SIZE) >> PAGE_SHIFT;
> > -
> > -	if (first_ppage >= next_ppage)
> > -		return 0;
> > -	nr_pages = next_ppage - first_ppage;
> > -
> > -	first_ppage_reported = first_ppage;
> > -	if (bsi->start == 0)
> > -		first_ppage_reported++;
> > -	if (bsi->lowest_ppage > first_ppage_reported)
> > -		bsi->lowest_ppage = first_ppage_reported;
> > -	if (bsi->highest_ppage < (next_ppage - 1))
> > -		bsi->highest_ppage = next_ppage - 1;
> > -
> > -	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
> > -	if (ret < 0)
> > -		return ret;
> > -	bsi->nr_extents += ret;
> > -	bsi->nr_pages += nr_pages;
> > -	return 0;
> > -}
> > -
> > -static void btrfs_swap_deactivate(struct file *file)
> > -{
> > -	struct inode *inode = file_inode(file);
> > -
> > -	btrfs_free_swapfile_pins(inode);
> > -	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
> > -}
> > -
> > -static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> > -			       sector_t *span)
> > -{
> > -	struct inode *inode = file_inode(file);
> > -	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> > -	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> > -	struct extent_state *cached_state = NULL;
> > -	struct extent_map *em = NULL;
> > -	struct btrfs_device *device = NULL;
> > -	struct btrfs_swap_info bsi = {
> > -		.lowest_ppage = (sector_t)-1ULL,
> > -	};
> > -	int ret = 0;
> > -	u64 isize;
> > -	u64 start;
> > -
> > -	/*
> > -	 * If the swap file was just created, make sure delalloc is done. If the
> > -	 * file changes again after this, the user is doing something stupid and
> > -	 * we don't really care.
> > -	 */
> > -	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
> > -	if (ret)
> > -		return ret;
> > -
> > -	/*
> > -	 * The inode is locked, so these flags won't change after we check them.
> > -	 */
> > -	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
> > -		btrfs_warn(fs_info, "swapfile must not be compressed");
> > -		return -EINVAL;
> > -	}
> > -	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
> > -		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
> > -		return -EINVAL;
> > -	}
> > -	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
> > -		btrfs_warn(fs_info, "swapfile must not be checksummed");
> > -		return -EINVAL;
> > -	}
> > -
> > -	/*
> > -	 * Balance or device remove/replace/resize can move stuff around from
> > -	 * under us. The EXCL_OP flag makes sure they aren't running/won't run
> > -	 * concurrently while we are mapping the swap extents, and
> > -	 * fs_info->swapfile_pins prevents them from running while the swap file
> > -	 * is active and moving the extents. Note that this also prevents a
> > -	 * concurrent device add which isn't actually necessary, but it's not
> > -	 * really worth the trouble to allow it.
> > -	 */
> > -	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
> > -		btrfs_warn(fs_info,
> > -	   "cannot activate swapfile while exclusive operation is running");
> > -		return -EBUSY;
> > -	}
> > -	/*
> > -	 * Snapshots can create extents which require COW even if NODATACOW is
> > -	 * set. We use this counter to prevent snapshots. We must increment it
> > -	 * before walking the extents because we don't want a concurrent
> > -	 * snapshot to run after we've already checked the extents.
> > -	 */
> > -	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
> > -
> > -	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
> > -
> > -	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
> > -	start = 0;
> > -	while (start < isize) {
> > -		u64 logical_block_start, physical_block_start;
> > -		struct btrfs_block_group_cache *bg;
> > -		u64 len = isize - start;
> > -
> > -		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
> > -		if (IS_ERR(em)) {
> > -			ret = PTR_ERR(em);
> > -			goto out;
> > -		}
> > -
> > -		if (em->block_start == EXTENT_MAP_HOLE) {
> > -			btrfs_warn(fs_info, "swapfile must not have holes");
> > -			ret = -EINVAL;
> > -			goto out;
> > -		}
> > -		if (em->block_start == EXTENT_MAP_INLINE) {
> > -			/*
> > -			 * It's unlikely we'll ever actually find ourselves
> > -			 * here, as a file small enough to fit inline won't be
> > -			 * big enough to store more than the swap header, but in
> > -			 * case something changes in the future, let's catch it
> > -			 * here rather than later.
> > -			 */
> > -			btrfs_warn(fs_info, "swapfile must not be inline");
> > -			ret = -EINVAL;
> > -			goto out;
> > -		}
> > -		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> > -			btrfs_warn(fs_info, "swapfile must not be compressed");
> > -			ret = -EINVAL;
> > -			goto out;
> > -		}
> > -
> > -		logical_block_start = em->block_start + (start - em->start);
> > -		len = min(len, em->len - (start - em->start));
> > -		free_extent_map(em);
> > -		em = NULL;
> > -
> > -		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
> > -		if (ret < 0) {
> > -			goto out;
> > -		} else if (ret) {
> > -			ret = 0;
> > -		} else {
> > -			btrfs_warn(fs_info,
> > -				   "swapfile must not be copy-on-write");
> > -			ret = -EINVAL;
> > -			goto out;
> > -		}
> > -
> > -		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
> > -		if (IS_ERR(em)) {
> > -			ret = PTR_ERR(em);
> > -			goto out;
> > -		}
> > -
> > -		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> > -			btrfs_warn(fs_info,
> > -				   "swapfile must have single data profile");
> > -			ret = -EINVAL;
> > -			goto out;
> > -		}
> > -
> > -		if (device == NULL) {
> > -			device = em->map_lookup->stripes[0].dev;
> > -			ret = btrfs_add_swapfile_pin(inode, device, false);
> > -			if (ret == 1)
> > -				ret = 0;
> > -			else if (ret)
> > -				goto out;
> > -		} else if (device != em->map_lookup->stripes[0].dev) {
> > -			btrfs_warn(fs_info, "swapfile must be on one device");
> > -			ret = -EINVAL;
> > -			goto out;
> > -		}
> > -
> > -		physical_block_start = (em->map_lookup->stripes[0].physical +
> > -					(logical_block_start - em->start));
> > -		len = min(len, em->len - (logical_block_start - em->start));
> > -		free_extent_map(em);
> > -		em = NULL;
> > -
> > -		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
> > -		if (!bg) {
> > -			btrfs_warn(fs_info,
> > -			   "could not find block group containing swapfile");
> > -			ret = -EINVAL;
> > -			goto out;
> > -		}
> > -
> > -		ret = btrfs_add_swapfile_pin(inode, bg, true);
> > -		if (ret) {
> > -			btrfs_put_block_group(bg);
> > -			if (ret == 1)
> > -				ret = 0;
> > -			else
> > -				goto out;
> > -		}
> > -
> > -		if (bsi.block_len &&
> > -		    bsi.block_start + bsi.block_len == physical_block_start) {
> > -			bsi.block_len += len;
> > -		} else {
> > -			if (bsi.block_len) {
> > -				ret = btrfs_add_swap_extent(sis, &bsi);
> > -				if (ret)
> > -					goto out;
> > -			}
> > -			bsi.start = start;
> > -			bsi.block_start = physical_block_start;
> > -			bsi.block_len = len;
> > -		}
> > -
> > -		start += len;
> > -	}
> > -
> > -	if (bsi.block_len)
> > -		ret = btrfs_add_swap_extent(sis, &bsi);
> > -
> > -out:
> > -	if (!IS_ERR_OR_NULL(em))
> > -		free_extent_map(em);
> > -
> > -	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
> > -
> > -	if (ret)
> > -		btrfs_swap_deactivate(file);
> > -
> > -	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> > -
> > -	if (ret)
> > -		return ret;
> > -
> > -	if (device)
> > -		sis->bdev = device->bdev;
> > -	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
> > -	sis->max = bsi.nr_pages;
> > -	sis->pages = bsi.nr_pages - 1;
> > -	sis->highest_bit = bsi.nr_pages - 1;
> > -	return bsi.nr_extents;
> > -}
> > -#else
> > -static void btrfs_swap_deactivate(struct file *file)
> > -{
> > -}
> > -
> > -static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> > -			       sector_t *span)
> > -{
> > -	return -EOPNOTSUPP;
> > -}
> > -#endif
> > -
> >  static const struct inode_operations btrfs_dir_inode_operations = {
> >  	.getattr	= btrfs_getattr,
> >  	.lookup		= btrfs_lookup,
> > @@ -11032,8 +10694,6 @@ static const struct address_space_operations btrfs_aops = {
> >  	.releasepage	= btrfs_releasepage,
> >  	.set_page_dirty	= btrfs_set_page_dirty,
> >  	.error_remove_page = generic_error_remove_page,
> > -	.swap_activate	= btrfs_swap_activate,
> > -	.swap_deactivate = btrfs_swap_deactivate,
> >  };
> >  
> >  static const struct inode_operations btrfs_file_inode_operations = {
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 3c7777bfae17..04b2a8f44fa9 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -14,7 +14,6 @@
> >  #include <linux/pagevec.h>
> >  #include <linux/blkdev.h>
> >  #include <linux/bio.h>
> > -#include <linux/swap.h>
> >  #include <linux/prefetch.h>
> >  #include <linux/uio.h>
> >  #include <linux/cleancache.h>
> > @@ -3142,125 +3141,6 @@ int f2fs_migrate_page(struct address_space *mapping,
> >  }
> >  #endif
> >  
> > -#ifdef CONFIG_SWAP
> > -/* Copied from generic_swapfile_activate() to check any holes */
> > -static int check_swap_activate(struct file *swap_file, unsigned int max)
> > -{
> > -	struct inode *inode = swap_file->f_mapping->host;
> > -	unsigned blocks_per_page;
> > -	unsigned long page_no;
> > -	unsigned blkbits;
> > -	sector_t probe_block;
> > -	sector_t last_block;
> > -	sector_t lowest_block = -1;
> > -	sector_t highest_block = 0;
> > -
> > -	blkbits = inode->i_blkbits;
> > -	blocks_per_page = PAGE_SIZE >> blkbits;
> > -
> > -	/*
> > -	 * Map all the blocks into the extent list.  This code doesn't try
> > -	 * to be very smart.
> > -	 */
> > -	probe_block = 0;
> > -	page_no = 0;
> > -	last_block = i_size_read(inode) >> blkbits;
> > -	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
> > -		unsigned block_in_page;
> > -		sector_t first_block;
> > -
> > -		cond_resched();
> > -
> > -		first_block = bmap(inode, probe_block);
> > -		if (first_block == 0)
> > -			goto bad_bmap;
> > -
> > -		/*
> > -		 * It must be PAGE_SIZE aligned on-disk
> > -		 */
> > -		if (first_block & (blocks_per_page - 1)) {
> > -			probe_block++;
> > -			goto reprobe;
> > -		}
> > -
> > -		for (block_in_page = 1; block_in_page < blocks_per_page;
> > -					block_in_page++) {
> > -			sector_t block;
> > -
> > -			block = bmap(inode, probe_block + block_in_page);
> > -			if (block == 0)
> > -				goto bad_bmap;
> > -			if (block != first_block + block_in_page) {
> > -				/* Discontiguity */
> > -				probe_block++;
> > -				goto reprobe;
> > -			}
> > -		}
> > -
> > -		first_block >>= (PAGE_SHIFT - blkbits);
> > -		if (page_no) {	/* exclude the header page */
> > -			if (first_block < lowest_block)
> > -				lowest_block = first_block;
> > -			if (first_block > highest_block)
> > -				highest_block = first_block;
> > -		}
> > -
> > -		page_no++;
> > -		probe_block += blocks_per_page;
> > -reprobe:
> > -		continue;
> > -	}
> > -	return 0;
> > -
> > -bad_bmap:
> > -	pr_err("swapon: swapfile has holes\n");
> > -	return -EINVAL;
> > -}
> > -
> > -static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> > -				sector_t *span)
> > -{
> > -	struct inode *inode = file_inode(file);
> > -	int ret;
> > -
> > -	if (!S_ISREG(inode->i_mode))
> > -		return -EINVAL;
> > -
> > -	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> > -		return -EROFS;
> > -
> > -	ret = f2fs_convert_inline_inode(inode);
> > -	if (ret)
> > -		return ret;
> > -
> > -	ret = check_swap_activate(file, sis->max);
> > -	if (ret)
> > -		return ret;
> > -
> > -	set_inode_flag(inode, FI_PIN_FILE);
> > -	f2fs_precache_extents(inode);
> > -	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> > -	return 0;
> > -}
> > -
> > -static void f2fs_swap_deactivate(struct file *file)
> > -{
> > -	struct inode *inode = file_inode(file);
> > -
> > -	clear_inode_flag(inode, FI_PIN_FILE);
> > -}
> > -#else
> > -static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> > -				sector_t *span)
> > -{
> > -	return -EOPNOTSUPP;
> > -}
> > -
> > -static void f2fs_swap_deactivate(struct file *file)
> > -{
> > -}
> > -#endif
> > -
> >  const struct address_space_operations f2fs_dblock_aops = {
> >  	.readpage	= f2fs_read_data_page,
> >  	.readpages	= f2fs_read_data_pages,
> > @@ -3273,8 +3153,6 @@ const struct address_space_operations f2fs_dblock_aops = {
> >  	.releasepage	= f2fs_release_page,
> >  	.direct_IO	= f2fs_direct_IO,
> >  	.bmap		= f2fs_bmap,
> > -	.swap_activate  = f2fs_swap_activate,
> > -	.swap_deactivate = f2fs_swap_deactivate,
> >  #ifdef CONFIG_MIGRATION
> >  	.migratepage    = f2fs_migrate_page,
> >  #endif
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 483ad22a0946..de7f9cf36689 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -21,6 +21,7 @@
> >  #include <linux/uuid.h>
> >  #include <linux/file.h>
> >  #include <linux/nls.h>
> > +#include <linux/swap.h>
> >  
> >  #include "f2fs.h"
> >  #include "node.h"
> > @@ -3466,6 +3467,125 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> >  }
> >  #endif
> >  
> > +#ifdef CONFIG_SWAP
> > +/* Copied from generic_swapfile_activate() to check any holes */
> > +static int check_swap_activate(struct file *swap_file, unsigned int max)
> > +{
> > +	struct inode *inode = swap_file->f_mapping->host;
> > +	unsigned blocks_per_page;
> > +	unsigned long page_no;
> > +	unsigned blkbits;
> > +	sector_t probe_block;
> > +	sector_t last_block;
> > +	sector_t lowest_block = -1;
> > +	sector_t highest_block = 0;
> > +
> > +	blkbits = inode->i_blkbits;
> > +	blocks_per_page = PAGE_SIZE >> blkbits;
> > +
> > +	/*
> > +	 * Map all the blocks into the extent list.  This code doesn't try
> > +	 * to be very smart.
> > +	 */
> > +	probe_block = 0;
> > +	page_no = 0;
> > +	last_block = i_size_read(inode) >> blkbits;
> > +	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
> > +		unsigned block_in_page;
> > +		sector_t first_block;
> > +
> > +		cond_resched();
> > +
> > +		first_block = bmap(inode, probe_block);
> > +		if (first_block == 0)
> > +			goto bad_bmap;
> > +
> > +		/*
> > +		 * It must be PAGE_SIZE aligned on-disk
> > +		 */
> > +		if (first_block & (blocks_per_page - 1)) {
> > +			probe_block++;
> > +			goto reprobe;
> > +		}
> > +
> > +		for (block_in_page = 1; block_in_page < blocks_per_page;
> > +					block_in_page++) {
> > +			sector_t block;
> > +
> > +			block = bmap(inode, probe_block + block_in_page);
> > +			if (block == 0)
> > +				goto bad_bmap;
> > +			if (block != first_block + block_in_page) {
> > +				/* Discontiguity */
> > +				probe_block++;
> > +				goto reprobe;
> > +			}
> > +		}
> > +
> > +		first_block >>= (PAGE_SHIFT - blkbits);
> > +		if (page_no) {	/* exclude the header page */
> > +			if (first_block < lowest_block)
> > +				lowest_block = first_block;
> > +			if (first_block > highest_block)
> > +				highest_block = first_block;
> > +		}
> > +
> > +		page_no++;
> > +		probe_block += blocks_per_page;
> > +reprobe:
> > +		continue;
> > +	}
> > +	return 0;
> > +
> > +bad_bmap:
> > +	pr_err("swapon: swapfile has holes\n");
> > +	return -EINVAL;
> > +}
> > +
> > +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> > +				sector_t *span)
> > +{
> > +	struct inode *inode = file_inode(file);
> > +	int ret;
> > +
> > +	if (!S_ISREG(inode->i_mode))
> > +		return -EINVAL;
> > +
> > +	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> > +		return -EROFS;
> > +
> > +	ret = f2fs_convert_inline_inode(inode);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = check_swap_activate(file, sis->max);
> > +	if (ret)
> > +		return ret;
> > +
> > +	set_inode_flag(inode, FI_PIN_FILE);
> > +	f2fs_precache_extents(inode);
> > +	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> > +	return 0;
> > +}
> > +
> > +static void f2fs_swap_deactivate(struct file *file)
> > +{
> > +	struct inode *inode = file_inode(file);
> > +
> > +	clear_inode_flag(inode, FI_PIN_FILE);
> > +}
> > +#else
> > +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> > +				sector_t *span)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +
> > +static void f2fs_swap_deactivate(struct file *file)
> > +{
> > +}
> > +#endif
> > +
> >  const struct file_operations f2fs_file_operations = {
> >  	.llseek		= f2fs_llseek,
> >  	.read_iter	= generic_file_read_iter,
> > @@ -3482,4 +3602,6 @@ const struct file_operations f2fs_file_operations = {
> >  #endif
> >  	.splice_read	= generic_file_splice_read,
> >  	.splice_write	= iter_file_splice_write,
> > +	.swap_activate  = f2fs_swap_activate,
> > +	.swap_deactivate = f2fs_swap_deactivate,
> >  };
> > diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> > index 95dc90570786..1f82f92185d6 100644
> > --- a/fs/nfs/file.c
> > +++ b/fs/nfs/file.c
> > @@ -520,8 +520,6 @@ const struct address_space_operations nfs_file_aops = {
> >  	.launder_page = nfs_launder_page,
> >  	.is_dirty_writeback = nfs_check_dirty_writeback,
> >  	.error_remove_page = generic_error_remove_page,
> > -	.swap_activate = nfs_swap_activate,
> > -	.swap_deactivate = nfs_swap_deactivate,
> >  };
> >  
> >  /*
> > @@ -847,5 +845,7 @@ const struct file_operations nfs_file_operations = {
> >  	.splice_write	= iter_file_splice_write,
> >  	.check_flags	= nfs_check_flags,
> >  	.setlease	= simple_nosetlease,
> > +	.swap_activate = nfs_swap_activate,
> > +	.swap_deactivate = nfs_swap_deactivate,
> >  };
> >  EXPORT_SYMBOL_GPL(nfs_file_operations);
> > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> > index 3a688eb5c5ae..99f578a9ed90 100644
> > --- a/fs/xfs/xfs_aops.c
> > +++ b/fs/xfs/xfs_aops.c
> > @@ -631,17 +631,6 @@ xfs_vm_readpages(
> >  	return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops);
> >  }
> >  
> > -static int
> > -xfs_iomap_swapfile_activate(
> > -	struct swap_info_struct		*sis,
> > -	struct file			*swap_file,
> > -	sector_t			*span)
> > -{
> > -	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
> > -	return iomap_swapfile_activate(sis, swap_file, span,
> > -			&xfs_read_iomap_ops);
> > -}
> > -
> >  const struct address_space_operations xfs_address_space_operations = {
> >  	.readpage		= xfs_vm_readpage,
> >  	.readpages		= xfs_vm_readpages,
> > @@ -655,7 +644,6 @@ const struct address_space_operations xfs_address_space_operations = {
> >  	.migratepage		= iomap_migrate_page,
> >  	.is_partially_uptodate  = iomap_is_partially_uptodate,
> >  	.error_remove_page	= generic_error_remove_page,
> > -	.swap_activate		= xfs_iomap_swapfile_activate,
> >  };
> >  
> >  const struct address_space_operations xfs_dax_aops = {
> > @@ -663,5 +651,4 @@ const struct address_space_operations xfs_dax_aops = {
> >  	.direct_IO		= noop_direct_IO,
> >  	.set_page_dirty		= noop_set_page_dirty,
> >  	.invalidatepage		= noop_invalidatepage,
> > -	.swap_activate		= xfs_iomap_swapfile_activate,
> >  };
> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > index 865543e41fb4..225f58561f06 100644
> > --- a/fs/xfs/xfs_file.c
> > +++ b/fs/xfs/xfs_file.c
> > @@ -1294,6 +1294,17 @@ xfs_file_mmap(
> >  	return 0;
> >  }
> >  
> > +static int
> > +xfs_file_swap_activate(
> > +	struct swap_info_struct		*sis,
> > +	struct file			*swap_file,
> > +	sector_t			*span)
> > +{
> > +	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
> > +	return iomap_swapfile_activate(sis, swap_file, span,
> > +			&xfs_read_iomap_ops);
> > +}
> > +
> >  const struct file_operations xfs_file_operations = {
> >  	.llseek		= xfs_file_llseek,
> >  	.read_iter	= xfs_file_read_iter,
> > @@ -1314,6 +1325,7 @@ const struct file_operations xfs_file_operations = {
> >  	.fallocate	= xfs_file_fallocate,
> >  	.fadvise	= xfs_file_fadvise,
> >  	.remap_file_range = xfs_file_remap_range,
> > +	.swap_activate	= xfs_file_swap_activate,
> >  };
> >  
> >  const struct file_operations xfs_dir_file_operations = {
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index 83e011e0df7f..1175815da3df 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -402,11 +402,6 @@ struct address_space_operations {
> >  					unsigned long);
> >  	void (*is_dirty_writeback) (struct page *, bool *, bool *);
> >  	int (*error_remove_page)(struct address_space *, struct page *);
> > -
> > -	/* swapfile support */
> > -	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
> > -				sector_t *span);
> > -	void (*swap_deactivate)(struct file *file);
> >  };
> >  
> >  extern const struct address_space_operations empty_aops;
> > @@ -1858,6 +1853,11 @@ struct file_operations {
> >  				   struct file *file_out, loff_t pos_out,
> >  				   loff_t len, unsigned int remap_flags);
> >  	int (*fadvise)(struct file *, loff_t, loff_t, int);
> > +
> > +	/* swapfile support */
> > +	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
> > +				sector_t *span);
> > +	void (*swap_deactivate)(struct file *file);
> >  } __randomize_layout;
> >  
> >  struct inode_operations {
> > diff --git a/mm/swapfile.c b/mm/swapfile.c
> > index bb3261d45b6a..d2de8d668708 100644
> > --- a/mm/swapfile.c
> > +++ b/mm/swapfile.c
> > @@ -2293,11 +2293,10 @@ static void destroy_swap_extents(struct swap_info_struct *sis)
> >  
> >  	if (sis->flags & SWP_ACTIVATED) {
> >  		struct file *swap_file = sis->swap_file;
> > -		struct address_space *mapping = swap_file->f_mapping;
> >  
> >  		sis->flags &= ~SWP_ACTIVATED;
> > -		if (mapping->a_ops->swap_deactivate)
> > -			mapping->a_ops->swap_deactivate(swap_file);
> > +		if (swap_file->f_op->swap_deactivate)
> > +			swap_file->f_op->swap_deactivate(swap_file);
> >  	}
> >  }
> >  
> > @@ -2381,8 +2380,7 @@ EXPORT_SYMBOL_GPL(add_swap_extent);
> >  static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
> >  {
> >  	struct file *swap_file = sis->swap_file;
> > -	struct address_space *mapping = swap_file->f_mapping;
> > -	struct inode *inode = mapping->host;
> > +	struct inode *inode = swap_file->f_mapping->host;
> >  	int ret;
> >  
> >  	if (S_ISBLK(inode->i_mode)) {
> > @@ -2391,8 +2389,8 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
> >  		return ret;
> >  	}
> >  
> > -	if (mapping->a_ops->swap_activate) {
> > -		ret = mapping->a_ops->swap_activate(sis, swap_file, span);
> > +	if (swap_file->f_op->swap_activate) {
> > +		ret = swap_file->f_op->swap_activate(sis, swap_file, span);
> >  		if (ret >= 0)
> >  			sis->flags |= SWP_ACTIVATED;
> >  		if (!ret) {
> > -- 
> > 2.21.0
> >
diff mbox series

Patch

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0cb43b682789..117502311fe0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -16,6 +16,7 @@ 
 #include <linux/btrfs.h>
 #include <linux/uio.h>
 #include <linux/iversion.h>
+#include <linux/swap.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -27,6 +28,7 @@ 
 #include "qgroup.h"
 #include "compression.h"
 #include "delalloc-space.h"
+#include "block-group.h"
 
 static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
@@ -3444,6 +3446,343 @@  static int btrfs_file_open(struct inode *inode, struct file *filp)
 	return generic_file_open(inode, filp);
 }
 
+#ifdef CONFIG_SWAP
+/*
+ * Add an entry indicating a block group or device which is pinned by a
+ * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
+ * negative errno on failure.
+ */
+static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
+				  bool is_block_group)
+{
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+	struct btrfs_swapfile_pin *sp, *entry;
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+
+	sp = kmalloc(sizeof(*sp), GFP_NOFS);
+	if (!sp)
+		return -ENOMEM;
+	sp->ptr = ptr;
+	sp->inode = inode;
+	sp->is_block_group = is_block_group;
+
+	spin_lock(&fs_info->swapfile_pins_lock);
+	p = &fs_info->swapfile_pins.rb_node;
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
+		if (sp->ptr < entry->ptr ||
+		    (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
+			p = &(*p)->rb_left;
+		} else if (sp->ptr > entry->ptr ||
+			   (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
+			p = &(*p)->rb_right;
+		} else {
+			spin_unlock(&fs_info->swapfile_pins_lock);
+			kfree(sp);
+			return 1;
+		}
+	}
+	rb_link_node(&sp->node, parent, p);
+	rb_insert_color(&sp->node, &fs_info->swapfile_pins);
+	spin_unlock(&fs_info->swapfile_pins_lock);
+	return 0;
+}
+
+/* Free all of the entries pinned by this swapfile. */
+static void btrfs_free_swapfile_pins(struct inode *inode)
+{
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+	struct btrfs_swapfile_pin *sp;
+	struct rb_node *node, *next;
+
+	spin_lock(&fs_info->swapfile_pins_lock);
+	node = rb_first(&fs_info->swapfile_pins);
+	while (node) {
+		next = rb_next(node);
+		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
+		if (sp->inode == inode) {
+			rb_erase(&sp->node, &fs_info->swapfile_pins);
+			if (sp->is_block_group)
+				btrfs_put_block_group(sp->ptr);
+			kfree(sp);
+		}
+		node = next;
+	}
+	spin_unlock(&fs_info->swapfile_pins_lock);
+}
+
+struct btrfs_swap_info {
+	u64 start;
+	u64 block_start;
+	u64 block_len;
+	u64 lowest_ppage;
+	u64 highest_ppage;
+	unsigned long nr_pages;
+	int nr_extents;
+};
+
+static int btrfs_add_swap_extent(struct swap_info_struct *sis,
+				 struct btrfs_swap_info *bsi)
+{
+	unsigned long nr_pages;
+	u64 first_ppage, first_ppage_reported, next_ppage;
+	int ret;
+
+	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
+	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
+				PAGE_SIZE) >> PAGE_SHIFT;
+
+	if (first_ppage >= next_ppage)
+		return 0;
+	nr_pages = next_ppage - first_ppage;
+
+	first_ppage_reported = first_ppage;
+	if (bsi->start == 0)
+		first_ppage_reported++;
+	if (bsi->lowest_ppage > first_ppage_reported)
+		bsi->lowest_ppage = first_ppage_reported;
+	if (bsi->highest_ppage < (next_ppage - 1))
+		bsi->highest_ppage = next_ppage - 1;
+
+	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
+	if (ret < 0)
+		return ret;
+	bsi->nr_extents += ret;
+	bsi->nr_pages += nr_pages;
+	return 0;
+}
+
+static void btrfs_swap_deactivate(struct file *file)
+{
+	struct inode *inode = file_inode(file);
+
+	btrfs_free_swapfile_pins(inode);
+	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
+}
+
+static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+			       sector_t *span)
+{
+	struct inode *inode = file_inode(file);
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct extent_state *cached_state = NULL;
+	struct extent_map *em = NULL;
+	struct btrfs_device *device = NULL;
+	struct btrfs_swap_info bsi = {
+		.lowest_ppage = (sector_t)-1ULL,
+	};
+	int ret = 0;
+	u64 isize;
+	u64 start;
+
+	/*
+	 * If the swap file was just created, make sure delalloc is done. If the
+	 * file changes again after this, the user is doing something stupid and
+	 * we don't really care.
+	 */
+	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+	if (ret)
+		return ret;
+
+	/*
+	 * The inode is locked, so these flags won't change after we check them.
+	 */
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
+		btrfs_warn(fs_info, "swapfile must not be compressed");
+		return -EINVAL;
+	}
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
+		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
+		return -EINVAL;
+	}
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+		btrfs_warn(fs_info, "swapfile must not be checksummed");
+		return -EINVAL;
+	}
+
+	/*
+	 * Balance or device remove/replace/resize can move stuff around from
+	 * under us. The EXCL_OP flag makes sure they aren't running/won't run
+	 * concurrently while we are mapping the swap extents, and
+	 * fs_info->swapfile_pins prevents them from running and moving the
+	 * extents while the swap file is active. Note that this also prevents a
+	 * concurrent device add which isn't actually necessary, but it's not
+	 * really worth the trouble to allow it.
+	 */
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+		btrfs_warn(fs_info,
+	   "cannot activate swapfile while exclusive operation is running");
+		return -EBUSY;
+	}
+	/*
+	 * Snapshots can create extents which require COW even if NODATACOW is
+	 * set. We use this counter to prevent snapshots. We must increment it
+	 * before walking the extents because we don't want a concurrent
+	 * snapshot to run after we've already checked the extents.
+	 */
+	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
+
+	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
+
+	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
+	start = 0;
+	while (start < isize) {
+		u64 logical_block_start, physical_block_start;
+		struct btrfs_block_group_cache *bg;
+		u64 len = isize - start;
+
+		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			goto out;
+		}
+
+		if (em->block_start == EXTENT_MAP_HOLE) {
+			btrfs_warn(fs_info, "swapfile must not have holes");
+			ret = -EINVAL;
+			goto out;
+		}
+		if (em->block_start == EXTENT_MAP_INLINE) {
+			/*
+			 * It's unlikely we'll ever actually find ourselves
+			 * here, as a file small enough to fit inline won't be
+			 * big enough to store more than the swap header, but in
+			 * case something changes in the future, let's catch it
+			 * here rather than later.
+			 */
+			btrfs_warn(fs_info, "swapfile must not be inline");
+			ret = -EINVAL;
+			goto out;
+		}
+		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+			btrfs_warn(fs_info, "swapfile must not be compressed");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		logical_block_start = em->block_start + (start - em->start);
+		len = min(len, em->len - (start - em->start));
+		free_extent_map(em);
+		em = NULL;
+
+		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
+		if (ret < 0) {
+			goto out;
+		} else if (ret) {
+			ret = 0;
+		} else {
+			btrfs_warn(fs_info,
+				   "swapfile must not be copy-on-write");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			goto out;
+		}
+
+		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+			btrfs_warn(fs_info,
+				   "swapfile must have single data profile");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (device == NULL) {
+			device = em->map_lookup->stripes[0].dev;
+			ret = btrfs_add_swapfile_pin(inode, device, false);
+			if (ret == 1)
+				ret = 0;
+			else if (ret)
+				goto out;
+		} else if (device != em->map_lookup->stripes[0].dev) {
+			btrfs_warn(fs_info, "swapfile must be on one device");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		physical_block_start = (em->map_lookup->stripes[0].physical +
+					(logical_block_start - em->start));
+		len = min(len, em->len - (logical_block_start - em->start));
+		free_extent_map(em);
+		em = NULL;
+
+		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
+		if (!bg) {
+			btrfs_warn(fs_info,
+			   "could not find block group containing swapfile");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		ret = btrfs_add_swapfile_pin(inode, bg, true);
+		if (ret) {
+			btrfs_put_block_group(bg);
+			if (ret == 1)
+				ret = 0;
+			else
+				goto out;
+		}
+
+		if (bsi.block_len &&
+		    bsi.block_start + bsi.block_len == physical_block_start) {
+			bsi.block_len += len;
+		} else {
+			if (bsi.block_len) {
+				ret = btrfs_add_swap_extent(sis, &bsi);
+				if (ret)
+					goto out;
+			}
+			bsi.start = start;
+			bsi.block_start = physical_block_start;
+			bsi.block_len = len;
+		}
+
+		start += len;
+	}
+
+	if (bsi.block_len)
+		ret = btrfs_add_swap_extent(sis, &bsi);
+
+out:
+	if (!IS_ERR_OR_NULL(em))
+		free_extent_map(em);
+
+	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
+
+	if (ret)
+		btrfs_swap_deactivate(file);
+
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+
+	if (ret)
+		return ret;
+
+	if (device)
+		sis->bdev = device->bdev;
+	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
+	sis->max = bsi.nr_pages;
+	sis->pages = bsi.nr_pages - 1;
+	sis->highest_bit = bsi.nr_pages - 1;
+	return bsi.nr_extents;
+}
+#else
+static void btrfs_swap_deactivate(struct file *file)
+{
+}
+
+static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+			       sector_t *span)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 const struct file_operations btrfs_file_operations = {
 	.llseek		= btrfs_file_llseek,
 	.read_iter      = generic_file_read_iter,
@@ -3459,6 +3798,8 @@  const struct file_operations btrfs_file_operations = {
 	.compat_ioctl	= btrfs_compat_ioctl,
 #endif
 	.remap_file_range = btrfs_remap_file_range,
+	.swap_activate	= btrfs_swap_activate,
+	.swap_deactivate = btrfs_swap_deactivate,
 };
 
 void __cold btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6d159df7b536..c11b86f2bf24 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -27,7 +27,6 @@ 
 #include <linux/uio.h>
 #include <linux/magic.h>
 #include <linux/iversion.h>
-#include <linux/swap.h>
 #include <linux/sched/mm.h>
 #include <asm/unaligned.h>
 #include "misc.h"
@@ -10629,343 +10628,6 @@  void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 	}
 }
 
-#ifdef CONFIG_SWAP
-/*
- * Add an entry indicating a block group or device which is pinned by a
- * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
- * negative errno on failure.
- */
-static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
-				  bool is_block_group)
-{
-	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
-	struct btrfs_swapfile_pin *sp, *entry;
-	struct rb_node **p;
-	struct rb_node *parent = NULL;
-
-	sp = kmalloc(sizeof(*sp), GFP_NOFS);
-	if (!sp)
-		return -ENOMEM;
-	sp->ptr = ptr;
-	sp->inode = inode;
-	sp->is_block_group = is_block_group;
-
-	spin_lock(&fs_info->swapfile_pins_lock);
-	p = &fs_info->swapfile_pins.rb_node;
-	while (*p) {
-		parent = *p;
-		entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
-		if (sp->ptr < entry->ptr ||
-		    (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
-			p = &(*p)->rb_left;
-		} else if (sp->ptr > entry->ptr ||
-			   (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
-			p = &(*p)->rb_right;
-		} else {
-			spin_unlock(&fs_info->swapfile_pins_lock);
-			kfree(sp);
-			return 1;
-		}
-	}
-	rb_link_node(&sp->node, parent, p);
-	rb_insert_color(&sp->node, &fs_info->swapfile_pins);
-	spin_unlock(&fs_info->swapfile_pins_lock);
-	return 0;
-}
-
-/* Free all of the entries pinned by this swapfile. */
-static void btrfs_free_swapfile_pins(struct inode *inode)
-{
-	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
-	struct btrfs_swapfile_pin *sp;
-	struct rb_node *node, *next;
-
-	spin_lock(&fs_info->swapfile_pins_lock);
-	node = rb_first(&fs_info->swapfile_pins);
-	while (node) {
-		next = rb_next(node);
-		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
-		if (sp->inode == inode) {
-			rb_erase(&sp->node, &fs_info->swapfile_pins);
-			if (sp->is_block_group)
-				btrfs_put_block_group(sp->ptr);
-			kfree(sp);
-		}
-		node = next;
-	}
-	spin_unlock(&fs_info->swapfile_pins_lock);
-}
-
-struct btrfs_swap_info {
-	u64 start;
-	u64 block_start;
-	u64 block_len;
-	u64 lowest_ppage;
-	u64 highest_ppage;
-	unsigned long nr_pages;
-	int nr_extents;
-};
-
-static int btrfs_add_swap_extent(struct swap_info_struct *sis,
-				 struct btrfs_swap_info *bsi)
-{
-	unsigned long nr_pages;
-	u64 first_ppage, first_ppage_reported, next_ppage;
-	int ret;
-
-	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
-	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
-				PAGE_SIZE) >> PAGE_SHIFT;
-
-	if (first_ppage >= next_ppage)
-		return 0;
-	nr_pages = next_ppage - first_ppage;
-
-	first_ppage_reported = first_ppage;
-	if (bsi->start == 0)
-		first_ppage_reported++;
-	if (bsi->lowest_ppage > first_ppage_reported)
-		bsi->lowest_ppage = first_ppage_reported;
-	if (bsi->highest_ppage < (next_ppage - 1))
-		bsi->highest_ppage = next_ppage - 1;
-
-	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
-	if (ret < 0)
-		return ret;
-	bsi->nr_extents += ret;
-	bsi->nr_pages += nr_pages;
-	return 0;
-}
-
-static void btrfs_swap_deactivate(struct file *file)
-{
-	struct inode *inode = file_inode(file);
-
-	btrfs_free_swapfile_pins(inode);
-	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
-}
-
-static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
-			       sector_t *span)
-{
-	struct inode *inode = file_inode(file);
-	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
-	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	struct extent_state *cached_state = NULL;
-	struct extent_map *em = NULL;
-	struct btrfs_device *device = NULL;
-	struct btrfs_swap_info bsi = {
-		.lowest_ppage = (sector_t)-1ULL,
-	};
-	int ret = 0;
-	u64 isize;
-	u64 start;
-
-	/*
-	 * If the swap file was just created, make sure delalloc is done. If the
-	 * file changes again after this, the user is doing something stupid and
-	 * we don't really care.
-	 */
-	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
-	if (ret)
-		return ret;
-
-	/*
-	 * The inode is locked, so these flags won't change after we check them.
-	 */
-	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
-		btrfs_warn(fs_info, "swapfile must not be compressed");
-		return -EINVAL;
-	}
-	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
-		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
-		return -EINVAL;
-	}
-	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-		btrfs_warn(fs_info, "swapfile must not be checksummed");
-		return -EINVAL;
-	}
-
-	/*
-	 * Balance or device remove/replace/resize can move stuff around from
-	 * under us. The EXCL_OP flag makes sure they aren't running/won't run
-	 * concurrently while we are mapping the swap extents, and
-	 * fs_info->swapfile_pins prevents them from running while the swap file
-	 * is active and moving the extents. Note that this also prevents a
-	 * concurrent device add which isn't actually necessary, but it's not
-	 * really worth the trouble to allow it.
-	 */
-	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
-		btrfs_warn(fs_info,
-	   "cannot activate swapfile while exclusive operation is running");
-		return -EBUSY;
-	}
-	/*
-	 * Snapshots can create extents which require COW even if NODATACOW is
-	 * set. We use this counter to prevent snapshots. We must increment it
-	 * before walking the extents because we don't want a concurrent
-	 * snapshot to run after we've already checked the extents.
-	 */
-	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
-
-	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
-
-	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
-	start = 0;
-	while (start < isize) {
-		u64 logical_block_start, physical_block_start;
-		struct btrfs_block_group_cache *bg;
-		u64 len = isize - start;
-
-		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
-		if (IS_ERR(em)) {
-			ret = PTR_ERR(em);
-			goto out;
-		}
-
-		if (em->block_start == EXTENT_MAP_HOLE) {
-			btrfs_warn(fs_info, "swapfile must not have holes");
-			ret = -EINVAL;
-			goto out;
-		}
-		if (em->block_start == EXTENT_MAP_INLINE) {
-			/*
-			 * It's unlikely we'll ever actually find ourselves
-			 * here, as a file small enough to fit inline won't be
-			 * big enough to store more than the swap header, but in
-			 * case something changes in the future, let's catch it
-			 * here rather than later.
-			 */
-			btrfs_warn(fs_info, "swapfile must not be inline");
-			ret = -EINVAL;
-			goto out;
-		}
-		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
-			btrfs_warn(fs_info, "swapfile must not be compressed");
-			ret = -EINVAL;
-			goto out;
-		}
-
-		logical_block_start = em->block_start + (start - em->start);
-		len = min(len, em->len - (start - em->start));
-		free_extent_map(em);
-		em = NULL;
-
-		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
-		if (ret < 0) {
-			goto out;
-		} else if (ret) {
-			ret = 0;
-		} else {
-			btrfs_warn(fs_info,
-				   "swapfile must not be copy-on-write");
-			ret = -EINVAL;
-			goto out;
-		}
-
-		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
-		if (IS_ERR(em)) {
-			ret = PTR_ERR(em);
-			goto out;
-		}
-
-		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
-			btrfs_warn(fs_info,
-				   "swapfile must have single data profile");
-			ret = -EINVAL;
-			goto out;
-		}
-
-		if (device == NULL) {
-			device = em->map_lookup->stripes[0].dev;
-			ret = btrfs_add_swapfile_pin(inode, device, false);
-			if (ret == 1)
-				ret = 0;
-			else if (ret)
-				goto out;
-		} else if (device != em->map_lookup->stripes[0].dev) {
-			btrfs_warn(fs_info, "swapfile must be on one device");
-			ret = -EINVAL;
-			goto out;
-		}
-
-		physical_block_start = (em->map_lookup->stripes[0].physical +
-					(logical_block_start - em->start));
-		len = min(len, em->len - (logical_block_start - em->start));
-		free_extent_map(em);
-		em = NULL;
-
-		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
-		if (!bg) {
-			btrfs_warn(fs_info,
-			   "could not find block group containing swapfile");
-			ret = -EINVAL;
-			goto out;
-		}
-
-		ret = btrfs_add_swapfile_pin(inode, bg, true);
-		if (ret) {
-			btrfs_put_block_group(bg);
-			if (ret == 1)
-				ret = 0;
-			else
-				goto out;
-		}
-
-		if (bsi.block_len &&
-		    bsi.block_start + bsi.block_len == physical_block_start) {
-			bsi.block_len += len;
-		} else {
-			if (bsi.block_len) {
-				ret = btrfs_add_swap_extent(sis, &bsi);
-				if (ret)
-					goto out;
-			}
-			bsi.start = start;
-			bsi.block_start = physical_block_start;
-			bsi.block_len = len;
-		}
-
-		start += len;
-	}
-
-	if (bsi.block_len)
-		ret = btrfs_add_swap_extent(sis, &bsi);
-
-out:
-	if (!IS_ERR_OR_NULL(em))
-		free_extent_map(em);
-
-	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
-
-	if (ret)
-		btrfs_swap_deactivate(file);
-
-	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-
-	if (ret)
-		return ret;
-
-	if (device)
-		sis->bdev = device->bdev;
-	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
-	sis->max = bsi.nr_pages;
-	sis->pages = bsi.nr_pages - 1;
-	sis->highest_bit = bsi.nr_pages - 1;
-	return bsi.nr_extents;
-}
-#else
-static void btrfs_swap_deactivate(struct file *file)
-{
-}
-
-static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
-			       sector_t *span)
-{
-	return -EOPNOTSUPP;
-}
-#endif
-
 static const struct inode_operations btrfs_dir_inode_operations = {
 	.getattr	= btrfs_getattr,
 	.lookup		= btrfs_lookup,
@@ -11032,8 +10694,6 @@  static const struct address_space_operations btrfs_aops = {
 	.releasepage	= btrfs_releasepage,
 	.set_page_dirty	= btrfs_set_page_dirty,
 	.error_remove_page = generic_error_remove_page,
-	.swap_activate	= btrfs_swap_activate,
-	.swap_deactivate = btrfs_swap_deactivate,
 };
 
 static const struct inode_operations btrfs_file_inode_operations = {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3c7777bfae17..04b2a8f44fa9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -14,7 +14,6 @@ 
 #include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
-#include <linux/swap.h>
 #include <linux/prefetch.h>
 #include <linux/uio.h>
 #include <linux/cleancache.h>
@@ -3142,125 +3141,6 @@  int f2fs_migrate_page(struct address_space *mapping,
 }
 #endif
 
-#ifdef CONFIG_SWAP
-/* Copied from generic_swapfile_activate() to check any holes */
-static int check_swap_activate(struct file *swap_file, unsigned int max)
-{
-	struct inode *inode = swap_file->f_mapping->host;
-	unsigned blocks_per_page;
-	unsigned long page_no;
-	unsigned blkbits;
-	sector_t probe_block;
-	sector_t last_block;
-	sector_t lowest_block = -1;
-	sector_t highest_block = 0;
-
-	blkbits = inode->i_blkbits;
-	blocks_per_page = PAGE_SIZE >> blkbits;
-
-	/*
-	 * Map all the blocks into the extent list.  This code doesn't try
-	 * to be very smart.
-	 */
-	probe_block = 0;
-	page_no = 0;
-	last_block = i_size_read(inode) >> blkbits;
-	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
-		unsigned block_in_page;
-		sector_t first_block;
-
-		cond_resched();
-
-		first_block = bmap(inode, probe_block);
-		if (first_block == 0)
-			goto bad_bmap;
-
-		/*
-		 * It must be PAGE_SIZE aligned on-disk
-		 */
-		if (first_block & (blocks_per_page - 1)) {
-			probe_block++;
-			goto reprobe;
-		}
-
-		for (block_in_page = 1; block_in_page < blocks_per_page;
-					block_in_page++) {
-			sector_t block;
-
-			block = bmap(inode, probe_block + block_in_page);
-			if (block == 0)
-				goto bad_bmap;
-			if (block != first_block + block_in_page) {
-				/* Discontiguity */
-				probe_block++;
-				goto reprobe;
-			}
-		}
-
-		first_block >>= (PAGE_SHIFT - blkbits);
-		if (page_no) {	/* exclude the header page */
-			if (first_block < lowest_block)
-				lowest_block = first_block;
-			if (first_block > highest_block)
-				highest_block = first_block;
-		}
-
-		page_no++;
-		probe_block += blocks_per_page;
-reprobe:
-		continue;
-	}
-	return 0;
-
-bad_bmap:
-	pr_err("swapon: swapfile has holes\n");
-	return -EINVAL;
-}
-
-static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
-				sector_t *span)
-{
-	struct inode *inode = file_inode(file);
-	int ret;
-
-	if (!S_ISREG(inode->i_mode))
-		return -EINVAL;
-
-	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
-		return -EROFS;
-
-	ret = f2fs_convert_inline_inode(inode);
-	if (ret)
-		return ret;
-
-	ret = check_swap_activate(file, sis->max);
-	if (ret)
-		return ret;
-
-	set_inode_flag(inode, FI_PIN_FILE);
-	f2fs_precache_extents(inode);
-	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
-	return 0;
-}
-
-static void f2fs_swap_deactivate(struct file *file)
-{
-	struct inode *inode = file_inode(file);
-
-	clear_inode_flag(inode, FI_PIN_FILE);
-}
-#else
-static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
-				sector_t *span)
-{
-	return -EOPNOTSUPP;
-}
-
-static void f2fs_swap_deactivate(struct file *file)
-{
-}
-#endif
-
 const struct address_space_operations f2fs_dblock_aops = {
 	.readpage	= f2fs_read_data_page,
 	.readpages	= f2fs_read_data_pages,
@@ -3273,8 +3153,6 @@  const struct address_space_operations f2fs_dblock_aops = {
 	.releasepage	= f2fs_release_page,
 	.direct_IO	= f2fs_direct_IO,
 	.bmap		= f2fs_bmap,
-	.swap_activate  = f2fs_swap_activate,
-	.swap_deactivate = f2fs_swap_deactivate,
 #ifdef CONFIG_MIGRATION
 	.migratepage    = f2fs_migrate_page,
 #endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 483ad22a0946..de7f9cf36689 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -21,6 +21,7 @@ 
 #include <linux/uuid.h>
 #include <linux/file.h>
 #include <linux/nls.h>
+#include <linux/swap.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -3466,6 +3467,125 @@  long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 }
 #endif
 
+#ifdef CONFIG_SWAP
+/* Copied from generic_swapfile_activate() to check for any holes */
+static int check_swap_activate(struct file *swap_file, unsigned int max)
+{
+	struct inode *inode = swap_file->f_mapping->host;
+	unsigned blocks_per_page;
+	unsigned long page_no;
+	unsigned blkbits;
+	sector_t probe_block;
+	sector_t last_block;
+	sector_t lowest_block = -1;
+	sector_t highest_block = 0;
+
+	blkbits = inode->i_blkbits;
+	blocks_per_page = PAGE_SIZE >> blkbits;
+
+	/*
+	 * Walk the file's blocks and fail if one is unmapped (a hole).  This
+	 * code doesn't try to be very smart.
+	 */
+	probe_block = 0;
+	page_no = 0;
+	last_block = i_size_read(inode) >> blkbits;
+	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
+		unsigned block_in_page;
+		sector_t first_block;
+
+		cond_resched();
+
+		first_block = bmap(inode, probe_block);
+		if (first_block == 0)
+			goto bad_bmap;
+
+		/*
+		 * It must be PAGE_SIZE aligned on-disk
+		 */
+		if (first_block & (blocks_per_page - 1)) {
+			probe_block++;
+			goto reprobe;
+		}
+
+		for (block_in_page = 1; block_in_page < blocks_per_page;
+					block_in_page++) {
+			sector_t block;
+
+			block = bmap(inode, probe_block + block_in_page);
+			if (block == 0)
+				goto bad_bmap;
+			if (block != first_block + block_in_page) {
+				/* Discontiguity */
+				probe_block++;
+				goto reprobe;
+			}
+		}
+
+		first_block >>= (PAGE_SHIFT - blkbits);
+		if (page_no) {	/* exclude the header page */
+			if (first_block < lowest_block)
+				lowest_block = first_block;
+			if (first_block > highest_block)
+				highest_block = first_block;
+		}
+
+		page_no++;
+		probe_block += blocks_per_page;
+reprobe:
+		continue;
+	}
+	return 0;
+
+bad_bmap:
+	pr_err("swapon: swapfile has holes\n");
+	return -EINVAL;
+}
+
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+				sector_t *span)
+{
+	struct inode *inode = file_inode(file);
+	int ret;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+		return -EROFS;
+
+	ret = f2fs_convert_inline_inode(inode);
+	if (ret)
+		return ret;
+
+	ret = check_swap_activate(file, sis->max);
+	if (ret)
+		return ret;
+
+	set_inode_flag(inode, FI_PIN_FILE);
+	f2fs_precache_extents(inode);
+	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+	return 0;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+	struct inode *inode = file_inode(file);
+
+	clear_inode_flag(inode, FI_PIN_FILE);
+}
+#else
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+				sector_t *span)
+{
+	return -EOPNOTSUPP;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+}
+#endif
+
 const struct file_operations f2fs_file_operations = {
 	.llseek		= f2fs_llseek,
 	.read_iter	= generic_file_read_iter,
@@ -3482,4 +3602,6 @@  const struct file_operations f2fs_file_operations = {
 #endif
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
+	.swap_activate  = f2fs_swap_activate,
+	.swap_deactivate = f2fs_swap_deactivate,
 };
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 95dc90570786..1f82f92185d6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -520,8 +520,6 @@  const struct address_space_operations nfs_file_aops = {
 	.launder_page = nfs_launder_page,
 	.is_dirty_writeback = nfs_check_dirty_writeback,
 	.error_remove_page = generic_error_remove_page,
-	.swap_activate = nfs_swap_activate,
-	.swap_deactivate = nfs_swap_deactivate,
 };
 
 /*
@@ -847,5 +845,7 @@  const struct file_operations nfs_file_operations = {
 	.splice_write	= iter_file_splice_write,
 	.check_flags	= nfs_check_flags,
 	.setlease	= simple_nosetlease,
+	.swap_activate = nfs_swap_activate,
+	.swap_deactivate = nfs_swap_deactivate,
 };
 EXPORT_SYMBOL_GPL(nfs_file_operations);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a688eb5c5ae..99f578a9ed90 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -631,17 +631,6 @@  xfs_vm_readpages(
 	return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops);
 }
 
-static int
-xfs_iomap_swapfile_activate(
-	struct swap_info_struct		*sis,
-	struct file			*swap_file,
-	sector_t			*span)
-{
-	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
-	return iomap_swapfile_activate(sis, swap_file, span,
-			&xfs_read_iomap_ops);
-}
-
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
@@ -655,7 +644,6 @@  const struct address_space_operations xfs_address_space_operations = {
 	.migratepage		= iomap_migrate_page,
 	.is_partially_uptodate  = iomap_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
-	.swap_activate		= xfs_iomap_swapfile_activate,
 };
 
 const struct address_space_operations xfs_dax_aops = {
@@ -663,5 +651,4 @@  const struct address_space_operations xfs_dax_aops = {
 	.direct_IO		= noop_direct_IO,
 	.set_page_dirty		= noop_set_page_dirty,
 	.invalidatepage		= noop_invalidatepage,
-	.swap_activate		= xfs_iomap_swapfile_activate,
 };
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 865543e41fb4..225f58561f06 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1294,6 +1294,17 @@  xfs_file_mmap(
 	return 0;
 }
 
+static int
+xfs_file_swap_activate(
+	struct swap_info_struct		*sis,
+	struct file			*swap_file,
+	sector_t			*span)
+{
+	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
+	return iomap_swapfile_activate(sis, swap_file, span,
+			&xfs_read_iomap_ops);
+}
+
 const struct file_operations xfs_file_operations = {
 	.llseek		= xfs_file_llseek,
 	.read_iter	= xfs_file_read_iter,
@@ -1314,6 +1325,7 @@  const struct file_operations xfs_file_operations = {
 	.fallocate	= xfs_file_fallocate,
 	.fadvise	= xfs_file_fadvise,
 	.remap_file_range = xfs_file_remap_range,
+	.swap_activate	= xfs_file_swap_activate,
 };
 
 const struct file_operations xfs_dir_file_operations = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 83e011e0df7f..1175815da3df 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -402,11 +402,6 @@  struct address_space_operations {
 					unsigned long);
 	void (*is_dirty_writeback) (struct page *, bool *, bool *);
 	int (*error_remove_page)(struct address_space *, struct page *);
-
-	/* swapfile support */
-	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
-				sector_t *span);
-	void (*swap_deactivate)(struct file *file);
 };
 
 extern const struct address_space_operations empty_aops;
@@ -1858,6 +1853,11 @@  struct file_operations {
 				   struct file *file_out, loff_t pos_out,
 				   loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
+
+	/* swapfile support */
+	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
+				sector_t *span);
+	void (*swap_deactivate)(struct file *file);
 } __randomize_layout;
 
 struct inode_operations {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bb3261d45b6a..d2de8d668708 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2293,11 +2293,10 @@  static void destroy_swap_extents(struct swap_info_struct *sis)
 
 	if (sis->flags & SWP_ACTIVATED) {
 		struct file *swap_file = sis->swap_file;
-		struct address_space *mapping = swap_file->f_mapping;
 
 		sis->flags &= ~SWP_ACTIVATED;
-		if (mapping->a_ops->swap_deactivate)
-			mapping->a_ops->swap_deactivate(swap_file);
+		if (swap_file->f_op->swap_deactivate)
+			swap_file->f_op->swap_deactivate(swap_file);
 	}
 }
 
@@ -2381,8 +2380,7 @@  EXPORT_SYMBOL_GPL(add_swap_extent);
 static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
 {
 	struct file *swap_file = sis->swap_file;
-	struct address_space *mapping = swap_file->f_mapping;
-	struct inode *inode = mapping->host;
+	struct inode *inode = swap_file->f_mapping->host;
 	int ret;
 
 	if (S_ISBLK(inode->i_mode)) {
@@ -2391,8 +2389,8 @@  static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
 		return ret;
 	}
 
-	if (mapping->a_ops->swap_activate) {
-		ret = mapping->a_ops->swap_activate(sis, swap_file, span);
+	if (swap_file->f_op->swap_activate) {
+		ret = swap_file->f_op->swap_activate(sis, swap_file, span);
 		if (ret >= 0)
 			sis->flags |= SWP_ACTIVATED;
 		if (!ret) {