diff mbox series

[07/25] vfs: combine the clone and dedupe into a single remap_file_range

Message ID 153938919123.8361.13059492965161549195.stgit@magnolia (mailing list archive)
State New, archived
Headers show
Series fs: fixes for serious clone/dedupe problems | expand

Commit Message

Darrick J. Wong Oct. 13, 2018, 12:06 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Combine the clone_file_range and dedupe_file_range operations into a
single remap_file_range file operation dispatch since they're
fundamentally the same operation.  The differences between the two can
be made in the prep functions.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 Documentation/filesystems/vfs.txt |   12 ++++------
 fs/btrfs/ctree.h                  |    8 ++-----
 fs/btrfs/file.c                   |    3 +-
 fs/btrfs/ioctl.c                  |   45 +++++++++++++++++++------------------
 fs/cifs/cifsfs.c                  |   22 +++++++++++-------
 fs/nfs/nfs4file.c                 |   10 ++++++--
 fs/ocfs2/file.c                   |   24 +++++++-------------
 fs/overlayfs/file.c               |   30 ++++++++++++++-----------
 fs/read_write.c                   |   18 +++++++--------
 fs/xfs/xfs_file.c                 |   23 ++++++-------------
 include/linux/fs.h                |   27 +++++++++++++++++++---
 11 files changed, 116 insertions(+), 106 deletions(-)

Comments

Christoph Hellwig Oct. 14, 2018, 5:19 p.m. UTC | #1
>  	unsigned (*mmap_capabilities)(struct file *);
>  #endif
>  	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
> -	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
> -	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
> +	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
> +				struct file *file_out, loff_t pos_out,
> +				u64 len, unsigned int remap_flags);

None of the other methods in this file name their parameters.  While
I generally don't like people leaving them out, in the end consistency
is even more important.

> +int btrfs_remap_file_range(struct file *src_file, loff_t off,
> +		struct file *dst_file, loff_t destoff, u64 len,
> +		unsigned int remap_flags)
>  {
> +	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
> +		return -EINVAL;
> +
> +	if (remap_flags & RFR_SAME_DATA) {

So at least for btrfs there seems to be no shared code at all below
the function calls.  This kinda speaks against the argument that
they fundamentally are the same..

> +/*
> + * These flags control the behavior of the remap_file_range function pointer.
> + *
> + * RFR_SAME_DATA: only remap if contents identical (i.e. deduplicate)
> + */
> +#define RFR_SAME_DATA		(1 << 0)
> +
> +#define RFR_VALID_FLAGS		(RFR_SAME_DATA)

RFR?  Why not REMAP_FILE_*?  Also why not the well understood
REMAP_FILE_DEDUP instead of the odd SAME_DATA?

> +
> +/*
> + * Filesystem remapping implementations should call this helper on their
> + * remap flags to filter out flags that the implementation doesn't support.
> + *
> + * Returns true if the flags are ok, false otherwise.
> + */
> +static inline bool remap_check_flags(unsigned int remap_flags,
> +				     unsigned int supported_flags)
> +{
> +	return (remap_flags & ~(supported_flags & RFR_VALID_FLAGS)) == 0;
> +}

Any reason to even bother with a helper for this?  ->fallocate
seems to be doing fine without the helper, and the resulting code
seems a lot easier to understand to me.

> @@ -1759,10 +1779,9 @@ struct file_operations {
>  #endif
>  	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
>  			loff_t, size_t, unsigned int);
> -	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
> -			u64);
> -	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
> -			u64);
> +	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
> +				struct file *file_out, loff_t pos_out,
> +				u64 len, unsigned int remap_flags);

Same comment here.  Didn't we have some nice doc tools to avoid this
duplication? :)
Amir Goldstein Oct. 15, 2018, 6:04 a.m. UTC | #2
> > +/*
> > + * These flags control the behavior of the remap_file_range function pointer.
> > + *
> > + * RFR_SAME_DATA: only remap if contents identical (i.e. deduplicate)
> > + */
> > +#define RFR_SAME_DATA                (1 << 0)
> > +
> > +#define RFR_VALID_FLAGS              (RFR_SAME_DATA)
>
> RFR?  Why not REMAP_FILE_*  Also why not the well understood
> REMAP_FILE_DEDUP instead of the odd SAME_DATA?
>
> > +
> > +/*
> > + * Filesystem remapping implementations should call this helper on their
> > + * remap flags to filter out flags that the implementation doesn't support.
> > + *
> > + * Returns true if the flags are ok, false otherwise.
> > + */
> > +static inline bool remap_check_flags(unsigned int remap_flags,
> > +                                  unsigned int supported_flags)
> > +{
> > +     return (remap_flags & ~(supported_flags & RFR_VALID_FLAGS)) == 0;
> > +}
>
> Any reason to even bother with a helper for this?  ->fallocate
> seems to be doing fine without the helper, and the resulting code
> seems a lot easier to understand to me.

I suppose you figured out the reason already.
It makes its appearance in patch 16/25 as RFR_VFS_FLAGS.
All those "advisory" flags, we want to pass them in to filesystem as FYI,
but we don't want to explicitly add support for e.g. RFR_CAN_SHORTEN
to every filesystem, when vfs has already taken care of the advice.

The reason a similar helper doesn't make sense for ->fallocate()
is because vfs does not take any action on behalf of filesystem
nor does vfs pass any internal flags to filesystem.

I argued that fiemap_check_flags() should similarly mask out
FIEMAP_FLAG_SYNC before checking supported fs_flags,
because ioctl_fiemap() respects this flag regardless of whether the
filesystem (or generic helper) declares support for FIEMAP_FLAG_SYNC.

Thanks,
Amir.
Christoph Hellwig Oct. 15, 2018, 12:47 p.m. UTC | #3
On Mon, Oct 15, 2018 at 09:04:13AM +0300, Amir Goldstein wrote:
> I supposed you figured out the reason already.

No, I hadn't.

> It makes it appearance in patch 16/25 as RFR_VFS_FLAGS.
> All those "advisory" flags, we want to pass them in to filesystem as FYI,
> but we don't want to explicitly add support for e.g. RFR_CAN_SHORTEN
> to every filesystem, when vfs has already taken care of the advice.

I don't think this model makes sense.  If they really are purely
handled in the VFS we can mask them before passing them to the file
system, if not we need to check them, or they are advisory and
we can have a simple #define instead of the helper.

RFR_TO_SRC_EOF is checked in generic_remap_file_range_prep,
so the file system should know about it.  Also looking at it again now
it seems entirely superfluous - we can just pass down the len == 0 we
use in higher level code instead of having a flag and will side step
the issue here.

RFR_CAN_SHORTEN is advisory as no one has to shorten, but that can
easily be solved by including it everywhere.

RFR_SHORT_DEDUPE is as far as I can tell entirely superfluous to
start with, as RFR_CAN_SHORTEN can be used instead.

So something like this in fs.h:

#define REMAP_FILE_ADVISORY_FLAGS	REMAP_FILE_CAN_SHORTEN

And then in the file system:

	if (flags & ~REMAP_FILE_ADVISORY_FLAGS)
		-EINVAL;

or

	if (flags & ~(REMAP_FILE_ADVISORY_FLAGS | REMAP_FILE_DEDUP))
		-EINVAL;

should be all that is needed.
Amir Goldstein Oct. 15, 2018, 12:54 p.m. UTC | #4
On Mon, Oct 15, 2018 at 3:47 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Mon, Oct 15, 2018 at 09:04:13AM +0300, Amir Goldstein wrote:
> > I supposed you figured out the reason already.
>
> No, I hadn't.
>
> > It makes it appearance in patch 16/25 as RFR_VFS_FLAGS.
> > All those "advisory" flags, we want to pass them in to filesystem as FYI,
> > but we don't want to explicitly add support for e.g. RFR_CAN_SHORTEN
> > to every filesystem, when vfs has already taken care of the advice.
>
> I don't think this model makes sense.  If they really are purely
> handled in the VFS we can mask them before passing them to the file
> system, if not we need to check them, or the they are avisory and
> we can have a simple #define instead of the helper.
>
> RFR_TO_SRC_EOF is checked in generic_remap_file_range_prep,
> so the file system should know about it  Also looking at it again now
> it seems entirely superflous - we can just pass down then len == we
> use in higher level code instead of having a flag and will side step
> the issue here.
>
> RFR_CAN_SHORTEN is advisory as no one has to shorten, but that can
> easily be solved by including it everywhere.
>
> RFR_SHORT_DEDUPE is as far as I can tell entirely superflous to
> start with, as RFR_CAN_SHORTEN can be used instead.
>
> So something like this in fs.h:
>
> #define REMAP_FILE_ADVISORY_FLAGS       REMAP_FILE_CAN_SHORTEN
>
> And then in the file system:
>
>         if (flags & ~REMAP_FILE_ADVISORY_FLAGS)
>                 -EINVAL;
>
> or
>
>         if (flags & ~(REMAP_FILE_ADVISORY_FLAGS | REMAP_FILE_DEDUP))
>                 -EINVAL;
>
> should be all that is needed.

Yeh, I suppose that makes sense.

Thanks,
Amir.
Matthew Wilcox (Oracle) Oct. 15, 2018, 1:18 p.m. UTC | #5
On Sun, Oct 14, 2018 at 10:19:27AM -0700, Christoph Hellwig wrote:
> >  	unsigned (*mmap_capabilities)(struct file *);
> >  #endif
> >  	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
> > -	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
> > -	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
> > +	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
> > +				struct file *file_out, loff_t pos_out,
> > +				u64 len, unsigned int remap_flags);
> 
> None of the other methods in this file name their parameters.  While
> I generally don't like people leaving them out, in the end consistency
> is even more important.

I would agree with you *except* that the parameters do not follow memcpy()
traditional order (dst, src, len).  Instead they are (src, dst, len), so we
should probably name them to advise the poor sod who has to implement this
that we've chosen an inconsistent API.

Or we could fix it.
Darrick J. Wong Oct. 15, 2018, 4:42 p.m. UTC | #6
On Sun, Oct 14, 2018 at 10:19:27AM -0700, Christoph Hellwig wrote:
> >  	unsigned (*mmap_capabilities)(struct file *);
> >  #endif
> >  	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
> > -	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
> > -	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
> > +	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
> > +				struct file *file_out, loff_t pos_out,
> > +				u64 len, unsigned int remap_flags);
> 
> None of the other methods in this file name their parameters.  While
> I generally don't like people leaving them out, in the end consistency
> is even more important.
> 
> > +int btrfs_remap_file_range(struct file *src_file, loff_t off,
> > +		struct file *dst_file, loff_t destoff, u64 len,
> > +		unsigned int remap_flags)
> >  {
> > +	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
> > +		return -EINVAL;
> > +
> > +	if (remap_flags & RFR_SAME_DATA) {
> 
> So at least for btrfs there seems to be no shared code at all below
> the function calls.  This kinda speaks against the argument that
> they fundamentally are the same..

They /do/ share code -- eventually both btrfs_extent_same and
btrfs_clone_files call btrfs_clone.  xfs and ocfs2 call the same paths
internally too; it's only the vfs helpers that have the extra page cache
comparisons if it's a dedup operation.

> > +/*
> > + * These flags control the behavior of the remap_file_range function pointer.
> > + *
> > + * RFR_SAME_DATA: only remap if contents identical (i.e. deduplicate)
> > + */
> > +#define RFR_SAME_DATA		(1 << 0)
> > +
> > +#define RFR_VALID_FLAGS		(RFR_SAME_DATA)
> 
> RFR?  Why not REMAP_FILE_*  Also why not the well understood
> REMAP_FILE_DEDUP instead of the odd SAME_DATA?

Sure.  I had begun to dislike typing RFR anyway.

> > +
> > +/*
> > + * Filesystem remapping implementations should call this helper on their
> > + * remap flags to filter out flags that the implementation doesn't support.
> > + *
> > + * Returns true if the flags are ok, false otherwise.
> > + */
> > +static inline bool remap_check_flags(unsigned int remap_flags,
> > +				     unsigned int supported_flags)
> > +{
> > +	return (remap_flags & ~(supported_flags & RFR_VALID_FLAGS)) == 0;
> > +}
> 
> Any reason to even bother with a helper for this?  ->fallocate
> seems to be doing fine without the helper, and the resulting code
> seems a lot easier to understand to me.

(Will respond to these at the current end of the flags thread.)

> > @@ -1759,10 +1779,9 @@ struct file_operations {
> >  #endif
> >  	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
> >  			loff_t, size_t, unsigned int);
> > -	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
> > -			u64);
> > -	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
> > -			u64);
> > +	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
> > +				struct file *file_out, loff_t pos_out,
> > +				u64 len, unsigned int remap_flags);
> 
> Same comment here.  Didn't we have some nice doc tools to avoid this
> duplication? :)

We do, but vfs.txt hasn't been ported to any of that.

--D
Darrick J. Wong Oct. 15, 2018, 5:13 p.m. UTC | #7
On Mon, Oct 15, 2018 at 05:47:19AM -0700, Christoph Hellwig wrote:
> On Mon, Oct 15, 2018 at 09:04:13AM +0300, Amir Goldstein wrote:
> > I supposed you figured out the reason already.
> 
> No, I hadn't.
> 
> > It makes it appearance in patch 16/25 as RFR_VFS_FLAGS.
> > All those "advisory" flags, we want to pass them in to filesystem as FYI,
> > but we don't want to explicitly add support for e.g. RFR_CAN_SHORTEN
> > to every filesystem, when vfs has already taken care of the advice.
> 
> I don't think this model makes sense.  If they really are purely
> handled in the VFS we can mask them before passing them to the file
> system, if not we need to check them, or the they are avisory and
> we can have a simple #define instead of the helper.
> 
> RFR_TO_SRC_EOF is checked in generic_remap_file_range_prep,
> so the file system should know about it  Also looking at it again now
> it seems entirely superflous - we can just pass down then len == we
> use in higher level code instead of having a flag and will side step
> the issue here.

I'm not a fan of hidden behaviors like that, particularly when we
already have a flags field where callers can explicitly ask for the
to-eof behavior.

> RFR_CAN_SHORTEN is advisory as no one has to shorten, but that can
> easily be solved by including it everywhere.

CAN_SHORTEN isn't included everywhere -- FICLONE{,RANGE} don't enable it
because they have no way to communicate the number of bytes cloned back
to userspace.  Either we can clone every byte the user asked for, or we
send back -EINVAL.  (Maybe I'm misinterpreting what you meant by 'solved
by including it everywhere'?)

> RFR_SHORT_DEDUPE is as far as I can tell entirely superflous to
> start with, as RFR_CAN_SHORTEN can be used instead.

For now it's superfluous.  At first I was thinking that we could return
a short bytes_deduped if, say, the first part of the range actually did
match, but it became pretty obvious via shared/010 that duperemove can't
handle that, so we really must stick to the existing btrfs behavior.

The existing btrfs behavior is that we can round the length down to
avoid deduping partial EOF blocks, but we return the original length
(i.e. lie) in bytes_deduped when we do that.

I sort of thought about introducing a new copy_file_range flag that
would just do deduplication and allow for opportunistic "dedup as much
as you can" but ... meh.  Maybe I'll just drop the patch instead; we can
revisit that when anyone wants a better dedupe interface.

> So something like this in fs.h:
> 
> #define REMAP_FILE_ADVISORY_FLAGS	REMAP_FILE_CAN_SHORTEN
> 
> And then in the file system:
> 
> 	if (flags & ~REMAP_FILE_ADVISORY_FLAGS)
> 		-EINVAL;
> 
> or
> 
> 	if (flags & ~(REMAP_FILE_ADVISORY_FLAGS | REMAP_FILE_DEDUP))
> 		-EINVAL;
> 
> should be all that is needed.

Sounds good to me.

--D
Christoph Hellwig Oct. 15, 2018, 6:32 p.m. UTC | #8
On Mon, Oct 15, 2018 at 10:13:17AM -0700, Darrick J. Wong wrote:
> > RFR_TO_SRC_EOF is checked in generic_remap_file_range_prep,
> > so the file system should know about it  Also looking at it again now
> > it seems entirely superflous - we can just pass down then len == we
> > use in higher level code instead of having a flag and will side step
> > the issue here.
> 
> I'm not a fan of hidden behaviors like that, particularly when we
> already have a flags field where callers can explicitly ask for the
> to-eof behavior.

This just means we have a flag to mean len is 0 and needs to be filled,
rather than encoding that in the field itself.  If you feel better we
can replace 0 with 0xffffffff and still encode it in the field.

> > RFR_CAN_SHORTEN is advisory as no one has to shorten, but that can
> > easily be solved by including it everywhere.
> 
> CAN_SHORTEN isn't included everywhere

Include it everywhere as in allow it in every ->remap_file_range instance.

> I sort of thought about introducing a new copy_file_range flag that
> would just do deduplication and allow for opportunistic "dedup as much
> as you can" but ... meh.  Maybe I'll just drop the patch instead; we can
> revisit that when anyone wants a better dedupe interface.

Sounds fine to me.  The btrfs ioctl is really ugly, but then again
there is no pressing need for something better.
diff mbox series

Patch

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index a6c6a8af48a2..2ec27203e4a6 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -883,8 +883,9 @@  struct file_operations {
 	unsigned (*mmap_capabilities)(struct file *);
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
-	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
-	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
+	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 };
 
@@ -960,11 +961,8 @@  otherwise noted.
 
   copy_file_range: called by the copy_file_range(2) system call.
 
-  clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
-	FICLONE commands.
-
-  dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
-	command.
+  remap_file_range: called by the ioctl(2) system call for FICLONERANGE and
+	FICLONE and FIDEDUPERANGE commands to remap file ranges.
 
   fadvise: possibly called by the fadvise64() system call.
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2cddfe7806a4..124a05662fc2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3218,9 +3218,6 @@  void btrfs_get_block_group_info(struct list_head *groups_list,
 				struct btrfs_ioctl_space_info *space);
 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
-			    struct file *dst_file, loff_t dst_loff,
-			    u64 olen);
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
@@ -3250,8 +3247,9 @@  int btrfs_dirty_pages(struct inode *inode, struct page **pages,
 		      size_t num_pages, loff_t pos, size_t write_bytes,
 		      struct extent_state **cached);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
-int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
-			   struct file *file_out, loff_t pos_out, u64 len);
+int btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
+			   struct file *file_out, loff_t pos_out, u64 len,
+			   unsigned int remap_flags);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2be00e873e92..9a963f061393 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3269,8 +3269,7 @@  const struct file_operations btrfs_file_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_compat_ioctl,
 #endif
-	.clone_file_range = btrfs_clone_file_range,
-	.dedupe_file_range = btrfs_dedupe_file_range,
+	.remap_file_range = btrfs_remap_file_range,
 };
 
 void __cold btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d60b6caf09e8..bed5b8f9ec09 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3627,26 +3627,6 @@  static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 	return ret;
 }
 
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
-			    struct file *dst_file, loff_t dst_loff,
-			    u64 olen)
-{
-	struct inode *src = file_inode(src_file);
-	struct inode *dst = file_inode(dst_file);
-	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-
-	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
-		/*
-		 * Btrfs does not support blocksize < page_size. As a
-		 * result, btrfs_cmp_data() won't correctly handle
-		 * this situation without an update.
-		 */
-		return -EINVAL;
-	}
-
-	return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
-}
-
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     u64 endoff,
@@ -4348,9 +4328,30 @@  static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 	return ret;
 }
 
-int btrfs_clone_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len)
+int btrfs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, u64 len,
+		unsigned int remap_flags)
 {
+	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
+		return -EINVAL;
+
+	if (remap_flags & RFR_SAME_DATA) {
+		struct inode *src = file_inode(src_file);
+		struct inode *dst = file_inode(dst_file);
+		u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+
+		if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
+			/*
+			 * Btrfs does not support blocksize < page_size. As a
+			 * result, btrfs_cmp_data() won't correctly handle
+			 * this situation without an update.
+			 */
+			return -EINVAL;
+		}
+
+		return btrfs_extent_same(src, off, len, dst, destoff);
+	}
+
 	return btrfs_clone_files(dst_file, src_file, off, len, destoff);
 }
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7065426b3280..06b2587fcc77 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -975,8 +975,9 @@  const struct inode_operations cifs_symlink_inode_ops = {
 	.listxattr = cifs_listxattr,
 };
 
-static int cifs_clone_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len)
+static int cifs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, u64 len,
+		unsigned int remap_flags)
 {
 	struct inode *src_inode = file_inode(src_file);
 	struct inode *target_inode = file_inode(dst_file);
@@ -986,6 +987,9 @@  static int cifs_clone_file_range(struct file *src_file, loff_t off,
 	unsigned int xid;
 	int rc;
 
+	if (!remap_check_flags(remap_flags, 0))
+		return -EINVAL;
+
 	cifs_dbg(FYI, "clone range\n");
 
 	xid = get_xid();
@@ -1134,7 +1138,7 @@  const struct file_operations cifs_file_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1153,7 +1157,7 @@  const struct file_operations cifs_file_strict_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1172,7 +1176,7 @@  const struct file_operations cifs_file_direct_ops = {
 	.splice_write = iter_file_splice_write,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1191,7 +1195,7 @@  const struct file_operations cifs_file_nobrl_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1209,7 +1213,7 @@  const struct file_operations cifs_file_strict_nobrl_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1227,7 +1231,7 @@  const struct file_operations cifs_file_direct_nobrl_ops = {
 	.splice_write = iter_file_splice_write,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1239,7 +1243,7 @@  const struct file_operations cifs_dir_ops = {
 	.read    = generic_read_dir,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = generic_file_llseek,
 	.fsync = cifs_dir_fsync,
 };
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4288a6ecaf75..2452b1941f36 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -180,8 +180,9 @@  static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
 	return nfs42_proc_allocate(filep, offset, len);
 }
 
-static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
-		struct file *dst_file, loff_t dst_off, u64 count)
+static int nfs42_remap_file_range(struct file *src_file, loff_t src_off,
+		struct file *dst_file, loff_t dst_off, u64 count,
+		unsigned int remap_flags)
 {
 	struct inode *dst_inode = file_inode(dst_file);
 	struct nfs_server *server = NFS_SERVER(dst_inode);
@@ -190,6 +191,9 @@  static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 	bool same_inode = false;
 	int ret;
 
+	if (!remap_check_flags(remap_flags, 0))
+		return -EINVAL;
+
 	/* check alignment w.r.t. clone_blksize */
 	ret = -EINVAL;
 	if (bs) {
@@ -262,7 +266,7 @@  const struct file_operations nfs4_file_operations = {
 	.copy_file_range = nfs4_copy_file_range,
 	.llseek		= nfs4_file_llseek,
 	.fallocate	= nfs42_fallocate,
-	.clone_file_range = nfs42_clone_file_range,
+	.remap_file_range = nfs42_remap_file_range,
 #else
 	.llseek		= nfs_file_llseek,
 #endif
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9fa35cb6f6e0..852cdfaadd89 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2527,24 +2527,18 @@  static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
-static int ocfs2_file_clone_range(struct file *file_in,
+static int ocfs2_remap_file_range(struct file *file_in,
 				  loff_t pos_in,
 				  struct file *file_out,
 				  loff_t pos_out,
-				  u64 len)
+				  u64 len,
+				  unsigned int remap_flags)
 {
-	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					 len, false);
-}
+	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
+		return -EINVAL;
 
-static int ocfs2_file_dedupe_range(struct file *file_in,
-				   loff_t pos_in,
-				   struct file *file_out,
-				   loff_t pos_out,
-				   u64 len)
-{
 	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					  len, true);
+					 len, remap_flags & RFR_SAME_DATA);
 }
 
 const struct inode_operations ocfs2_file_iops = {
@@ -2586,8 +2580,7 @@  const struct file_operations ocfs2_fops = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= ocfs2_fallocate,
-	.clone_file_range = ocfs2_file_clone_range,
-	.dedupe_file_range = ocfs2_file_dedupe_range,
+	.remap_file_range = ocfs2_remap_file_range,
 };
 
 const struct file_operations ocfs2_dops = {
@@ -2633,8 +2626,7 @@  const struct file_operations ocfs2_fops_no_plocks = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= ocfs2_fallocate,
-	.clone_file_range = ocfs2_file_clone_range,
-	.dedupe_file_range = ocfs2_file_dedupe_range,
+	.remap_file_range = ocfs2_remap_file_range,
 };
 
 const struct file_operations ocfs2_dops_no_plocks = {
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 986313da0c88..455bf49bd07b 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -489,26 +489,31 @@  static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
 			    OVL_COPY);
 }
 
-static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out, u64 len)
+static int ovl_remap_file_range(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags)
 {
-	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-			    OVL_CLONE);
-}
+	enum ovl_copyop op;
+
+	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
+		return -EINVAL;
+
+	if (remap_flags & RFR_SAME_DATA)
+		op = OVL_DEDUPE;
+	else
+		op = OVL_CLONE;
 
-static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
-				 struct file *file_out, loff_t pos_out, u64 len)
-{
 	/*
 	 * Don't copy up because of a dedupe request, this wouldn't make sense
 	 * most of the time (data would be duplicated instead of deduplicated).
 	 */
-	if (!ovl_inode_upper(file_inode(file_in)) ||
-	    !ovl_inode_upper(file_inode(file_out)))
+	if (op == OVL_DEDUPE &&
+	    (!ovl_inode_upper(file_inode(file_in)) ||
+	     !ovl_inode_upper(file_inode(file_out))))
 		return -EPERM;
 
 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-			    OVL_DEDUPE);
+			    op);
 }
 
 const struct file_operations ovl_file_operations = {
@@ -525,6 +530,5 @@  const struct file_operations ovl_file_operations = {
 	.compat_ioctl	= ovl_compat_ioctl,
 
 	.copy_file_range	= ovl_copy_file_range,
-	.clone_file_range	= ovl_clone_file_range,
-	.dedupe_file_range	= ovl_dedupe_file_range,
+	.remap_file_range	= ovl_remap_file_range,
 };
diff --git a/fs/read_write.c b/fs/read_write.c
index 2d84d18dc095..fd3fe05060a4 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1588,9 +1588,9 @@  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	 * Try cloning first, this is supported by more file systems, and
 	 * more efficient if both clone and copy are supported (e.g. NFS).
 	 */
-	if (file_in->f_op->clone_file_range) {
-		ret = file_in->f_op->clone_file_range(file_in, pos_in,
-				file_out, pos_out, len);
+	if (file_in->f_op->remap_file_range) {
+		ret = file_in->f_op->remap_file_range(file_in, pos_in,
+				file_out, pos_out, len, 0);
 		if (ret == 0) {
 			ret = len;
 			goto done;
@@ -1831,7 +1831,7 @@  int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	    (file_out->f_flags & O_APPEND))
 		return -EBADF;
 
-	if (!file_in->f_op->clone_file_range)
+	if (!file_in->f_op->remap_file_range)
 		return -EOPNOTSUPP;
 
 	ret = clone_verify_area(file_in, pos_in, len, false);
@@ -1842,8 +1842,8 @@  int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (ret)
 		return ret;
 
-	ret = file_in->f_op->clone_file_range(file_in, pos_in,
-			file_out, pos_out, len);
+	ret = file_in->f_op->remap_file_range(file_in, pos_in,
+			file_out, pos_out, len, 0);
 	if (!ret) {
 		fsnotify_access(file_in);
 		fsnotify_modify(file_out);
@@ -1988,7 +1988,7 @@  int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 
 	ret = -EINVAL;
-	if (!dst_file->f_op->dedupe_file_range)
+	if (!dst_file->f_op->remap_file_range)
 		goto out_drop_write;
 
 	if (len == 0) {
@@ -1996,8 +1996,8 @@  int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 	}
 
-	ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
-						dst_file, dst_pos, len);
+	ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
+			dst_pos, len, RFR_SAME_DATA);
 out_drop_write:
 	mnt_drop_write_file(dst_file);
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 61a5ad2600e8..7cce438f856a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -920,27 +920,19 @@  xfs_file_fallocate(
 }
 
 STATIC int
-xfs_file_clone_range(
+xfs_file_remap_range(
 	struct file	*file_in,
 	loff_t		pos_in,
 	struct file	*file_out,
 	loff_t		pos_out,
-	u64		len)
+	u64		len,
+	unsigned int	remap_flags)
 {
-	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-				     len, false);
-}
+	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
+		return -EINVAL;
 
-STATIC int
-xfs_file_dedupe_range(
-	struct file	*file_in,
-	loff_t		pos_in,
-	struct file	*file_out,
-	loff_t		pos_out,
-	u64		len)
-{
 	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-				     len, true);
+			len, remap_flags & RFR_SAME_DATA);
 }
 
 STATIC int
@@ -1175,8 +1167,7 @@  const struct file_operations xfs_file_operations = {
 	.fsync		= xfs_file_fsync,
 	.get_unmapped_area = thp_get_unmapped_area,
 	.fallocate	= xfs_file_fallocate,
-	.clone_file_range = xfs_file_clone_range,
-	.dedupe_file_range = xfs_file_dedupe_range,
+	.remap_file_range = xfs_file_remap_range,
 };
 
 const struct file_operations xfs_dir_file_operations = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ba93a6e7dac4..11fe36576d34 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1721,6 +1721,26 @@  struct block_device_operations;
 #define NOMMU_VMFLAGS \
 	(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
 
+/*
+ * These flags control the behavior of the remap_file_range function pointer.
+ *
+ * RFR_SAME_DATA: only remap if contents identical (i.e. deduplicate)
+ */
+#define RFR_SAME_DATA		(1 << 0)
+
+#define RFR_VALID_FLAGS		(RFR_SAME_DATA)
+
+/*
+ * Filesystem remapping implementations should call this helper on their
+ * remap flags to filter out flags that the implementation doesn't support.
+ *
+ * Returns true if the flags are ok, false otherwise.
+ */
+static inline bool remap_check_flags(unsigned int remap_flags,
+				     unsigned int supported_flags)
+{
+	return (remap_flags & ~(supported_flags & RFR_VALID_FLAGS)) == 0;
+}
 
 struct iov_iter;
 
@@ -1759,10 +1779,9 @@  struct file_operations {
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
 			loff_t, size_t, unsigned int);
-	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
-			u64);
-	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
-			u64);
+	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 } __randomize_layout;