diff mbox series

[v3] vfs: fix copy_file_range regression in cross-fs copies

Message ID 20210217172654.22519-1-lhenriques@suse.de (mailing list archive)
State New
Headers show
Series [v3] vfs: fix copy_file_range regression in cross-fs copies | expand

Commit Message

Luis Henriques Feb. 17, 2021, 5:26 p.m. UTC
A regression has been reported by Nicolas Boichat, found while using the
copy_file_range syscall to copy a tracefs file.  Before commit
5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the
kernel would return -EXDEV to userspace when trying to copy a file across
different filesystems.  After this commit, the syscall doesn't fail anymore
and instead returns zero (zero bytes copied), as this file's content is
generated on-the-fly and thus reports a size of zero.

This patch restores some cross-filesystems copy restrictions that existed
prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across
devices").  It also introduces a flag (COPY_FILE_SPLICE) that can be used
by filesystems calling directly into the vfs copy_file_range to override
these restrictions.  Right now, only NFS needs to set this flag.

Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices")
Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/
Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/
Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/
Reported-by: Nicolas Boichat <drinkcat@chromium.org>
Signed-off-by: Luis Henriques <lhenriques@suse.de>
---
Ok, I've tried to address all the issues and comments.  Hopefully this v3
is a bit closer to the final fix.

Changes since v2
- do all the required checks earlier, in generic_copy_file_checks(),
  adding new checks for ->remap_file_range
- new COPY_FILE_SPLICE flag
- don't remove filesystem's fallback to generic_copy_file_range()
- updated commit changelog (and subject)
Changes since v1 (after Amir review)
- restored do_copy_file_range() helper
- return -EOPNOTSUPP if fs doesn't implement CFR
- updated commit description

 fs/nfsd/vfs.c      |  3 ++-
 fs/read_write.c    | 44 +++++++++++++++++++++++++++++++++++++++++---
 include/linux/fs.h |  7 +++++++
 3 files changed, 50 insertions(+), 4 deletions(-)

Comments

Amir Goldstein Feb. 17, 2021, 8:47 p.m. UTC | #1
On Wed, Feb 17, 2021 at 7:25 PM Luis Henriques <lhenriques@suse.de> wrote:
>
> A regression has been reported by Nicolas Boichat, found while using the
> copy_file_range syscall to copy a tracefs file.  Before commit
> 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the
> kernel would return -EXDEV to userspace when trying to copy a file across
> different filesystems.  After this commit, the syscall doesn't fail anymore
> and instead returns zero (zero bytes copied), as this file's content is
> generated on-the-fly and thus reports a size of zero.
>
> This patch restores some cross-filesystems copy restrictions that existed
> prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across
> devices").  It also introduces a flag (COPY_FILE_SPLICE) that can be used
> by filesystems calling directly into the vfs copy_file_range to override
> these restrictions.  Right now, only NFS needs to set this flag.
>
> Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices")
> Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/
> Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/
> Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/
> Reported-by: Nicolas Boichat <drinkcat@chromium.org>
> Signed-off-by: Luis Henriques <lhenriques@suse.de>
> ---
> Ok, I've tried to address all the issues and comments.  Hopefully this v3
> is a bit closer to the final fix.
>
> Changes since v2
> - do all the required checks earlier, in generic_copy_file_checks(),
>   adding new checks for ->remap_file_range
> - new COPY_FILE_SPLICE flag
> - don't remove filesystem's fallback to generic_copy_file_range()
> - updated commit changelog (and subject)
> Changes since v1 (after Amir review)
> - restored do_copy_file_range() helper
> - return -EOPNOTSUPP if fs doesn't implement CFR
> - updated commit description
>
>  fs/nfsd/vfs.c      |  3 ++-
>  fs/read_write.c    | 44 +++++++++++++++++++++++++++++++++++++++++---
>  include/linux/fs.h |  7 +++++++
>  3 files changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 04937e51de56..14e55822c223 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -578,7 +578,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
>          * limit like this and pipeline multiple COPY requests.
>          */
>         count = min_t(u64, count, 1 << 22);
> -       return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
> +       return vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
> +                                  COPY_FILE_SPLICE);
>  }
>
>  __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 75f764b43418..40a16003fb05 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1410,6 +1410,33 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
>                                        flags);
>  }
>
> +/*
> + * This helper function checks whether copy_file_range can actually be used,
> + * depending on the source and destination filesystems being the same.
> + *
> + * In-kernel callers may set COPY_FILE_SPLICE to override these checks.
> + */
> +static int fops_copy_file_checks(struct file *file_in, struct file *file_out,
> +                                unsigned int flags)
> +{
> +       if (WARN_ON_ONCE(flags & ~COPY_FILE_SPLICE))
> +               return -EINVAL;
> +
> +       if (flags & COPY_FILE_SPLICE)
> +               return 0;
> +       /*
> +        * We got here from userspace, so forbid copies if copy_file_range isn't
> +        * implemented or if we're doing a cross-fs copy.
> +        */

Suggest:

       if (!file_in->f_op->copy_file_range) {
               if (file_in->f_op->copy_file_range !=
                   file_out->f_op->copy_file_range)
                   return -EXDEV;
       } else if (file_in->f_op->remap_file_range) {
               if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
                    return -EXDEV;
       } else {
                return -EOPNOTSUPP;
       }

       return 0;
}

> +
>  /*
>   * Performs necessary checks before doing a file copy
>   *
> @@ -1427,6 +1454,14 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
>         loff_t size_in;
>         int ret;
>
> +       /* Only check f_ops if we're not trying to clone */
> +       if (!file_in->f_op->remap_file_range ||
> +           (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb)) {
> +               ret = fops_copy_file_checks(file_in, file_out, flags);
> +               if (ret)
> +                       return ret;
> +       }
> +

and then you don't need this special casing of clone here.

>         ret = generic_file_rw_checks(file_in, file_out);
>         if (ret)
>                 return ret;
> @@ -1474,9 +1509,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>  {
>         ssize_t ret;
>
> -       if (flags != 0)
> -               return -EINVAL;
> -
>         ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
>                                        flags);
>         if (unlikely(ret))
> @@ -1511,6 +1543,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>                         ret = cloned;
>                         goto done;
>                 }
> +               ret = fops_copy_file_checks(file_in, file_out, flags);
> +               if (ret)
> +                       return ret;

and you don't need this here (right?)

and you can remove the checks for same i_sb and same copy_file_range
op that were already tested from vfs_copy_file_range().

Hope I am not missing anything.

Thanks,
Amir.
Nicolas Boichat Feb. 18, 2021, 12:56 a.m. UTC | #2
On Thu, Feb 18, 2021 at 1:25 AM Luis Henriques <lhenriques@suse.de> wrote:
>
> A regression has been reported by Nicolas Boichat, found while using the
> copy_file_range syscall to copy a tracefs file.  Before commit
> 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the
> kernel would return -EXDEV to userspace when trying to copy a file across
> different filesystems.  After this commit, the syscall doesn't fail anymore
> and instead returns zero (zero bytes copied), as this file's content is
> generated on-the-fly and thus reports a size of zero.
>
> This patch restores some cross-filesystems copy restrictions that existed
> prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across
> devices").

Note that you also fix intra-filesystem copy_file_range on these
generated filesystems. This is IMHO great, but needs to be mentioned
in the commit message.

>  It also introduces a flag (COPY_FILE_SPLICE) that can be used
> by filesystems calling directly into the vfs copy_file_range to override
> these restrictions.  Right now, only NFS needs to set this flag.
>
> Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices")

So technically this fixes something much older, presumably ever since
copy_file_range was introduced.

> Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/
> Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/
> Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/
> Reported-by: Nicolas Boichat <drinkcat@chromium.org>

Tested-by: Nicolas Boichat <drinkcat@chromium.org>
but I guess you should not add to the next revision, I'll keep testing
further revisions ,-)

> Signed-off-by: Luis Henriques <lhenriques@suse.de>
> ---
> Ok, I've tried to address all the issues and comments.  Hopefully this v3
> is a bit closer to the final fix.
>
> Changes since v2
> - do all the required checks earlier, in generic_copy_file_checks(),
>   adding new checks for ->remap_file_range
> - new COPY_FILE_SPLICE flag
> - don't remove filesystem's fallback to generic_copy_file_range()
> - updated commit changelog (and subject)
> Changes since v1 (after Amir review)
> - restored do_copy_file_range() helper
> - return -EOPNOTSUPP if fs doesn't implement CFR
> - updated commit description
>
>  fs/nfsd/vfs.c      |  3 ++-
>  fs/read_write.c    | 44 +++++++++++++++++++++++++++++++++++++++++---
>  include/linux/fs.h |  7 +++++++
>  3 files changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 04937e51de56..14e55822c223 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -578,7 +578,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
>          * limit like this and pipeline multiple COPY requests.
>          */
>         count = min_t(u64, count, 1 << 22);
> -       return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
> +       return vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
> +                                  COPY_FILE_SPLICE);
>  }
>
>  __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 75f764b43418..40a16003fb05 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1410,6 +1410,33 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
>                                        flags);
>  }
>
> +/*
> + * This helper function checks whether copy_file_range can actually be used,
> + * depending on the source and destination filesystems being the same.
> + *
> + * In-kernel callers may set COPY_FILE_SPLICE to override these checks.
> + */
> +static int fops_copy_file_checks(struct file *file_in, struct file *file_out,

fops_copy_file_range_checks ?

> +                                unsigned int flags)
> +{
> +       if (WARN_ON_ONCE(flags & ~COPY_FILE_SPLICE))
> +               return -EINVAL;
> +
> +       if (flags & COPY_FILE_SPLICE)
> +               return 0;
> +       /*
> +        * We got here from userspace, so forbid copies if copy_file_range isn't
> +        * implemented or if we're doing a cross-fs copy.
> +        */
> +       if (!file_out->f_op->copy_file_range)
> +               return -EOPNOTSUPP;

After this is merged, should this be added as an error code to the man page?

> +       else if (file_out->f_op->copy_file_range !=
> +                file_in->f_op->copy_file_range)

Just note, this could be a cross-fs copy (just not a cross-fs_type copy).

> +               return -EXDEV;
> +
> +       return 0;
> +}
> +
>  /*
>   * Performs necessary checks before doing a file copy
>   *
> @@ -1427,6 +1454,14 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
>         loff_t size_in;
>         int ret;
>
> +       /* Only check f_ops if we're not trying to clone */
> +       if (!file_in->f_op->remap_file_range ||
> +           (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb)) {
> +               ret = fops_copy_file_checks(file_in, file_out, flags);
> +               if (ret)
> +                       return ret;
> +       }
> +
>         ret = generic_file_rw_checks(file_in, file_out);
>         if (ret)
>                 return ret;
> @@ -1474,9 +1509,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>  {
>         ssize_t ret;
>
> -       if (flags != 0)
> -               return -EINVAL;
> -
>         ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
>                                        flags);
>         if (unlikely(ret))
> @@ -1511,6 +1543,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>                         ret = cloned;
>                         goto done;
>                 }
> +               ret = fops_copy_file_checks(file_in, file_out, flags);
> +               if (ret)
> +                       return ret;
>         }
>
>         ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
> @@ -1543,6 +1578,9 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
>         struct fd f_out;
>         ssize_t ret = -EBADF;
>
> +       if (flags != 0)
> +               return -EINVAL;
> +
>         f_in = fdget(fd_in);
>         if (!f_in.file)
>                 goto out2;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index fd47deea7c17..6f604926d955 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1815,6 +1815,13 @@ struct dir_context {
>   */
>  #define REMAP_FILE_ADVISORY            (REMAP_FILE_CAN_SHORTEN)
>
> +/*
> + * This flag control the behavior of copy_file_range from internal (kernel)
> + * users.  It can be used to override the policy of forbidding copies when
> + * source and destination filesystems are different.
> + */
> +#define COPY_FILE_SPLICE               (1 << 0)

nit: BIT(0) ?


> +
>  struct iov_iter;
>
>  struct file_operations {
Olga Kornievskaia Feb. 18, 2021, 5:32 a.m. UTC | #3
On Wed, Feb 17, 2021 at 3:30 PM Luis Henriques <lhenriques@suse.de> wrote:
>
> A regression has been reported by Nicolas Boichat, found while using the
> copy_file_range syscall to copy a tracefs file.  Before commit
> 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the
> kernel would return -EXDEV to userspace when trying to copy a file across
> different filesystems.  After this commit, the syscall doesn't fail anymore
> and instead returns zero (zero bytes copied), as this file's content is
> generated on-the-fly and thus reports a size of zero.
>
> This patch restores some cross-filesystems copy restrictions that existed
> prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across
> devices").  It also introduces a flag (COPY_FILE_SPLICE) that can be used
> by filesystems calling directly into the vfs copy_file_range to override
> these restrictions.  Right now, only NFS needs to set this flag.
>
> Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices")
> Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/
> Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/
> Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/
> Reported-by: Nicolas Boichat <drinkcat@chromium.org>
> Signed-off-by: Luis Henriques <lhenriques@suse.de>
> ---
> Ok, I've tried to address all the issues and comments.  Hopefully this v3
> is a bit closer to the final fix.
>
> Changes since v2
> - do all the required checks earlier, in generic_copy_file_checks(),
>   adding new checks for ->remap_file_range
> - new COPY_FILE_SPLICE flag
> - don't remove filesystem's fallback to generic_copy_file_range()
> - updated commit changelog (and subject)
> Changes since v1 (after Amir review)
> - restored do_copy_file_range() helper
> - return -EOPNOTSUPP if fs doesn't implement CFR
> - updated commit description

In my testing, this patch breaks NFS server-to-server copy file.

>
>  fs/nfsd/vfs.c      |  3 ++-
>  fs/read_write.c    | 44 +++++++++++++++++++++++++++++++++++++++++---
>  include/linux/fs.h |  7 +++++++
>  3 files changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 04937e51de56..14e55822c223 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -578,7 +578,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
>          * limit like this and pipeline multiple COPY requests.
>          */
>         count = min_t(u64, count, 1 << 22);
> -       return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
> +       return vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
> +                                  COPY_FILE_SPLICE);
>  }
>
>  __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 75f764b43418..40a16003fb05 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1410,6 +1410,33 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
>                                        flags);
>  }
>
> +/*
> + * This helper function checks whether copy_file_range can actually be used,
> + * depending on the source and destination filesystems being the same.
> + *
> + * In-kernel callers may set COPY_FILE_SPLICE to override these checks.
> + */
> +static int fops_copy_file_checks(struct file *file_in, struct file *file_out,
> +                                unsigned int flags)
> +{
> +       if (WARN_ON_ONCE(flags & ~COPY_FILE_SPLICE))
> +               return -EINVAL;
> +
> +       if (flags & COPY_FILE_SPLICE)
> +               return 0;
> +       /*
> +        * We got here from userspace, so forbid copies if copy_file_range isn't
> +        * implemented or if we're doing a cross-fs copy.
> +        */
> +       if (!file_out->f_op->copy_file_range)
> +               return -EOPNOTSUPP;
> +       else if (file_out->f_op->copy_file_range !=
> +                file_in->f_op->copy_file_range)
> +               return -EXDEV;
> +
> +       return 0;
> +}
> +
>  /*
>   * Performs necessary checks before doing a file copy
>   *
> @@ -1427,6 +1454,14 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
>         loff_t size_in;
>         int ret;
>
> +       /* Only check f_ops if we're not trying to clone */
> +       if (!file_in->f_op->remap_file_range ||
> +           (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb)) {
> +               ret = fops_copy_file_checks(file_in, file_out, flags);
> +               if (ret)
> +                       return ret;
> +       }
> +
>         ret = generic_file_rw_checks(file_in, file_out);
>         if (ret)
>                 return ret;
> @@ -1474,9 +1509,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>  {
>         ssize_t ret;
>
> -       if (flags != 0)
> -               return -EINVAL;
> -
>         ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
>                                        flags);
>         if (unlikely(ret))
> @@ -1511,6 +1543,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>                         ret = cloned;
>                         goto done;
>                 }
> +               ret = fops_copy_file_checks(file_in, file_out, flags);
> +               if (ret)
> +                       return ret;
>         }
>
>         ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
> @@ -1543,6 +1578,9 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
>         struct fd f_out;
>         ssize_t ret = -EBADF;
>
> +       if (flags != 0)
> +               return -EINVAL;
> +
>         f_in = fdget(fd_in);
>         if (!f_in.file)
>                 goto out2;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index fd47deea7c17..6f604926d955 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1815,6 +1815,13 @@ struct dir_context {
>   */
>  #define REMAP_FILE_ADVISORY            (REMAP_FILE_CAN_SHORTEN)
>
> +/*
> + * This flag control the behavior of copy_file_range from internal (kernel)
> + * users.  It can be used to override the policy of forbidding copies when
> + * source and destination filesystems are different.
> + */
> +#define COPY_FILE_SPLICE               (1 << 0)
> +
>  struct iov_iter;
>
>  struct file_operations {
Amir Goldstein Feb. 18, 2021, 6:47 a.m. UTC | #4
On Thu, Feb 18, 2021 at 7:33 AM Olga Kornievskaia <aglo@umich.edu> wrote:
>
> On Wed, Feb 17, 2021 at 3:30 PM Luis Henriques <lhenriques@suse.de> wrote:
> >
> > A regression has been reported by Nicolas Boichat, found while using the
> > copy_file_range syscall to copy a tracefs file.  Before commit
> > 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the
> > kernel would return -EXDEV to userspace when trying to copy a file across
> > different filesystems.  After this commit, the syscall doesn't fail anymore
> > and instead returns zero (zero bytes copied), as this file's content is
> > generated on-the-fly and thus reports a size of zero.
> >
> > This patch restores some cross-filesystems copy restrictions that existed
> > prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across
> > devices").  It also introduces a flag (COPY_FILE_SPLICE) that can be used
> > by filesystems calling directly into the vfs copy_file_range to override
> > these restrictions.  Right now, only NFS needs to set this flag.
> >
> > Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices")
> > Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/
> > Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/
> > Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/
> > Reported-by: Nicolas Boichat <drinkcat@chromium.org>
> > Signed-off-by: Luis Henriques <lhenriques@suse.de>
> > ---
> > Ok, I've tried to address all the issues and comments.  Hopefully this v3
> > is a bit closer to the final fix.
> >
> > Changes since v2
> > - do all the required checks earlier, in generic_copy_file_checks(),
> >   adding new checks for ->remap_file_range
> > - new COPY_FILE_SPLICE flag
> > - don't remove filesystem's fallback to generic_copy_file_range()
> > - updated commit changelog (and subject)
> > Changes since v1 (after Amir review)
> > - restored do_copy_file_range() helper
> > - return -EOPNOTSUPP if fs doesn't implement CFR
> > - updated commit description
>
> In my testing, this patch breaks NFS server-to-server copy file.

Hi Olga,

Can you please provide more details on the failed tests.

Does it fail on the client between two nfs mounts or does it fail
on the server? If the latter, between which two filesystems on the server?

Thanks,
Amir.
Christoph Hellwig Feb. 18, 2021, 7:43 a.m. UTC | #5
On Wed, Feb 17, 2021 at 05:26:54PM +0000, Luis Henriques wrote:
> A regression has been reported by Nicolas Boichat, found while using the
> copy_file_range syscall to copy a tracefs file.  Before commit
> 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the
> kernel would return -EXDEV to userspace when trying to copy a file across
> different filesystems.  After this commit, the syscall doesn't fail anymore
> and instead returns zero (zero bytes copied), as this file's content is
> generated on-the-fly and thus reports a size of zero.
> 
> This patch restores some cross-filesystems copy restrictions that existed
> prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across
> devices").  It also introduces a flag (COPY_FILE_SPLICE) that can be used
> by filesystems calling directly into the vfs copy_file_range to override
> these restrictions.  Right now, only NFS needs to set this flag.

No need for the flag.  Jyst fall back to splicing in the only caller
that wants it.
Olga Kornievskaia Feb. 18, 2021, 4:28 p.m. UTC | #6
On Thu, Feb 18, 2021 at 1:48 AM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Thu, Feb 18, 2021 at 7:33 AM Olga Kornievskaia <aglo@umich.edu> wrote:
> >
> > On Wed, Feb 17, 2021 at 3:30 PM Luis Henriques <lhenriques@suse.de> wrote:
> > >
> > > A regression has been reported by Nicolas Boichat, found while using the
> > > copy_file_range syscall to copy a tracefs file.  Before commit
> > > 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the
> > > kernel would return -EXDEV to userspace when trying to copy a file across
> > > different filesystems.  After this commit, the syscall doesn't fail anymore
> > > and instead returns zero (zero bytes copied), as this file's content is
> > > generated on-the-fly and thus reports a size of zero.
> > >
> > > This patch restores some cross-filesystems copy restrictions that existed
> > > prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across
> > > devices").  It also introduces a flag (COPY_FILE_SPLICE) that can be used
> > > by filesystems calling directly into the vfs copy_file_range to override
> > > these restrictions.  Right now, only NFS needs to set this flag.
> > >
> > > Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices")
> > > Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/
> > > Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/
> > > Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/
> > > Reported-by: Nicolas Boichat <drinkcat@chromium.org>
> > > Signed-off-by: Luis Henriques <lhenriques@suse.de>
> > > ---
> > > Ok, I've tried to address all the issues and comments.  Hopefully this v3
> > > is a bit closer to the final fix.
> > >
> > > Changes since v2
> > > - do all the required checks earlier, in generic_copy_file_checks(),
> > >   adding new checks for ->remap_file_range
> > > - new COPY_FILE_SPLICE flag
> > > - don't remove filesystem's fallback to generic_copy_file_range()
> > > - updated commit changelog (and subject)
> > > Changes since v1 (after Amir review)
> > > - restored do_copy_file_range() helper
> > > - return -EOPNOTSUPP if fs doesn't implement CFR
> > > - updated commit description
> >
> > In my testing, this patch breaks NFS server-to-server copy file.
>
> Hi Olga,
>
> Can you please provide more details on the failed tests.
>
> Does it fail on the client between two nfs mounts or does it fail
> on the server? If the latter, between which two filesystems on the server?
>

It was a pilot error. V3 worked. I'm having some other issues with
server to server copy code but they seem to be unrelated to this. I
will test the new v6 versions when it comes out.

> Thanks,
> Amir.
diff mbox series

Patch

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 04937e51de56..14e55822c223 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -578,7 +578,8 @@  ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
 	 * limit like this and pipeline multiple COPY requests.
 	 */
 	count = min_t(u64, count, 1 << 22);
-	return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+	return vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
+				   COPY_FILE_SPLICE);
 }
 
 __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
diff --git a/fs/read_write.c b/fs/read_write.c
index 75f764b43418..40a16003fb05 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1410,6 +1410,33 @@  static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
 				       flags);
 }
 
+/*
+ * This helper function checks whether copy_file_range can actually be used,
+ * depending on the source and destination filesystems being the same.
+ *
+ * In-kernel callers may set COPY_FILE_SPLICE to override these checks.
+ */
+static int fops_copy_file_checks(struct file *file_in, struct file *file_out,
+				 unsigned int flags)
+{
+	if (WARN_ON_ONCE(flags & ~COPY_FILE_SPLICE))
+		return -EINVAL;
+
+	if (flags & COPY_FILE_SPLICE)
+		return 0;
+	/*
+	 * We got here from userspace, so forbid copies if copy_file_range isn't
+	 * implemented or if we're doing a cross-fs copy.
+	 */
+	if (!file_out->f_op->copy_file_range)
+		return -EOPNOTSUPP;
+	else if (file_out->f_op->copy_file_range !=
+		 file_in->f_op->copy_file_range)
+		return -EXDEV;
+
+	return 0;
+}
+
 /*
  * Performs necessary checks before doing a file copy
  *
@@ -1427,6 +1454,14 @@  static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
 	loff_t size_in;
 	int ret;
 
+	/* Only check f_ops if we're not trying to clone */
+	if (!file_in->f_op->remap_file_range ||
+	    (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb)) {
+		ret = fops_copy_file_checks(file_in, file_out, flags);
+		if (ret)
+			return ret;
+	}
+
 	ret = generic_file_rw_checks(file_in, file_out);
 	if (ret)
 		return ret;
@@ -1474,9 +1509,6 @@  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 {
 	ssize_t ret;
 
-	if (flags != 0)
-		return -EINVAL;
-
 	ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
 				       flags);
 	if (unlikely(ret))
@@ -1511,6 +1543,9 @@  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 			ret = cloned;
 			goto done;
 		}
+		ret = fops_copy_file_checks(file_in, file_out, flags);
+		if (ret)
+			return ret;
 	}
 
 	ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
@@ -1543,6 +1578,9 @@  SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
 	struct fd f_out;
 	ssize_t ret = -EBADF;
 
+	if (flags != 0)
+		return -EINVAL;
+
 	f_in = fdget(fd_in);
 	if (!f_in.file)
 		goto out2;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fd47deea7c17..6f604926d955 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1815,6 +1815,13 @@  struct dir_context {
  */
 #define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
 
+/*
+ * This flag control the behavior of copy_file_range from internal (kernel)
+ * users.  It can be used to override the policy of forbidding copies when
+ * source and destination filesystems are different.
+ */
+#define COPY_FILE_SPLICE		(1 << 0)
+
 struct iov_iter;
 
 struct file_operations {