diff mbox series

ovl: factor out some common helpers for backing files io

Message ID 20230912185408.3343163-1-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show
Series ovl: factor out some common helpers for backing files io | expand

Commit Message

Amir Goldstein Sept. 12, 2023, 6:54 p.m. UTC
Overlayfs stores its files data in backing files on other filesystems.

Factor out some common helpers to perform io to backing files, that will
later be reused by fuse passthrough code.

Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---

Miklos,

This is the re-factoring that you suggested in the FUSE passthrough
patches discussion linked above.

This patch is based on the overlayfs prep patch set I just posted [1].

Although overlayfs currently is the only user of these backing file
helpers, I am sending this patch to a wider audience in case other
filesystem developers want to comment on the abstraction.

We could perhaps later considering moving backing_file_open() helper
and related code to backing_file.c.

In any case, if there are no objections, I plan to queue this work
for 6.7 via the overlayfs tree.

Thanks,
Amir.

[1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/


 MAINTAINERS                  |   2 +
 fs/Kconfig                   |   4 +
 fs/Makefile                  |   1 +
 fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++
 fs/overlayfs/Kconfig         |   1 +
 fs/overlayfs/file.c          | 137 ++----------------------------
 fs/overlayfs/overlayfs.h     |   2 -
 fs/overlayfs/super.c         |  11 +--
 include/linux/backing_file.h |  22 +++++
 9 files changed, 199 insertions(+), 141 deletions(-)
 create mode 100644 fs/backing_file.c
 create mode 100644 include/linux/backing_file.h

Comments

Amir Goldstein Sept. 13, 2023, 5:59 a.m. UTC | #1
[forgot to CC overlayfs list]

On Tue, Sep 12, 2023 at 9:54 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> Overlayfs stores its files data in backing files on other filesystems.
>
> Factor out some common helpers to perform io to backing files, that will
> later be reused by fuse passthrough code.
>
> Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
> Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>
> Miklos,
>
> This is the re-factoring that you suggested in the FUSE passthrough
> patches discussion linked above.
>
> This patch is based on the overlayfs prep patch set I just posted [1].
>
> Although overlayfs currently is the only user of these backing file
> helpers, I am sending this patch to a wider audience in case other
> filesystem developers want to comment on the abstraction.
>
> We could perhaps later considering moving backing_file_open() helper
> and related code to backing_file.c.
>
> In any case, if there are no objections, I plan to queue this work
> for 6.7 via the overlayfs tree.
>
> Thanks,
> Amir.
>
> [1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/
>
>
>  MAINTAINERS                  |   2 +
>  fs/Kconfig                   |   4 +
>  fs/Makefile                  |   1 +
>  fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++
>  fs/overlayfs/Kconfig         |   1 +
>  fs/overlayfs/file.c          | 137 ++----------------------------
>  fs/overlayfs/overlayfs.h     |   2 -
>  fs/overlayfs/super.c         |  11 +--
>  include/linux/backing_file.h |  22 +++++
>  9 files changed, 199 insertions(+), 141 deletions(-)
>  create mode 100644 fs/backing_file.c
>  create mode 100644 include/linux/backing_file.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 90f13281d297..4e1d21773e0e 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -16092,7 +16092,9 @@ L:      linux-unionfs@vger.kernel.org
>  S:     Supported
>  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git
>  F:     Documentation/filesystems/overlayfs.rst
> +F:     fs/backing_file.c
>  F:     fs/overlayfs/
> +F:     include/linux/backing_file.h
>
>  P54 WIRELESS DRIVER
>  M:     Christian Lamparter <chunkeey@googlemail.com>
> diff --git a/fs/Kconfig b/fs/Kconfig
> index aa7e03cc1941..9027a88ffa47 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -26,6 +26,10 @@ config LEGACY_DIRECT_IO
>         depends on BUFFER_HEAD
>         bool
>
> +# Common backing file helpers
> +config FS_BACKING_FILE
> +       bool
> +
>  if BLOCK
>
>  source "fs/ext2/Kconfig"
> diff --git a/fs/Makefile b/fs/Makefile
> index f9541f40be4e..95ef06cff388 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -39,6 +39,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF)       += compat_binfmt_elf.o
>  obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
>  obj-$(CONFIG_BINFMT_FLAT)      += binfmt_flat.o
>
> +obj-$(CONFIG_FS_BACKING_FILE)  += backing_file.o
>  obj-$(CONFIG_FS_MBCACHE)       += mbcache.o
>  obj-$(CONFIG_FS_POSIX_ACL)     += posix_acl.o
>  obj-$(CONFIG_NFS_COMMON)       += nfs_common/
> diff --git a/fs/backing_file.c b/fs/backing_file.c
> new file mode 100644
> index 000000000000..ea895ca1639d
> --- /dev/null
> +++ b/fs/backing_file.c
> @@ -0,0 +1,160 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Common helpers for backing file io.
> + * Forked from fs/overlayfs/file.c.
> + *
> + * Copyright (C) 2017 Red Hat, Inc.
> + * Copyright (C) 2023 CTERA Networks.
> + */
> +
> +#include <linux/backing_file.h>
> +
> +struct backing_aio_req {
> +       struct kiocb iocb;
> +       refcount_t ref;
> +       struct kiocb *orig_iocb;
> +       void (*cleanup)(struct kiocb *, long);
> +};
> +
> +static struct kmem_cache *backing_aio_req_cachep;
> +
> +#define BACKING_IOCB_MASK \
> +       (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
> +
> +static rwf_t iocb_to_rw_flags(int flags)
> +{
> +       return (__force rwf_t)(flags & BACKING_IOCB_MASK);
> +}
> +
> +static void backing_aio_put(struct backing_aio_req *aio_req)
> +{
> +       if (refcount_dec_and_test(&aio_req->ref)) {
> +               fput(aio_req->iocb.ki_filp);
> +               kmem_cache_free(backing_aio_req_cachep, aio_req);
> +       }
> +}
> +
> +/* Completion for submitted/failed async rw io */
> +static void backing_aio_cleanup(struct backing_aio_req *aio_req, long res)
> +{
> +       struct kiocb *iocb = &aio_req->iocb;
> +       struct kiocb *orig_iocb = aio_req->orig_iocb;
> +
> +       if (iocb->ki_flags & IOCB_WRITE)
> +               kiocb_end_write(iocb);
> +
> +       orig_iocb->ki_pos = iocb->ki_pos;
> +       if (aio_req->cleanup)
> +               aio_req->cleanup(orig_iocb, res);
> +
> +       backing_aio_put(aio_req);
> +}
> +
> +/* Completion for submitted async rw io */
> +static void backing_aio_rw_complete(struct kiocb *iocb, long res)
> +{
> +       struct backing_aio_req *aio_req = container_of(iocb,
> +                                              struct backing_aio_req, iocb);
> +       struct kiocb *orig_iocb = aio_req->orig_iocb;
> +
> +       backing_aio_cleanup(aio_req, res);
> +       orig_iocb->ki_complete(orig_iocb, res);
> +}
> +
> +
> +ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
> +                              struct kiocb *iocb, int flags,
> +                              void (*cleanup)(struct kiocb *, long))
> +{
> +       struct backing_aio_req *aio_req = NULL;
> +       ssize_t ret;
> +
> +       if (!iov_iter_count(iter))
> +               return 0;
> +
> +       if (iocb->ki_flags & IOCB_DIRECT &&
> +           !(file->f_mode & FMODE_CAN_ODIRECT))
> +               return -EINVAL;
> +
> +       if (is_sync_kiocb(iocb)) {
> +               rwf_t rwf = iocb_to_rw_flags(flags);
> +
> +               ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
> +               if (cleanup)
> +                       cleanup(iocb, ret);
> +       } else {
> +               aio_req = kmem_cache_zalloc(backing_aio_req_cachep, GFP_KERNEL);
> +               if (!aio_req)
> +                       return -ENOMEM;
> +
> +               aio_req->orig_iocb = iocb;
> +               aio_req->cleanup = cleanup;
> +               kiocb_clone(&aio_req->iocb, iocb, get_file(file));
> +               aio_req->iocb.ki_complete = backing_aio_rw_complete;
> +               refcount_set(&aio_req->ref, 2);
> +               ret = vfs_iocb_iter_read(file, &aio_req->iocb, iter);
> +               backing_aio_put(aio_req);
> +               if (ret != -EIOCBQUEUED)
> +                       backing_aio_cleanup(aio_req, ret);
> +       }
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(backing_file_read_iter);
> +
> +ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
> +                               struct kiocb *iocb, int flags,
> +                               void (*cleanup)(struct kiocb *, long))
> +{
> +       ssize_t ret;
> +
> +       if (!iov_iter_count(iter))
> +               return 0;
> +
> +       if (iocb->ki_flags & IOCB_DIRECT &&
> +           !(file->f_mode & FMODE_CAN_ODIRECT))
> +               return -EINVAL;
> +
> +       if (is_sync_kiocb(iocb)) {
> +               rwf_t rwf = iocb_to_rw_flags(flags);
> +
> +               file_start_write(file);
> +               ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
> +               file_end_write(file);
> +               if (cleanup)
> +                       cleanup(iocb, ret);
> +       } else {
> +               struct backing_aio_req *aio_req;
> +
> +               aio_req = kmem_cache_zalloc(backing_aio_req_cachep, GFP_KERNEL);
> +               if (!aio_req)
> +                       return -ENOMEM;
> +
> +               aio_req->orig_iocb = iocb;
> +               aio_req->cleanup = cleanup;
> +               kiocb_clone(&aio_req->iocb, iocb, get_file(file));
> +               aio_req->iocb.ki_flags = flags;
> +               aio_req->iocb.ki_complete = backing_aio_rw_complete;
> +               refcount_set(&aio_req->ref, 2);
> +               kiocb_start_write(&aio_req->iocb);
> +               ret = vfs_iocb_iter_write(file, &aio_req->iocb, iter);
> +               backing_aio_put(aio_req);
> +               if (ret != -EIOCBQUEUED)
> +                       backing_aio_cleanup(aio_req, ret);
> +       }
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(backing_file_write_iter);
> +
> +static int __init backing_aio_init(void)
> +{
> +       backing_aio_req_cachep = kmem_cache_create("backing_aio_req",
> +                                          sizeof(struct backing_aio_req),
> +                                          0, SLAB_HWCACHE_ALIGN, NULL);
> +       if (!backing_aio_req_cachep)
> +               return -ENOMEM;
> +
> +       return 0;
> +}
> +fs_initcall(backing_aio_init);
> diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
> index fec5020c3495..7f52d9031cff 100644
> --- a/fs/overlayfs/Kconfig
> +++ b/fs/overlayfs/Kconfig
> @@ -1,6 +1,7 @@
>  # SPDX-License-Identifier: GPL-2.0-only
>  config OVERLAY_FS
>         tristate "Overlay filesystem support"
> +       select FS_BACKING_FILE
>         select EXPORTFS
>         help
>           An overlay filesystem combines two filesystems - an 'upper' filesystem
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 05ec614f7054..81fe6a85cad9 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -13,16 +13,9 @@
>  #include <linux/security.h>
>  #include <linux/mm.h>
>  #include <linux/fs.h>
> +#include <linux/backing_file.h>
>  #include "overlayfs.h"
>
> -struct ovl_aio_req {
> -       struct kiocb iocb;
> -       refcount_t ref;
> -       struct kiocb *orig_iocb;
> -};
> -
> -static struct kmem_cache *ovl_aio_request_cachep;
> -
>  static char ovl_whatisit(struct inode *inode, struct inode *realinode)
>  {
>         if (realinode != ovl_inode_upper(inode))
> @@ -262,24 +255,8 @@ static void ovl_file_accessed(struct file *file)
>         touch_atime(&file->f_path);
>  }
>
> -#define OVL_IOCB_MASK \
> -       (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
> -
> -static rwf_t iocb_to_rw_flags(int flags)
> -{
> -       return (__force rwf_t)(flags & OVL_IOCB_MASK);
> -}
> -
> -static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
> -{
> -       if (refcount_dec_and_test(&aio_req->ref)) {
> -               fput(aio_req->iocb.ki_filp);
> -               kmem_cache_free(ovl_aio_request_cachep, aio_req);
> -       }
> -}
> -
>  /* Completion for submitted/failed sync/async rw io */
> -static void ovl_rw_complete(struct kiocb *orig_iocb)
> +static void ovl_rw_complete(struct kiocb *orig_iocb, long res)
>  {
>         struct file *file = orig_iocb->ki_filp;
>
> @@ -292,32 +269,6 @@ static void ovl_rw_complete(struct kiocb *orig_iocb)
>         }
>  }
>
> -/* Completion for submitted/failed async rw io */
> -static void ovl_aio_cleanup(struct ovl_aio_req *aio_req)
> -{
> -       struct kiocb *iocb = &aio_req->iocb;
> -       struct kiocb *orig_iocb = aio_req->orig_iocb;
> -
> -       if (iocb->ki_flags & IOCB_WRITE)
> -               kiocb_end_write(iocb);
> -
> -       orig_iocb->ki_pos = iocb->ki_pos;
> -       ovl_rw_complete(orig_iocb);
> -
> -       ovl_aio_put(aio_req);
> -}
> -
> -/* Completion for submitted async rw io */
> -static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
> -{
> -       struct ovl_aio_req *aio_req = container_of(iocb,
> -                                                  struct ovl_aio_req, iocb);
> -       struct kiocb *orig_iocb = aio_req->orig_iocb;
> -
> -       ovl_aio_cleanup(aio_req);
> -       orig_iocb->ki_complete(orig_iocb, res);
> -}
> -
>  static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>  {
>         struct file *file = iocb->ki_filp;
> @@ -332,38 +283,10 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>         if (ret)
>                 return ret;
>
> -       ret = -EINVAL;
> -       if (iocb->ki_flags & IOCB_DIRECT &&
> -           !(real.file->f_mode & FMODE_CAN_ODIRECT))
> -               goto out_fdput;
> -
>         old_cred = ovl_override_creds(file_inode(file)->i_sb);
> -       if (is_sync_kiocb(iocb)) {
> -               rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags);
> -
> -               ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf);
> -               ovl_rw_complete(iocb);
> -       } else {
> -               struct ovl_aio_req *aio_req;
> -
> -               ret = -ENOMEM;
> -               aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
> -               if (!aio_req)
> -                       goto out;
> -
> -               real.flags = 0;
> -               aio_req->orig_iocb = iocb;
> -               kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
> -               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
> -               refcount_set(&aio_req->ref, 2);
> -               ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
> -               ovl_aio_put(aio_req);
> -               if (ret != -EIOCBQUEUED)
> -                       ovl_aio_cleanup(aio_req);
> -       }
> -out:
> +       ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
> +                                    ovl_rw_complete);
>         revert_creds(old_cred);
> -out_fdput:
>         fdput(real);
>
>         return ret;
> @@ -392,45 +315,13 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
>         if (ret)
>                 goto out_unlock;
>
> -       ret = -EINVAL;
> -       if (iocb->ki_flags & IOCB_DIRECT &&
> -           !(real.file->f_mode & FMODE_CAN_ODIRECT))
> -               goto out_fdput;
> -
>         if (!ovl_should_sync(OVL_FS(inode->i_sb)))
>                 flags &= ~(IOCB_DSYNC | IOCB_SYNC);
>
>         old_cred = ovl_override_creds(inode->i_sb);
> -       if (is_sync_kiocb(iocb)) {
> -               rwf_t rwf = iocb_to_rw_flags(flags);
> -
> -               file_start_write(real.file);
> -               ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
> -               file_end_write(real.file);
> -               ovl_rw_complete(iocb);
> -       } else {
> -               struct ovl_aio_req *aio_req;
> -
> -               ret = -ENOMEM;
> -               aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
> -               if (!aio_req)
> -                       goto out;
> -
> -               real.flags = 0;
> -               aio_req->orig_iocb = iocb;
> -               kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
> -               aio_req->iocb.ki_flags = flags;
> -               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
> -               refcount_set(&aio_req->ref, 2);
> -               kiocb_start_write(&aio_req->iocb);
> -               ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
> -               ovl_aio_put(aio_req);
> -               if (ret != -EIOCBQUEUED)
> -                       ovl_aio_cleanup(aio_req);
> -       }
> -out:
> +       ret = backing_file_write_iter(real.file, iter, iocb, flags,
> +                                     ovl_rw_complete);
>         revert_creds(old_cred);
> -out_fdput:
>         fdput(real);
>
>  out_unlock:
> @@ -742,19 +633,3 @@ const struct file_operations ovl_file_operations = {
>         .copy_file_range        = ovl_copy_file_range,
>         .remap_file_range       = ovl_remap_file_range,
>  };
> -
> -int __init ovl_aio_request_cache_init(void)
> -{
> -       ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
> -                                                  sizeof(struct ovl_aio_req),
> -                                                  0, SLAB_HWCACHE_ALIGN, NULL);
> -       if (!ovl_aio_request_cachep)
> -               return -ENOMEM;
> -
> -       return 0;
> -}
> -
> -void ovl_aio_request_cache_destroy(void)
> -{
> -       kmem_cache_destroy(ovl_aio_request_cachep);
> -}
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 9817b2dcb132..64b98e67e826 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -799,8 +799,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
>
>  /* file.c */
>  extern const struct file_operations ovl_file_operations;
> -int __init ovl_aio_request_cache_init(void);
> -void ovl_aio_request_cache_destroy(void);
>  int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
>  int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
>  int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index def266b5e2a3..8c132467fca1 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -1530,14 +1530,10 @@ static int __init ovl_init(void)
>         if (ovl_inode_cachep == NULL)
>                 return -ENOMEM;
>
> -       err = ovl_aio_request_cache_init();
> -       if (!err) {
> -               err = register_filesystem(&ovl_fs_type);
> -               if (!err)
> -                       return 0;
> +       err = register_filesystem(&ovl_fs_type);
> +       if (!err)
> +               return 0;
>
> -               ovl_aio_request_cache_destroy();
> -       }
>         kmem_cache_destroy(ovl_inode_cachep);
>
>         return err;
> @@ -1553,7 +1549,6 @@ static void __exit ovl_exit(void)
>          */
>         rcu_barrier();
>         kmem_cache_destroy(ovl_inode_cachep);
> -       ovl_aio_request_cache_destroy();
>  }
>
>  module_init(ovl_init);
> diff --git a/include/linux/backing_file.h b/include/linux/backing_file.h
> new file mode 100644
> index 000000000000..1428fe7b26bb
> --- /dev/null
> +++ b/include/linux/backing_file.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Common helpers for backing file io.
> + *
> + * Copyright (C) 2023 CTERA Networks.
> + */
> +
> +#ifndef _LINUX_BACKING_FILE_H
> +#define _LINUX_BACKING_FILE_H
> +
> +#include <linux/file.h>
> +#include <linux/uio.h>
> +#include <linux/fs.h>
> +
> +ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
> +                              struct kiocb *iocb, int flags,
> +                              void (*cleanup)(struct kiocb *, long));
> +ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
> +                               struct kiocb *iocb, int flags,
> +                               void (*cleanup)(struct kiocb *, long));
> +
> +#endif /* _LINUX_BACKING_FILE_H */
> --
> 2.34.1
>
Christian Brauner Sept. 13, 2023, 8:29 a.m. UTC | #2
On Tue, Sep 12, 2023 at 09:54:08PM +0300, Amir Goldstein wrote:
> Overlayfs stores its files data in backing files on other filesystems.
> 
> Factor out some common helpers to perform io to backing files, that will
> later be reused by fuse passthrough code.
> 
> Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
> Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
> 
> Miklos,
> 
> This is the re-factoring that you suggested in the FUSE passthrough
> patches discussion linked above.
> 
> This patch is based on the overlayfs prep patch set I just posted [1].
> 
> Although overlayfs currently is the only user of these backing file
> helpers, I am sending this patch to a wider audience in case other
> filesystem developers want to comment on the abstraction.
> 
> We could perhaps later considering moving backing_file_open() helper
> and related code to backing_file.c.
> 
> In any case, if there are no objections, I plan to queue this work
> for 6.7 via the overlayfs tree.
> 
> Thanks,
> Amir.
> 
> [1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/
> 
> 
>  MAINTAINERS                  |   2 +
>  fs/Kconfig                   |   4 +
>  fs/Makefile                  |   1 +
>  fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++

I'm sorry but I'm missing mountains of context.
How is that related to the backing file stuff exactly?
The backing file stuff has this unpleasant

file->f_inode == real_inode != file->f_path->dentry->d_inode

that we all agree is something we really don't like. Is FUSE trying to
do the same thing and build an read_iter/write_iter abstraction around
it? I really really hope that's not the case.

And why are we rushing this to a VFS API? This should be part of the
FUSE series that make it necessary to hoist into the VFS not in
overlayfs work that prematurely moves this into the VFS.
Christian Brauner Sept. 13, 2023, 8:37 a.m. UTC | #3
On Tue, Sep 12, 2023 at 09:54:08PM +0300, Amir Goldstein wrote:
> Overlayfs stores its files data in backing files on other filesystems.
> 
> Factor out some common helpers to perform io to backing files, that will
> later be reused by fuse passthrough code.
> 
> Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
> Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
> 
> Miklos,
> 
> This is the re-factoring that you suggested in the FUSE passthrough
> patches discussion linked above.
> 
> This patch is based on the overlayfs prep patch set I just posted [1].
> 
> Although overlayfs currently is the only user of these backing file
> helpers, I am sending this patch to a wider audience in case other
> filesystem developers want to comment on the abstraction.
> 
> We could perhaps later considering moving backing_file_open() helper
> and related code to backing_file.c.
> 
> In any case, if there are no objections, I plan to queue this work
> for 6.7 via the overlayfs tree.
> 
> Thanks,
> Amir.
> 
> [1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/
> 
> 
>  MAINTAINERS                  |   2 +
>  fs/Kconfig                   |   4 +
>  fs/Makefile                  |   1 +
>  fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++
>  fs/overlayfs/Kconfig         |   1 +
>  fs/overlayfs/file.c          | 137 ++----------------------------
>  fs/overlayfs/overlayfs.h     |   2 -
>  fs/overlayfs/super.c         |  11 +--
>  include/linux/backing_file.h |  22 +++++
>  9 files changed, 199 insertions(+), 141 deletions(-)
>  create mode 100644 fs/backing_file.c
>  create mode 100644 include/linux/backing_file.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 90f13281d297..4e1d21773e0e 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -16092,7 +16092,9 @@ L:	linux-unionfs@vger.kernel.org
>  S:	Supported
>  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git
>  F:	Documentation/filesystems/overlayfs.rst
> +F:	fs/backing_file.c
>  F:	fs/overlayfs/
> +F:	include/linux/backing_file.h

I'd like to do this slightly differently, please. All vfs infra goes
through vfs trees but for new infra like this where someone steps up to
be a maintainer we add a new section (like bpf or networking does):

VFS [BACKING FILE]
M:      Miklos Szeredi <miklos@szeredi.hu>
M:      Amir Goldstein <amir73il@gmail.com>
F:      fs/backing_file.c
F:      include/linux/backing_file.h
L:	linux-fsdevel@vger.kernel.org
S:	Maintained
Amir Goldstein Sept. 13, 2023, 11:24 a.m. UTC | #4
On Wed, Sep 13, 2023 at 11:29 AM Christian Brauner <brauner@kernel.org> wrote:
>
> On Tue, Sep 12, 2023 at 09:54:08PM +0300, Amir Goldstein wrote:
> > Overlayfs stores its files data in backing files on other filesystems.
> >
> > Factor out some common helpers to perform io to backing files, that will
> > later be reused by fuse passthrough code.
> >
> > Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
> > Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >
> > Miklos,
> >
> > This is the re-factoring that you suggested in the FUSE passthrough
> > patches discussion linked above.
> >
> > This patch is based on the overlayfs prep patch set I just posted [1].
> >
> > Although overlayfs currently is the only user of these backing file
> > helpers, I am sending this patch to a wider audience in case other
> > filesystem developers want to comment on the abstraction.
> >
> > We could perhaps later considering moving backing_file_open() helper
> > and related code to backing_file.c.
> >
> > In any case, if there are no objections, I plan to queue this work
> > for 6.7 via the overlayfs tree.
> >
> > Thanks,
> > Amir.
> >
> > [1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/
> >
> >
> >  MAINTAINERS                  |   2 +
> >  fs/Kconfig                   |   4 +
> >  fs/Makefile                  |   1 +
> >  fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++
>
> I'm sorry but I'm missing mountains of context.
> How is that related to the backing file stuff exactly?
> The backing file stuff has this unpleasant
>
> file->f_inode == real_inode != file->f_path->dentry->d_inode
>
> that we all agree is something we really don't like. Is FUSE trying to
> do the same thing and build an read_iter/write_iter abstraction around
> it? I really really hope that's not the case.

That is not the case.
The commonality between FUSE passthrough and overlayfs is that
a "virtual" file (i.e. ovl/fuse), which has no backing blockdev of its own
"forwards" the io requests to a backing file on another filesystem.

The name "backing file" is therefore a pretty accurate description
for both cases. HOWEVER, FUSE does not need to use the
backing_file struct to hold an alternative path, so FUSE backing files
do not have FMODE_BACKING, same as cachefiles uses backing
files, but does not use the FMODE_BACKING/file_backing struct.

Yes, it's a bit of a naming mess.
I don't have any good ideas on how to do better naming.
Ideally, we will get rid of struct backing_file, so we won't need
to care about the confusing names...

>
> And why are we rushing this to a VFS API? This should be part of the
> FUSE series that make it necessary to hoist into the VFS not in
> overlayfs work that prematurely moves this into the VFS.

Fair enough, I will not rush this patch and will post it
along with the FUSE passthrough patches.

I posted it because I wanted to get early feedback - mission accomplished :)
In retrospect, I should have labeled it [RFC].

Thanks,
Amir.
Amir Goldstein Sept. 13, 2023, 11:26 a.m. UTC | #5
On Wed, Sep 13, 2023 at 11:37 AM Christian Brauner <brauner@kernel.org> wrote:
>
> On Tue, Sep 12, 2023 at 09:54:08PM +0300, Amir Goldstein wrote:
> > Overlayfs stores its files data in backing files on other filesystems.
> >
> > Factor out some common helpers to perform io to backing files, that will
> > later be reused by fuse passthrough code.
> >
> > Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
> > Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >
> > Miklos,
> >
> > This is the re-factoring that you suggested in the FUSE passthrough
> > patches discussion linked above.
> >
> > This patch is based on the overlayfs prep patch set I just posted [1].
> >
> > Although overlayfs currently is the only user of these backing file
> > helpers, I am sending this patch to a wider audience in case other
> > filesystem developers want to comment on the abstraction.
> >
> > We could perhaps later considering moving backing_file_open() helper
> > and related code to backing_file.c.
> >
> > In any case, if there are no objections, I plan to queue this work
> > for 6.7 via the overlayfs tree.
> >
> > Thanks,
> > Amir.
> >
> > [1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/
> >
> >
> >  MAINTAINERS                  |   2 +
> >  fs/Kconfig                   |   4 +
> >  fs/Makefile                  |   1 +
> >  fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++
> >  fs/overlayfs/Kconfig         |   1 +
> >  fs/overlayfs/file.c          | 137 ++----------------------------
> >  fs/overlayfs/overlayfs.h     |   2 -
> >  fs/overlayfs/super.c         |  11 +--
> >  include/linux/backing_file.h |  22 +++++
> >  9 files changed, 199 insertions(+), 141 deletions(-)
> >  create mode 100644 fs/backing_file.c
> >  create mode 100644 include/linux/backing_file.h
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 90f13281d297..4e1d21773e0e 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -16092,7 +16092,9 @@ L:    linux-unionfs@vger.kernel.org
> >  S:   Supported
> >  T:   git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git
> >  F:   Documentation/filesystems/overlayfs.rst
> > +F:   fs/backing_file.c
> >  F:   fs/overlayfs/
> > +F:   include/linux/backing_file.h
>
> I'd like to do this slightly differently, please. All vfs infra goes
> through vfs trees

OK. it will take a bit more git collaboration for a new
infra that is not used by any fs yet, but it's fine by me.

> but for new infra like this where someone steps up to
> be a maintainer we add a new section (like bpf or networking does):
>
> VFS [BACKING FILE]
> M:      Miklos Szeredi <miklos@szeredi.hu>
> M:      Amir Goldstein <amir73il@gmail.com>
> F:      fs/backing_file.c
> F:      include/linux/backing_file.h
> L:      linux-fsdevel@vger.kernel.org
> S:      Maintained

That sounds good.

Thanks,
Amir.
Amir Goldstein Sept. 13, 2023, noon UTC | #6
On Wed, Sep 13, 2023 at 2:26 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Wed, Sep 13, 2023 at 11:37 AM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Tue, Sep 12, 2023 at 09:54:08PM +0300, Amir Goldstein wrote:
> > > Overlayfs stores its files data in backing files on other filesystems.
> > >
> > > Factor out some common helpers to perform io to backing files, that will
> > > later be reused by fuse passthrough code.
> > >
> > > Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
> > > Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >
> > > Miklos,
> > >
> > > This is the re-factoring that you suggested in the FUSE passthrough
> > > patches discussion linked above.
> > >
> > > This patch is based on the overlayfs prep patch set I just posted [1].
> > >
> > > Although overlayfs currently is the only user of these backing file
> > > helpers, I am sending this patch to a wider audience in case other
> > > filesystem developers want to comment on the abstraction.
> > >
> > > We could perhaps later considering moving backing_file_open() helper
> > > and related code to backing_file.c.
> > >
> > > In any case, if there are no objections, I plan to queue this work
> > > for 6.7 via the overlayfs tree.
> > >
> > > Thanks,
> > > Amir.
> > >
> > > [1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/
> > >
> > >
> > >  MAINTAINERS                  |   2 +
> > >  fs/Kconfig                   |   4 +
> > >  fs/Makefile                  |   1 +
> > >  fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++
> > >  fs/overlayfs/Kconfig         |   1 +
> > >  fs/overlayfs/file.c          | 137 ++----------------------------
> > >  fs/overlayfs/overlayfs.h     |   2 -
> > >  fs/overlayfs/super.c         |  11 +--
> > >  include/linux/backing_file.h |  22 +++++
> > >  9 files changed, 199 insertions(+), 141 deletions(-)
> > >  create mode 100644 fs/backing_file.c
> > >  create mode 100644 include/linux/backing_file.h
> > >
> > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > index 90f13281d297..4e1d21773e0e 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -16092,7 +16092,9 @@ L:    linux-unionfs@vger.kernel.org
> > >  S:   Supported
> > >  T:   git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git
> > >  F:   Documentation/filesystems/overlayfs.rst
> > > +F:   fs/backing_file.c
> > >  F:   fs/overlayfs/
> > > +F:   include/linux/backing_file.h
> >
> > I'd like to do this slightly differently, please. All vfs infra goes
> > through vfs trees
>
> OK. it will take a bit more git collaboration for a new
> infra that is not used by any fs yet, but it's fine by me.
>
> > but for new infra like this where someone steps up to
> > be a maintainer we add a new section (like bpf or networking does):
> >
> > VFS [BACKING FILE]
> > M:      Miklos Szeredi <miklos@szeredi.hu>
> > M:      Amir Goldstein <amir73il@gmail.com>
> > F:      fs/backing_file.c
> > F:      include/linux/backing_file.h
> > L:      linux-fsdevel@vger.kernel.org
> > S:      Maintained
>
> That sounds good.

But if you want to follow the ways of BFP and NETWORKING,
we first need to agree on the prefix, because there is no
^VFS entry at the moment there is:

FILESYSTEMS (VFS and infrastructure)

So do you intend to change that, or should we carry on with
the ^FILESYSTEM prefix, such as the entry:
FILESYSTEM DIRECT ACCESS (DAX)

and add the entry:
FILESYSTEM [BACKING FILE]

Thanks,
Amir.
Christian Brauner Sept. 13, 2023, 4:41 p.m. UTC | #7
> and add the entry:
> FILESYSTEM [BACKING FILE]

Yeah, sorry. I didn't look at the prefix.
FILESYSTEMS is obviously fine!
Amir Goldstein Sept. 21, 2023, 3:51 p.m. UTC | #8
On Wed, Sep 13, 2023 at 2:24 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Wed, Sep 13, 2023 at 11:29 AM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Tue, Sep 12, 2023 at 09:54:08PM +0300, Amir Goldstein wrote:
> > > Overlayfs stores its files data in backing files on other filesystems.
> > >
> > > Factor out some common helpers to perform io to backing files, that will
> > > later be reused by fuse passthrough code.
> > >
> > > Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
> > > Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >
> > > Miklos,
> > >
> > > This is the re-factoring that you suggested in the FUSE passthrough
> > > patches discussion linked above.
> > >
> > > This patch is based on the overlayfs prep patch set I just posted [1].
> > >
> > > Although overlayfs currently is the only user of these backing file
> > > helpers, I am sending this patch to a wider audience in case other
> > > filesystem developers want to comment on the abstraction.
> > >
> > > We could perhaps later considering moving backing_file_open() helper
> > > and related code to backing_file.c.
> > >
> > > In any case, if there are no objections, I plan to queue this work
> > > for 6.7 via the overlayfs tree.
> > >
> > > Thanks,
> > > Amir.
> > >
> > > [1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/
> > >
> > >
> > >  MAINTAINERS                  |   2 +
> > >  fs/Kconfig                   |   4 +
> > >  fs/Makefile                  |   1 +
> > >  fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++
> >
> > I'm sorry but I'm missing mountains of context.
> > How is that related to the backing file stuff exactly?
> > The backing file stuff has this unpleasant
> >
> > file->f_inode == real_inode != file->f_path->dentry->d_inode
> >
> > that we all agree is something we really don't like. Is FUSE trying to
> > do the same thing and build an read_iter/write_iter abstraction around
> > it? I really really hope that's not the case.
>
> That is not the case.
> The commonality between FUSE passthrough and overlayfs is that
> a "virtual" file (i.e. ovl/fuse), which has no backing blockdev of its own
> "forwards" the io requests to a backing file on another filesystem.
>
> The name "backing file" is therefore a pretty accurate description
> for both cases. HOWEVER, FUSE does not need to use the
> backing_file struct to hold an alternative path, so FUSE backing files
> do not have FMODE_BACKING, same as cachefiles uses backing
> files, but does not use the FMODE_BACKING/file_backing struct.
>
> Yes, it's a bit of a naming mess.
> I don't have any good ideas on how to do better naming.
> Ideally, we will get rid of struct backing_file, so we won't need
> to care about the confusing names...
>

Alas, my explanation about FUSE passthrough backing files
turned out to be inaccurate.

FUSE IO passthrough to backing file is very similar to overlayfs
IO "passthrough" to lower/upper backing file.

Which creates the same problem w.r.t mmap'ing the FUSE file
to the underlying backing file inode.
That problem in overlayfs caused the inception of the fake path file
now embedded in the backing_file object.
So yes, FUSE is trying to do the same thing.

However, the helpers in this patch are not dealing with the fake path
aspect of those backing files specifically.
They are common code for a "stacked fs" (i.e. s_stack_depth > 0)
which delegates IO to files on the underlying fs.

I will give it some thought on how we can at least narrow the amount
of filesystem code that could be exposed to those fake path files.
The read/write/splice iterators do not really need to operate of fake path
files, so this is something that I can try to avoid.

Thanks,
Amir.
Amir Goldstein Sept. 23, 2023, 9:52 a.m. UTC | #9
On Thu, Sep 21, 2023 at 6:51 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Wed, Sep 13, 2023 at 2:24 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Wed, Sep 13, 2023 at 11:29 AM Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Tue, Sep 12, 2023 at 09:54:08PM +0300, Amir Goldstein wrote:
> > > > Overlayfs stores its files data in backing files on other filesystems.
> > > >
> > > > Factor out some common helpers to perform io to backing files, that will
> > > > later be reused by fuse passthrough code.
> > > >
> > > > Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
> > > > Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
> > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > ---
> > > >
> > > > Miklos,
> > > >
> > > > This is the re-factoring that you suggested in the FUSE passthrough
> > > > patches discussion linked above.
> > > >
> > > > This patch is based on the overlayfs prep patch set I just posted [1].
> > > >
> > > > Although overlayfs currently is the only user of these backing file
> > > > helpers, I am sending this patch to a wider audience in case other
> > > > filesystem developers want to comment on the abstraction.
> > > >
> > > > We could perhaps later considering moving backing_file_open() helper
> > > > and related code to backing_file.c.
> > > >
> > > > In any case, if there are no objections, I plan to queue this work
> > > > for 6.7 via the overlayfs tree.
> > > >
> > > > Thanks,
> > > > Amir.
> > > >
> > > > [1] https://lore.kernel.org/linux-unionfs/20230912173653.3317828-1-amir73il@gmail.com/
> > > >
> > > >
> > > >  MAINTAINERS                  |   2 +
> > > >  fs/Kconfig                   |   4 +
> > > >  fs/Makefile                  |   1 +
> > > >  fs/backing_file.c            | 160 +++++++++++++++++++++++++++++++++++
> > >
> > > I'm sorry but I'm missing mountains of context.
> > > How is that related to the backing file stuff exactly?
> > > The backing file stuff has this unpleasant
> > >
> > > file->f_inode == real_inode != file->f_path->dentry->d_inode
> > >
> > > that we all agree is something we really don't like. Is FUSE trying to
> > > do the same thing and build an read_iter/write_iter abstraction around
> > > it? I really really hope that's not the case.
> >
> > That is not the case.
> > The commonality between FUSE passthrough and overlayfs is that
> > a "virtual" file (i.e. ovl/fuse), which has no backing blockdev of its own
> > "forwards" the io requests to a backing file on another filesystem.
> >
> > The name "backing file" is therefore a pretty accurate description
> > for both cases. HOWEVER, FUSE does not need to use the
> > backing_file struct to hold an alternative path, so FUSE backing files
> > do not have FMODE_BACKING, same as cachefiles uses backing
> > files, but does not use the FMODE_BACKING/file_backing struct.
> >
> > Yes, it's a bit of a naming mess.
> > I don't have any good ideas on how to do better naming.
> > Ideally, we will get rid of struct backing_file, so we won't need
> > to care about the confusing names...
> >
>
> Alas, my explanation about FUSE passthrough backing files
> turned out to be inaccurate.
>
> FUSE IO passthrough to backing file is very similar to overlayfs
> IO "passthrough" to lower/upper backing file.
>
> Which creates the same problem w.r.t mmap'ing the FUSE file
> to the underlying backing file inode.
> That problem in overlayfs caused the inception of the fake path file
> now embedded in the backing_file object.
> So yes, FUSE is trying to do the same thing.
>
> However, the helpers in this patch are not dealing with the fake path
> aspect of those backing files specifically.
> They are common code for a "stacked fs" (i.e. s_stack_depth > 0)
> which delegates IO to files on the underlying fs.
>

Funny story: following my clarification of the terminology above
I was going to rename the common helpers and I wanted to create
include/linux/fs_stack.h, but what do you know, it already exists.

It was created in 2006 to have common helpers for eCryptfs and
*Unionfs* (not overlayfs) and indeed, today it has a few helpers
which only ecryptfs uses.

Not only that, but it appears that the copy_attr_size helper does
things that ovl does not do - need to look into that and copy_attr_all
does not deal with uid translations and atomicity (which I just added
to the respective ovl helper).

So I am going to shake the dust from fs/stack.c and merge those
diverged helpers for starters and then go on to adding the new
common helpers for read/write/splice iter and code for handling the
backing_file (or stacked_file) fake path anomaly.

> I will give it some thought on how we can at least narrow the amount
> of filesystem code that could be exposed to those fake path files.
> The read/write/splice iterators do not really need to operate of fake path
> files, so this is something that I can try to avoid.
>

I am not sure but there may be another problem related to mmap and
the backing_file because ovl_mmap() of writable mapping does not
keep an elevated mount writer count on the upper fs mount.

The easy solution is perhaps for overlayfs mount to keep the elevated
mount writer count on the upper fs mount. Not sure if this is really
something to worry about. I will need to take a closer look.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 90f13281d297..4e1d21773e0e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16092,7 +16092,9 @@  L:	linux-unionfs@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git
 F:	Documentation/filesystems/overlayfs.rst
+F:	fs/backing_file.c
 F:	fs/overlayfs/
+F:	include/linux/backing_file.h
 
 P54 WIRELESS DRIVER
 M:	Christian Lamparter <chunkeey@googlemail.com>
diff --git a/fs/Kconfig b/fs/Kconfig
index aa7e03cc1941..9027a88ffa47 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -26,6 +26,10 @@  config LEGACY_DIRECT_IO
 	depends on BUFFER_HEAD
 	bool
 
+# Common backing file helpers
+config FS_BACKING_FILE
+	bool
+
 if BLOCK
 
 source "fs/ext2/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index f9541f40be4e..95ef06cff388 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -39,6 +39,7 @@  obj-$(CONFIG_COMPAT_BINFMT_ELF)	+= compat_binfmt_elf.o
 obj-$(CONFIG_BINFMT_ELF_FDPIC)	+= binfmt_elf_fdpic.o
 obj-$(CONFIG_BINFMT_FLAT)	+= binfmt_flat.o
 
+obj-$(CONFIG_FS_BACKING_FILE)	+= backing_file.o
 obj-$(CONFIG_FS_MBCACHE)	+= mbcache.o
 obj-$(CONFIG_FS_POSIX_ACL)	+= posix_acl.o
 obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
diff --git a/fs/backing_file.c b/fs/backing_file.c
new file mode 100644
index 000000000000..ea895ca1639d
--- /dev/null
+++ b/fs/backing_file.c
@@ -0,0 +1,160 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Common helpers for backing file io.
+ * Forked from fs/overlayfs/file.c.
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ * Copyright (C) 2023 CTERA Networks.
+ */
+
+#include <linux/backing_file.h>
+
+struct backing_aio_req {
+	struct kiocb iocb;
+	refcount_t ref;
+	struct kiocb *orig_iocb;
+	void (*cleanup)(struct kiocb *, long);
+};
+
+static struct kmem_cache *backing_aio_req_cachep;
+
+#define BACKING_IOCB_MASK \
+	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
+
+static rwf_t iocb_to_rw_flags(int flags)
+{
+	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
+}
+
+static void backing_aio_put(struct backing_aio_req *aio_req)
+{
+	if (refcount_dec_and_test(&aio_req->ref)) {
+		fput(aio_req->iocb.ki_filp);
+		kmem_cache_free(backing_aio_req_cachep, aio_req);
+	}
+}
+
+/* Completion for submitted/failed async rw io */
+static void backing_aio_cleanup(struct backing_aio_req *aio_req, long res)
+{
+	struct kiocb *iocb = &aio_req->iocb;
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	if (iocb->ki_flags & IOCB_WRITE)
+		kiocb_end_write(iocb);
+
+	orig_iocb->ki_pos = iocb->ki_pos;
+	if (aio_req->cleanup)
+		aio_req->cleanup(orig_iocb, res);
+
+	backing_aio_put(aio_req);
+}
+
+/* Completion for submitted async rw io */
+static void backing_aio_rw_complete(struct kiocb *iocb, long res)
+{
+	struct backing_aio_req *aio_req = container_of(iocb,
+					       struct backing_aio_req, iocb);
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	backing_aio_cleanup(aio_req, res);
+	orig_iocb->ki_complete(orig_iocb, res);
+}
+
+
+ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
+			       struct kiocb *iocb, int flags,
+			       void (*cleanup)(struct kiocb *, long))
+{
+	struct backing_aio_req *aio_req = NULL;
+	ssize_t ret;
+
+	if (!iov_iter_count(iter))
+		return 0;
+
+	if (iocb->ki_flags & IOCB_DIRECT &&
+	    !(file->f_mode & FMODE_CAN_ODIRECT))
+		return -EINVAL;
+
+	if (is_sync_kiocb(iocb)) {
+		rwf_t rwf = iocb_to_rw_flags(flags);
+
+		ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
+		if (cleanup)
+			cleanup(iocb, ret);
+	} else {
+		aio_req = kmem_cache_zalloc(backing_aio_req_cachep, GFP_KERNEL);
+		if (!aio_req)
+			return -ENOMEM;
+
+		aio_req->orig_iocb = iocb;
+		aio_req->cleanup = cleanup;
+		kiocb_clone(&aio_req->iocb, iocb, get_file(file));
+		aio_req->iocb.ki_complete = backing_aio_rw_complete;
+		refcount_set(&aio_req->ref, 2);
+		ret = vfs_iocb_iter_read(file, &aio_req->iocb, iter);
+		backing_aio_put(aio_req);
+		if (ret != -EIOCBQUEUED)
+			backing_aio_cleanup(aio_req, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(backing_file_read_iter);
+
+ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
+				struct kiocb *iocb, int flags,
+				void (*cleanup)(struct kiocb *, long))
+{
+	ssize_t ret;
+
+	if (!iov_iter_count(iter))
+		return 0;
+
+	if (iocb->ki_flags & IOCB_DIRECT &&
+	    !(file->f_mode & FMODE_CAN_ODIRECT))
+		return -EINVAL;
+
+	if (is_sync_kiocb(iocb)) {
+		rwf_t rwf = iocb_to_rw_flags(flags);
+
+		file_start_write(file);
+		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
+		file_end_write(file);
+		if (cleanup)
+			cleanup(iocb, ret);
+	} else {
+		struct backing_aio_req *aio_req;
+
+		aio_req = kmem_cache_zalloc(backing_aio_req_cachep, GFP_KERNEL);
+		if (!aio_req)
+			return -ENOMEM;
+
+		aio_req->orig_iocb = iocb;
+		aio_req->cleanup = cleanup;
+		kiocb_clone(&aio_req->iocb, iocb, get_file(file));
+		aio_req->iocb.ki_flags = flags;
+		aio_req->iocb.ki_complete = backing_aio_rw_complete;
+		refcount_set(&aio_req->ref, 2);
+		kiocb_start_write(&aio_req->iocb);
+		ret = vfs_iocb_iter_write(file, &aio_req->iocb, iter);
+		backing_aio_put(aio_req);
+		if (ret != -EIOCBQUEUED)
+			backing_aio_cleanup(aio_req, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(backing_file_write_iter);
+
+static int __init backing_aio_init(void)
+{
+	backing_aio_req_cachep = kmem_cache_create("backing_aio_req",
+					   sizeof(struct backing_aio_req),
+					   0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!backing_aio_req_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+fs_initcall(backing_aio_init);
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index fec5020c3495..7f52d9031cff 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,6 +1,7 @@ 
 # SPDX-License-Identifier: GPL-2.0-only
 config OVERLAY_FS
 	tristate "Overlay filesystem support"
+	select FS_BACKING_FILE
 	select EXPORTFS
 	help
 	  An overlay filesystem combines two filesystems - an 'upper' filesystem
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 05ec614f7054..81fe6a85cad9 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -13,16 +13,9 @@ 
 #include <linux/security.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/backing_file.h>
 #include "overlayfs.h"
 
-struct ovl_aio_req {
-	struct kiocb iocb;
-	refcount_t ref;
-	struct kiocb *orig_iocb;
-};
-
-static struct kmem_cache *ovl_aio_request_cachep;
-
 static char ovl_whatisit(struct inode *inode, struct inode *realinode)
 {
 	if (realinode != ovl_inode_upper(inode))
@@ -262,24 +255,8 @@  static void ovl_file_accessed(struct file *file)
 	touch_atime(&file->f_path);
 }
 
-#define OVL_IOCB_MASK \
-	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
-
-static rwf_t iocb_to_rw_flags(int flags)
-{
-	return (__force rwf_t)(flags & OVL_IOCB_MASK);
-}
-
-static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
-{
-	if (refcount_dec_and_test(&aio_req->ref)) {
-		fput(aio_req->iocb.ki_filp);
-		kmem_cache_free(ovl_aio_request_cachep, aio_req);
-	}
-}
-
 /* Completion for submitted/failed sync/async rw io */
-static void ovl_rw_complete(struct kiocb *orig_iocb)
+static void ovl_rw_complete(struct kiocb *orig_iocb, long res)
 {
 	struct file *file = orig_iocb->ki_filp;
 
@@ -292,32 +269,6 @@  static void ovl_rw_complete(struct kiocb *orig_iocb)
 	}
 }
 
-/* Completion for submitted/failed async rw io */
-static void ovl_aio_cleanup(struct ovl_aio_req *aio_req)
-{
-	struct kiocb *iocb = &aio_req->iocb;
-	struct kiocb *orig_iocb = aio_req->orig_iocb;
-
-	if (iocb->ki_flags & IOCB_WRITE)
-		kiocb_end_write(iocb);
-
-	orig_iocb->ki_pos = iocb->ki_pos;
-	ovl_rw_complete(orig_iocb);
-
-	ovl_aio_put(aio_req);
-}
-
-/* Completion for submitted async rw io */
-static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
-{
-	struct ovl_aio_req *aio_req = container_of(iocb,
-						   struct ovl_aio_req, iocb);
-	struct kiocb *orig_iocb = aio_req->orig_iocb;
-
-	ovl_aio_cleanup(aio_req);
-	orig_iocb->ki_complete(orig_iocb, res);
-}
-
 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct file *file = iocb->ki_filp;
@@ -332,38 +283,10 @@  static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 	if (ret)
 		return ret;
 
-	ret = -EINVAL;
-	if (iocb->ki_flags & IOCB_DIRECT &&
-	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
-		goto out_fdput;
-
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	if (is_sync_kiocb(iocb)) {
-		rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags);
-
-		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf);
-		ovl_rw_complete(iocb);
-	} else {
-		struct ovl_aio_req *aio_req;
-
-		ret = -ENOMEM;
-		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
-		if (!aio_req)
-			goto out;
-
-		real.flags = 0;
-		aio_req->orig_iocb = iocb;
-		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
-		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
-		refcount_set(&aio_req->ref, 2);
-		ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
-		ovl_aio_put(aio_req);
-		if (ret != -EIOCBQUEUED)
-			ovl_aio_cleanup(aio_req);
-	}
-out:
+	ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
+				     ovl_rw_complete);
 	revert_creds(old_cred);
-out_fdput:
 	fdput(real);
 
 	return ret;
@@ -392,45 +315,13 @@  static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	if (ret)
 		goto out_unlock;
 
-	ret = -EINVAL;
-	if (iocb->ki_flags & IOCB_DIRECT &&
-	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
-		goto out_fdput;
-
 	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
 		flags &= ~(IOCB_DSYNC | IOCB_SYNC);
 
 	old_cred = ovl_override_creds(inode->i_sb);
-	if (is_sync_kiocb(iocb)) {
-		rwf_t rwf = iocb_to_rw_flags(flags);
-
-		file_start_write(real.file);
-		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
-		file_end_write(real.file);
-		ovl_rw_complete(iocb);
-	} else {
-		struct ovl_aio_req *aio_req;
-
-		ret = -ENOMEM;
-		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
-		if (!aio_req)
-			goto out;
-
-		real.flags = 0;
-		aio_req->orig_iocb = iocb;
-		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
-		aio_req->iocb.ki_flags = flags;
-		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
-		refcount_set(&aio_req->ref, 2);
-		kiocb_start_write(&aio_req->iocb);
-		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
-		ovl_aio_put(aio_req);
-		if (ret != -EIOCBQUEUED)
-			ovl_aio_cleanup(aio_req);
-	}
-out:
+	ret = backing_file_write_iter(real.file, iter, iocb, flags,
+				      ovl_rw_complete);
 	revert_creds(old_cred);
-out_fdput:
 	fdput(real);
 
 out_unlock:
@@ -742,19 +633,3 @@  const struct file_operations ovl_file_operations = {
 	.copy_file_range	= ovl_copy_file_range,
 	.remap_file_range	= ovl_remap_file_range,
 };
-
-int __init ovl_aio_request_cache_init(void)
-{
-	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
-						   sizeof(struct ovl_aio_req),
-						   0, SLAB_HWCACHE_ALIGN, NULL);
-	if (!ovl_aio_request_cachep)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void ovl_aio_request_cache_destroy(void)
-{
-	kmem_cache_destroy(ovl_aio_request_cachep);
-}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 9817b2dcb132..64b98e67e826 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -799,8 +799,6 @@  struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
 
 /* file.c */
 extern const struct file_operations ovl_file_operations;
-int __init ovl_aio_request_cache_init(void);
-void ovl_aio_request_cache_destroy(void);
 int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
 int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
 int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index def266b5e2a3..8c132467fca1 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1530,14 +1530,10 @@  static int __init ovl_init(void)
 	if (ovl_inode_cachep == NULL)
 		return -ENOMEM;
 
-	err = ovl_aio_request_cache_init();
-	if (!err) {
-		err = register_filesystem(&ovl_fs_type);
-		if (!err)
-			return 0;
+	err = register_filesystem(&ovl_fs_type);
+	if (!err)
+		return 0;
 
-		ovl_aio_request_cache_destroy();
-	}
 	kmem_cache_destroy(ovl_inode_cachep);
 
 	return err;
@@ -1553,7 +1549,6 @@  static void __exit ovl_exit(void)
 	 */
 	rcu_barrier();
 	kmem_cache_destroy(ovl_inode_cachep);
-	ovl_aio_request_cache_destroy();
 }
 
 module_init(ovl_init);
diff --git a/include/linux/backing_file.h b/include/linux/backing_file.h
new file mode 100644
index 000000000000..1428fe7b26bb
--- /dev/null
+++ b/include/linux/backing_file.h
@@ -0,0 +1,22 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Common helpers for backing file io.
+ *
+ * Copyright (C) 2023 CTERA Networks.
+ */
+
+#ifndef _LINUX_BACKING_FILE_H
+#define _LINUX_BACKING_FILE_H
+
+#include <linux/file.h>
+#include <linux/uio.h>
+#include <linux/fs.h>
+
+ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
+			       struct kiocb *iocb, int flags,
+			       void (*cleanup)(struct kiocb *, long));
+ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
+				struct kiocb *iocb, int flags,
+				void (*cleanup)(struct kiocb *, long));
+
+#endif	/* _LINUX_BACKING_FILE_H */