diff mbox series

[15/26] fuse-bpf: Add support for read/write iter

Message ID 20220926231822.994383-16-drosen@google.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series FUSE BPF: A Stacked Filesystem Extension for FUSE | expand

Checks

Context Check Description
bpf/vmtest-bpf-PR fail PR summary
bpf/vmtest-bpf-VM_Test-1 pending Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain }}
bpf/vmtest-bpf-VM_Test-2 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-3 fail Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-4 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-5 success Logs for llvm-toolchain
bpf/vmtest-bpf-VM_Test-6 success Logs for set-matrix

Commit Message

Daniel Rosenberg Sept. 26, 2022, 11:18 p.m. UTC
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Paul Lawrence <paullawrence@google.com>
---
 fs/fuse/backing.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/control.c |   2 +-
 fs/fuse/file.c    |  28 +++++
 fs/fuse/fuse_i.h  |  42 ++++++-
 fs/fuse/inode.c   |  13 +++
 5 files changed, 374 insertions(+), 2 deletions(-)

Comments

Amir Goldstein Oct. 1, 2022, 6:53 a.m. UTC | #1
On Tue, Sep 27, 2022 at 2:46 AM Daniel Rosenberg <drosen@google.com> wrote:
>
> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> Signed-off-by: Paul Lawrence <paullawrence@google.com>
> ---
>  fs/fuse/backing.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++
>  fs/fuse/control.c |   2 +-
>  fs/fuse/file.c    |  28 +++++
>  fs/fuse/fuse_i.h  |  42 ++++++-
>  fs/fuse/inode.c   |  13 +++
>  5 files changed, 374 insertions(+), 2 deletions(-)
>
> diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c
> index 1fe61177cdfb..cf4ad9f4fe10 100644
> --- a/fs/fuse/backing.c
> +++ b/fs/fuse/backing.c
> @@ -12,6 +12,47 @@
>  #include <linux/namei.h>
>  #include <linux/bpf_fuse.h>
>
> +#define FUSE_BPF_IOCB_MASK (IOCB_APPEND | IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
> +
> +struct fuse_bpf_aio_req {
> +       struct kiocb iocb;
> +       refcount_t ref;
> +       struct kiocb *iocb_orig;
> +};
> +
> +static struct kmem_cache *fuse_bpf_aio_request_cachep;
> +
> +static void fuse_file_accessed(struct file *dst_file, struct file *src_file)
> +{
> +       struct inode *dst_inode;
> +       struct inode *src_inode;
> +
> +       if (dst_file->f_flags & O_NOATIME)
> +               return;
> +
> +       dst_inode = file_inode(dst_file);
> +       src_inode = file_inode(src_file);
> +
> +       if ((!timespec64_equal(&dst_inode->i_mtime, &src_inode->i_mtime) ||
> +            !timespec64_equal(&dst_inode->i_ctime, &src_inode->i_ctime))) {
> +               dst_inode->i_mtime = src_inode->i_mtime;
> +               dst_inode->i_ctime = src_inode->i_ctime;
> +       }
> +
> +       touch_atime(&dst_file->f_path);
> +}
> +
> +static void fuse_copyattr(struct file *dst_file, struct file *src_file)
> +{
> +       struct inode *dst = file_inode(dst_file);
> +       struct inode *src = file_inode(src_file);
> +
> +       dst->i_atime = src->i_atime;
> +       dst->i_mtime = src->i_mtime;
> +       dst->i_ctime = src->i_ctime;
> +       i_size_write(dst, i_size_read(src));
> +}
> +
>  struct bpf_prog *fuse_get_bpf_prog(struct file *file)
>  {
>         struct bpf_prog *bpf_prog = ERR_PTR(-EINVAL);
> @@ -469,6 +510,241 @@ int fuse_lseek_finalize(struct bpf_fuse_args *fa, loff_t *out,
>         return 0;
>  }
>
> +static inline void fuse_bpf_aio_put(struct fuse_bpf_aio_req *aio_req)
> +{
> +       if (refcount_dec_and_test(&aio_req->ref))
> +               kmem_cache_free(fuse_bpf_aio_request_cachep, aio_req);
> +}
> +
> +static void fuse_bpf_aio_cleanup_handler(struct fuse_bpf_aio_req *aio_req)
> +{
> +       struct kiocb *iocb = &aio_req->iocb;
> +       struct kiocb *iocb_orig = aio_req->iocb_orig;
> +
> +       if (iocb->ki_flags & IOCB_WRITE) {
> +               __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
> +                                     SB_FREEZE_WRITE);
> +               file_end_write(iocb->ki_filp);
> +               fuse_copyattr(iocb_orig->ki_filp, iocb->ki_filp);
> +       }
> +       iocb_orig->ki_pos = iocb->ki_pos;
> +       fuse_bpf_aio_put(aio_req);
> +}
> +
> +static void fuse_bpf_aio_rw_complete(struct kiocb *iocb, long res)
> +{
> +       struct fuse_bpf_aio_req *aio_req =
> +               container_of(iocb, struct fuse_bpf_aio_req, iocb);
> +       struct kiocb *iocb_orig = aio_req->iocb_orig;
> +
> +       fuse_bpf_aio_cleanup_handler(aio_req);
> +       iocb_orig->ki_complete(iocb_orig, res);
> +}
> +
> +int fuse_file_read_iter_initialize_in(struct bpf_fuse_args *fa, struct fuse_file_read_iter_io *fri,
> +                                     struct kiocb *iocb, struct iov_iter *to)
> +{
> +       struct file *file = iocb->ki_filp;
> +       struct fuse_file *ff = file->private_data;
> +
> +       fri->fri = (struct fuse_read_in) {
> +               .fh = ff->fh,
> +               .offset = iocb->ki_pos,
> +               .size = to->count,
> +       };
> +
> +       /* TODO we can't assume 'to' is a kvec */
> +       /* TODO we also can't assume the vector has only one component */
> +       *fa = (struct bpf_fuse_args) {
> +               .opcode = FUSE_READ,
> +               .nodeid = ff->nodeid,
> +               .in_numargs = 1,
> +               .in_args[0].size = sizeof(fri->fri),
> +               .in_args[0].value = &fri->fri,
> +               /*
> +                * TODO Design this properly.
> +                * Possible approach: do not pass buf to bpf
> +                * If going to userland, do a deep copy
> +                * For extra credit, do that to/from the vector, rather than
> +                * making an extra copy in the kernel
> +                */
> +       };
> +
> +       return 0;
> +}
> +
> +int fuse_file_read_iter_initialize_out(struct bpf_fuse_args *fa, struct fuse_file_read_iter_io *fri,
> +                                      struct kiocb *iocb, struct iov_iter *to)
> +{
> +       fri->frio = (struct fuse_read_iter_out) {
> +               .ret = fri->fri.size,
> +       };
> +
> +       fa->out_numargs = 1;
> +       fa->out_args[0].size = sizeof(fri->frio);
> +       fa->out_args[0].value = &fri->frio;
> +
> +       return 0;
> +}
> +
> +int fuse_file_read_iter_backing(struct bpf_fuse_args *fa, ssize_t *out,
> +                               struct kiocb *iocb, struct iov_iter *to)
> +{
> +       struct fuse_read_iter_out *frio = fa->out_args[0].value;
> +       struct file *file = iocb->ki_filp;
> +       struct fuse_file *ff = file->private_data;
> +
> +       if (!iov_iter_count(to))
> +               return 0;
> +
> +       if ((iocb->ki_flags & IOCB_DIRECT) &&
> +           (!ff->backing_file->f_mapping->a_ops ||
> +            !ff->backing_file->f_mapping->a_ops->direct_IO))
> +               return -EINVAL;
> +
> +       /* TODO This just plain ignores any change to fuse_read_in */
> +       if (is_sync_kiocb(iocb)) {
> +               *out = vfs_iter_read(ff->backing_file, to, &iocb->ki_pos,
> +                               iocb_to_rw_flags(iocb->ki_flags, FUSE_BPF_IOCB_MASK));
> +       } else {
> +               struct fuse_bpf_aio_req *aio_req;
> +
> +               *out = -ENOMEM;
> +               aio_req = kmem_cache_zalloc(fuse_bpf_aio_request_cachep, GFP_KERNEL);
> +               if (!aio_req)
> +                       goto out;
> +
> +               aio_req->iocb_orig = iocb;
> +               kiocb_clone(&aio_req->iocb, iocb, ff->backing_file);
> +               aio_req->iocb.ki_complete = fuse_bpf_aio_rw_complete;
> +               refcount_set(&aio_req->ref, 2);
> +               *out = vfs_iocb_iter_read(ff->backing_file, &aio_req->iocb, to);
> +               fuse_bpf_aio_put(aio_req);
> +               if (*out != -EIOCBQUEUED)
> +                       fuse_bpf_aio_cleanup_handler(aio_req);
> +       }
> +
> +       frio->ret = *out;
> +
> +       /* TODO Need to point value at the buffer for post-modification */
> +
> +out:
> +       fuse_file_accessed(file, ff->backing_file);
> +
> +       return *out;
> +}
> +
> +int fuse_file_read_iter_finalize(struct bpf_fuse_args *fa, ssize_t *out,
> +                                struct kiocb *iocb, struct iov_iter *to)
> +{
> +       struct fuse_read_iter_out *frio = fa->out_args[0].value;
> +
> +       *out = frio->ret;
> +
> +       return 0;
> +}
> +
> +int fuse_file_write_iter_initialize_in(struct bpf_fuse_args *fa,
> +                                      struct fuse_file_write_iter_io *fwio,
> +                                      struct kiocb *iocb, struct iov_iter *from)
> +{
> +       struct file *file = iocb->ki_filp;
> +       struct fuse_file *ff = file->private_data;
> +
> +       *fwio = (struct fuse_file_write_iter_io) {
> +               .fwi.fh = ff->fh,
> +               .fwi.offset = iocb->ki_pos,
> +               .fwi.size = from->count,
> +       };
> +
> +       /* TODO we can't assume 'from' is a kvec */
> +       *fa = (struct bpf_fuse_args) {
> +               .opcode = FUSE_WRITE,
> +               .nodeid = ff->nodeid,
> +               .in_numargs = 2,
> +               .in_args[0].size = sizeof(fwio->fwi),
> +               .in_args[0].value = &fwio->fwi,
> +               .in_args[1].size = fwio->fwi.size,
> +               .in_args[1].value = from->kvec->iov_base,
> +       };
> +
> +       return 0;
> +}
> +
> +int fuse_file_write_iter_initialize_out(struct bpf_fuse_args *fa,
> +                                       struct fuse_file_write_iter_io *fwio,
> +                                       struct kiocb *iocb, struct iov_iter *from)
> +{
> +       /* TODO we can't assume 'from' is a kvec */
> +       fa->out_numargs = 1;
> +       fa->out_args[0].size = sizeof(fwio->fwio);
> +       fa->out_args[0].value = &fwio->fwio;
> +
> +       return 0;
> +}
> +
> +int fuse_file_write_iter_backing(struct bpf_fuse_args *fa, ssize_t *out,
> +                                struct kiocb *iocb, struct iov_iter *from)
> +{
> +       struct file *file = iocb->ki_filp;
> +       struct fuse_file *ff = file->private_data;
> +       struct fuse_write_iter_out *fwio = fa->out_args[0].value;
> +
> +       if (!iov_iter_count(from))
> +               return 0;
> +
> +       /* TODO This just plain ignores any change to fuse_write_in */
> +       /* TODO uint32_t seems smaller than ssize_t.... right? */
> +       inode_lock(file_inode(file));
> +
> +       fuse_copyattr(file, ff->backing_file);
> +
> +       if (is_sync_kiocb(iocb)) {
> +               file_start_write(ff->backing_file);
> +               *out = vfs_iter_write(ff->backing_file, from, &iocb->ki_pos,
> +                                          iocb_to_rw_flags(iocb->ki_flags, FUSE_BPF_IOCB_MASK));
> +               file_end_write(ff->backing_file);
> +
> +               /* Must reflect change in size of backing file to upper file */
> +               if (*out > 0)
> +                       fuse_copyattr(file, ff->backing_file);

Regarding attribute cache, things can get a tad more complicated
when the inode is not purely passthrough.

To put things in context, the reason that ovl_copyattr() is correct
in ovl_write_iter() is because the overlayfs inode is purely passthrough
to the backing inode, that is, all operations are passthrough.
The only incident when backing inode changes (copy up) takes
care of copying inode attributes.

This is not the case with FUSE passthrough.
Not in Alessio's FUSE_PASSTHROUGH patches and not in
this proposal.

With FUSE passthrough, every inode is hybrid, some operations
can be served from the backing inode and some operations served
from the server.

My FUSE passthrough branch [1] has two fixes on top of Allesio's patches
to fix two issues regarding attribute caches (size and times).

[1] https://github.com/amir73il/linux/commits/linux-5.10.y-fuse-passthrough

This problem is not unique to FUSE.
It also exists for attribute caches of NFS clients and the NFS protocol
has several techniques to deal with this problem, but it is certainly not
a trivial issue.

As a matter of fact, I found the problems fixed in my branch above
by running the nfstest_posix [2] tests on the FUSE passthrough code.

[2] https://wiki.linux-nfs.org/wiki/index.php/NFStest#nfstest_posix_-_POSIX_file_system_level_access_tests

The easiest way out is to declare one of the copies (backing or remote)
the authoritative copy w.r.t. attributes (like noac nfs mount option).
Declaring the remote attribute copy authoritative (as FUSE usually does)
has performance implications.

I guess if FUSE-bpf attaches a backing inode to FUSE inode on lookup
then the option of making the backing inode attributes authoritative
(like in overlayfs) is valid, but I think this needs to be spelled out.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c
index 1fe61177cdfb..cf4ad9f4fe10 100644
--- a/fs/fuse/backing.c
+++ b/fs/fuse/backing.c
@@ -12,6 +12,47 @@ 
 #include <linux/namei.h>
 #include <linux/bpf_fuse.h>
 
+#define FUSE_BPF_IOCB_MASK (IOCB_APPEND | IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
+
+struct fuse_bpf_aio_req {
+	struct kiocb iocb;
+	refcount_t ref;
+	struct kiocb *iocb_orig;
+};
+
+static struct kmem_cache *fuse_bpf_aio_request_cachep;
+
+static void fuse_file_accessed(struct file *dst_file, struct file *src_file)
+{
+	struct inode *dst_inode;
+	struct inode *src_inode;
+
+	if (dst_file->f_flags & O_NOATIME)
+		return;
+
+	dst_inode = file_inode(dst_file);
+	src_inode = file_inode(src_file);
+
+	if ((!timespec64_equal(&dst_inode->i_mtime, &src_inode->i_mtime) ||
+	     !timespec64_equal(&dst_inode->i_ctime, &src_inode->i_ctime))) {
+		dst_inode->i_mtime = src_inode->i_mtime;
+		dst_inode->i_ctime = src_inode->i_ctime;
+	}
+
+	touch_atime(&dst_file->f_path);
+}
+
+static void fuse_copyattr(struct file *dst_file, struct file *src_file)
+{
+	struct inode *dst = file_inode(dst_file);
+	struct inode *src = file_inode(src_file);
+
+	dst->i_atime = src->i_atime;
+	dst->i_mtime = src->i_mtime;
+	dst->i_ctime = src->i_ctime;
+	i_size_write(dst, i_size_read(src));
+}
+
 struct bpf_prog *fuse_get_bpf_prog(struct file *file)
 {
 	struct bpf_prog *bpf_prog = ERR_PTR(-EINVAL);
@@ -469,6 +510,241 @@  int fuse_lseek_finalize(struct bpf_fuse_args *fa, loff_t *out,
 	return 0;
 }
 
+static inline void fuse_bpf_aio_put(struct fuse_bpf_aio_req *aio_req)
+{
+	if (refcount_dec_and_test(&aio_req->ref))
+		kmem_cache_free(fuse_bpf_aio_request_cachep, aio_req);
+}
+
+static void fuse_bpf_aio_cleanup_handler(struct fuse_bpf_aio_req *aio_req)
+{
+	struct kiocb *iocb = &aio_req->iocb;
+	struct kiocb *iocb_orig = aio_req->iocb_orig;
+
+	if (iocb->ki_flags & IOCB_WRITE) {
+		__sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+				      SB_FREEZE_WRITE);
+		file_end_write(iocb->ki_filp);
+		fuse_copyattr(iocb_orig->ki_filp, iocb->ki_filp);
+	}
+	iocb_orig->ki_pos = iocb->ki_pos;
+	fuse_bpf_aio_put(aio_req);
+}
+
+static void fuse_bpf_aio_rw_complete(struct kiocb *iocb, long res)
+{
+	struct fuse_bpf_aio_req *aio_req =
+		container_of(iocb, struct fuse_bpf_aio_req, iocb);
+	struct kiocb *iocb_orig = aio_req->iocb_orig;
+
+	fuse_bpf_aio_cleanup_handler(aio_req);
+	iocb_orig->ki_complete(iocb_orig, res);
+}
+
+int fuse_file_read_iter_initialize_in(struct bpf_fuse_args *fa, struct fuse_file_read_iter_io *fri,
+				      struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+
+	fri->fri = (struct fuse_read_in) {
+		.fh = ff->fh,
+		.offset = iocb->ki_pos,
+		.size = to->count,
+	};
+
+	/* TODO we can't assume 'to' is a kvec */
+	/* TODO we also can't assume the vector has only one component */
+	*fa = (struct bpf_fuse_args) {
+		.opcode = FUSE_READ,
+		.nodeid = ff->nodeid,
+		.in_numargs = 1,
+		.in_args[0].size = sizeof(fri->fri),
+		.in_args[0].value = &fri->fri,
+		/*
+		 * TODO Design this properly.
+		 * Possible approach: do not pass buf to bpf
+		 * If going to userland, do a deep copy
+		 * For extra credit, do that to/from the vector, rather than
+		 * making an extra copy in the kernel
+		 */
+	};
+
+	return 0;
+}
+
+int fuse_file_read_iter_initialize_out(struct bpf_fuse_args *fa, struct fuse_file_read_iter_io *fri,
+				       struct kiocb *iocb, struct iov_iter *to)
+{
+	fri->frio = (struct fuse_read_iter_out) {
+		.ret = fri->fri.size,
+	};
+
+	fa->out_numargs = 1;
+	fa->out_args[0].size = sizeof(fri->frio);
+	fa->out_args[0].value = &fri->frio;
+
+	return 0;
+}
+
+int fuse_file_read_iter_backing(struct bpf_fuse_args *fa, ssize_t *out,
+				struct kiocb *iocb, struct iov_iter *to)
+{
+	struct fuse_read_iter_out *frio = fa->out_args[0].value;
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+
+	if (!iov_iter_count(to))
+		return 0;
+
+	if ((iocb->ki_flags & IOCB_DIRECT) &&
+	    (!ff->backing_file->f_mapping->a_ops ||
+	     !ff->backing_file->f_mapping->a_ops->direct_IO))
+		return -EINVAL;
+
+	/* TODO This just plain ignores any change to fuse_read_in */
+	if (is_sync_kiocb(iocb)) {
+		*out = vfs_iter_read(ff->backing_file, to, &iocb->ki_pos,
+				iocb_to_rw_flags(iocb->ki_flags, FUSE_BPF_IOCB_MASK));
+	} else {
+		struct fuse_bpf_aio_req *aio_req;
+
+		*out = -ENOMEM;
+		aio_req = kmem_cache_zalloc(fuse_bpf_aio_request_cachep, GFP_KERNEL);
+		if (!aio_req)
+			goto out;
+
+		aio_req->iocb_orig = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, ff->backing_file);
+		aio_req->iocb.ki_complete = fuse_bpf_aio_rw_complete;
+		refcount_set(&aio_req->ref, 2);
+		*out = vfs_iocb_iter_read(ff->backing_file, &aio_req->iocb, to);
+		fuse_bpf_aio_put(aio_req);
+		if (*out != -EIOCBQUEUED)
+			fuse_bpf_aio_cleanup_handler(aio_req);
+	}
+
+	frio->ret = *out;
+
+	/* TODO Need to point value at the buffer for post-modification */
+
+out:
+	fuse_file_accessed(file, ff->backing_file);
+
+	return *out;
+}
+
+int fuse_file_read_iter_finalize(struct bpf_fuse_args *fa, ssize_t *out,
+				 struct kiocb *iocb, struct iov_iter *to)
+{
+	struct fuse_read_iter_out *frio = fa->out_args[0].value;
+
+	*out = frio->ret;
+
+	return 0;
+}
+
+int fuse_file_write_iter_initialize_in(struct bpf_fuse_args *fa,
+				       struct fuse_file_write_iter_io *fwio,
+				       struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+
+	*fwio = (struct fuse_file_write_iter_io) {
+		.fwi.fh = ff->fh,
+		.fwi.offset = iocb->ki_pos,
+		.fwi.size = from->count,
+	};
+
+	/* TODO we can't assume 'from' is a kvec */
+	*fa = (struct bpf_fuse_args) {
+		.opcode = FUSE_WRITE,
+		.nodeid = ff->nodeid,
+		.in_numargs = 2,
+		.in_args[0].size = sizeof(fwio->fwi),
+		.in_args[0].value = &fwio->fwi,
+		.in_args[1].size = fwio->fwi.size,
+		.in_args[1].value = from->kvec->iov_base,
+	};
+
+	return 0;
+}
+
+int fuse_file_write_iter_initialize_out(struct bpf_fuse_args *fa,
+					struct fuse_file_write_iter_io *fwio,
+					struct kiocb *iocb, struct iov_iter *from)
+{
+	/* TODO we can't assume 'from' is a kvec */
+	fa->out_numargs = 1;
+	fa->out_args[0].size = sizeof(fwio->fwio);
+	fa->out_args[0].value = &fwio->fwio;
+
+	return 0;
+}
+
+int fuse_file_write_iter_backing(struct bpf_fuse_args *fa, ssize_t *out,
+				 struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_write_iter_out *fwio = fa->out_args[0].value;
+
+	if (!iov_iter_count(from))
+		return 0;
+
+	/* TODO This just plain ignores any change to fuse_write_in */
+	/* TODO uint32_t seems smaller than ssize_t.... right? */
+	inode_lock(file_inode(file));
+
+	fuse_copyattr(file, ff->backing_file);
+
+	if (is_sync_kiocb(iocb)) {
+		file_start_write(ff->backing_file);
+		*out = vfs_iter_write(ff->backing_file, from, &iocb->ki_pos,
+					   iocb_to_rw_flags(iocb->ki_flags, FUSE_BPF_IOCB_MASK));
+		file_end_write(ff->backing_file);
+
+		/* Must reflect change in size of backing file to upper file */
+		if (*out > 0)
+			fuse_copyattr(file, ff->backing_file);
+	} else {
+		struct fuse_bpf_aio_req *aio_req;
+
+		*out = -ENOMEM;
+		aio_req = kmem_cache_zalloc(fuse_bpf_aio_request_cachep, GFP_KERNEL);
+		if (!aio_req)
+			goto out;
+
+		file_start_write(ff->backing_file);
+		__sb_writers_release(file_inode(ff->backing_file)->i_sb, SB_FREEZE_WRITE);
+		aio_req->iocb_orig = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, ff->backing_file);
+		aio_req->iocb.ki_complete = fuse_bpf_aio_rw_complete;
+		refcount_set(&aio_req->ref, 2);
+		*out = vfs_iocb_iter_write(ff->backing_file, &aio_req->iocb, from);
+		fuse_bpf_aio_put(aio_req);
+		if (*out != -EIOCBQUEUED)
+			fuse_bpf_aio_cleanup_handler(aio_req);
+	}
+
+out:
+	inode_unlock(file_inode(file));
+	fwio->ret = *out;
+	if (*out < 0)
+		return *out;
+	return 0;
+}
+
+int fuse_file_write_iter_finalize(struct bpf_fuse_args *fa, ssize_t *out,
+				  struct kiocb *iocb, struct iov_iter *from)
+{
+	struct fuse_write_iter_out *fwio = fa->out_args[0].value;
+
+	*out = fwio->ret;
+	return 0;
+}
+
 ssize_t fuse_backing_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	int ret;
@@ -1074,3 +1350,18 @@  int fuse_access_finalize(struct bpf_fuse_args *fa, int *out, struct inode *inode
 	return 0;
 }
 
+int __init fuse_bpf_init(void)
+{
+	fuse_bpf_aio_request_cachep = kmem_cache_create("fuse_bpf_aio_req",
+						   sizeof(struct fuse_bpf_aio_req),
+						   0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!fuse_bpf_aio_request_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void __exit fuse_bpf_cleanup(void)
+{
+	kmem_cache_destroy(fuse_bpf_aio_request_cachep);
+}
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 247ef4f76761..685552453751 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -378,7 +378,7 @@  int __init fuse_ctl_init(void)
 	return register_filesystem(&fuse_ctl_fs_type);
 }
 
-void __exit fuse_ctl_cleanup(void)
+void fuse_ctl_cleanup(void)
 {
 	unregister_filesystem(&fuse_ctl_fs_type);
 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7feb73274c3e..443f1af8a431 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1625,6 +1625,20 @@  static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (FUSE_IS_DAX(inode))
 		return fuse_dax_read_iter(iocb, to);
 
+#ifdef CONFIG_FUSE_BPF
+	{
+		ssize_t ret;
+
+		if (fuse_bpf_backing(inode, struct fuse_file_read_iter_io, ret,
+				       fuse_file_read_iter_initialize_in,
+				       fuse_file_read_iter_initialize_out,
+				       fuse_file_read_iter_backing,
+				       fuse_file_read_iter_finalize,
+				       iocb, to))
+			return ret;
+	}
+#endif
+
 	if (!(ff->open_flags & FOPEN_DIRECT_IO))
 		return fuse_cache_read_iter(iocb, to);
 	else
@@ -1643,6 +1657,20 @@  static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (FUSE_IS_DAX(inode))
 		return fuse_dax_write_iter(iocb, from);
 
+#ifdef CONFIG_FUSE_BPF
+	{
+		ssize_t ret = 0;
+
+		if (fuse_bpf_backing(inode, struct fuse_file_write_iter_io, ret,
+				       fuse_file_write_iter_initialize_in,
+				       fuse_file_write_iter_initialize_out,
+				       fuse_file_write_iter_backing,
+				       fuse_file_write_iter_finalize,
+				       iocb, from))
+			return ret;
+	}
+#endif
+
 	if (!(ff->open_flags & FOPEN_DIRECT_IO))
 		return fuse_cache_write_iter(iocb, from);
 	else
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 9d6c9cc68268..f427a7bb367c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1135,7 +1135,7 @@  int fuse_dev_init(void);
 void fuse_dev_cleanup(void);
 
 int fuse_ctl_init(void);
-void __exit fuse_ctl_cleanup(void);
+void fuse_ctl_cleanup(void);
 
 /**
  * Simple request sending that does request allocation and freeing
@@ -1503,6 +1503,43 @@  int fuse_lseek_backing(struct bpf_fuse_args *fa, loff_t *out, struct file *file,
 int fuse_lseek_finalize(struct bpf_fuse_args *fa, loff_t *out, struct file *file,
 			loff_t offset, int whence);
 
+struct fuse_read_iter_out {
+	uint64_t ret;
+};
+struct fuse_file_read_iter_io {
+	struct fuse_read_in fri;
+	struct fuse_read_iter_out frio;
+};
+
+int fuse_file_read_iter_initialize_in(struct bpf_fuse_args *fa, struct fuse_file_read_iter_io *fri,
+				      struct kiocb *iocb, struct iov_iter *to);
+int fuse_file_read_iter_initialize_out(struct bpf_fuse_args *fa, struct fuse_file_read_iter_io *fri,
+				       struct kiocb *iocb, struct iov_iter *to);
+int fuse_file_read_iter_backing(struct bpf_fuse_args *fa, ssize_t *out,
+				struct kiocb *iocb, struct iov_iter *to);
+int fuse_file_read_iter_finalize(struct bpf_fuse_args *fa, ssize_t *out,
+				 struct kiocb *iocb, struct iov_iter *to);
+
+struct fuse_write_iter_out {
+	uint64_t ret;
+};
+struct fuse_file_write_iter_io {
+	struct fuse_write_in fwi;
+	struct fuse_write_out fwo;
+	struct fuse_write_iter_out fwio;
+};
+
+int fuse_file_write_iter_initialize_in(struct bpf_fuse_args *fa,
+				       struct fuse_file_write_iter_io *fwio,
+				       struct kiocb *iocb, struct iov_iter *from);
+int fuse_file_write_iter_initialize_out(struct bpf_fuse_args *fa,
+					struct fuse_file_write_iter_io *fwio,
+					struct kiocb *iocb, struct iov_iter *from);
+int fuse_file_write_iter_backing(struct bpf_fuse_args *fa, ssize_t *out,
+				 struct kiocb *iocb, struct iov_iter *from);
+int fuse_file_write_iter_finalize(struct bpf_fuse_args *fa, ssize_t *out,
+				  struct kiocb *iocb, struct iov_iter *from);
+
 ssize_t fuse_backing_mmap(struct file *file, struct vm_area_struct *vma);
 
 int fuse_file_fallocate_initialize_in(struct bpf_fuse_args *fa,
@@ -1570,6 +1607,9 @@  static inline u64 attr_timeout(struct fuse_attr_out *o)
 }
 
 #ifdef CONFIG_FUSE_BPF
+int __init fuse_bpf_init(void);
+void __exit fuse_bpf_cleanup(void);
+
 /*
  * expression statement to wrap the backing filter logic
  * struct inode *inode: inode with bpf and backing inode
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 290eae750282..c96cfcbfd96a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -2117,11 +2117,21 @@  static int __init fuse_init(void)
 	if (res)
 		goto err_sysfs_cleanup;
 
+#ifdef CONFIG_FUSE_BPF
+	res = fuse_bpf_init();
+	if (res)
+		goto err_ctl_cleanup;
+#endif
+
 	sanitize_global_limit(&max_user_bgreq);
 	sanitize_global_limit(&max_user_congthresh);
 
 	return 0;
 
+#ifdef CONFIG_FUSE_BPF
+ err_ctl_cleanup:
+	fuse_ctl_cleanup();
+#endif
  err_sysfs_cleanup:
 	fuse_sysfs_cleanup();
  err_dev_cleanup:
@@ -2139,6 +2149,9 @@  static void __exit fuse_exit(void)
 	fuse_ctl_cleanup();
 	fuse_sysfs_cleanup();
 	fuse_fs_cleanup();
+#ifdef CONFIG_FUSE_BPF
+	fuse_bpf_cleanup();
+#endif
 	fuse_dev_cleanup();
 }